diff --git a/benchmarks/pandas/bench_between.py b/benchmarks/pandas/bench_between.py new file mode 100644 index 00000000..7ddfd202 --- /dev/null +++ b/benchmarks/pandas/bench_between.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.between() — element-wise range check.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.between(25000.0, 75000.0) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.between(25000.0, 75000.0) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"between","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_clip.py b/benchmarks/pandas/bench_clip.py new file mode 100644 index 00000000..30be9d0b --- /dev/null +++ b/benchmarks/pandas/bench_clip.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.clip() — clip values to a range.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.clip(lower=10000.0, upper=90000.0) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.clip(lower=10000.0, upper=90000.0) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"clip","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_corr.py b/benchmarks/pandas/bench_corr.py new file mode 100644 index 00000000..fde4e7c3 --- /dev/null +++ b/benchmarks/pandas/bench_corr.py @@ -0,0 +1,21 @@ +"""Benchmark: DataFrame.corr — pairwise correlation of numeric columns.""" +import json, time +import pandas as pd + +SIZE = 10_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({"a":[float(i*1.1) for i in range(SIZE)],"b":[float(i*0.7+0.3) for i in 
range(SIZE)],"c":[float(i*-0.5+100) for i in range(SIZE)]}) + +for _ in range(WARMUP): + df.corr() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.corr() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"corr","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_cov.py b/benchmarks/pandas/bench_cov.py new file mode 100644 index 00000000..95e9c5c3 --- /dev/null +++ b/benchmarks/pandas/bench_cov.py @@ -0,0 +1,21 @@ +"""Benchmark: DataFrame.cov — pairwise covariance of numeric columns.""" +import json, time +import pandas as pd + +SIZE = 10_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({"a":[float(i*1.1) for i in range(SIZE)],"b":[float(i*0.7+0.3) for i in range(SIZE)],"c":[float(i*-0.5+100) for i in range(SIZE)]}) + +for _ in range(WARMUP): + df.cov() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.cov() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"cov","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_crosstab.py b/benchmarks/pandas/bench_crosstab.py new file mode 100644 index 00000000..10237533 --- /dev/null +++ b/benchmarks/pandas/bench_crosstab.py @@ -0,0 +1,24 @@ +"""Benchmark: pd.crosstab() — compute a cross-tabulation.""" +import json, time +import pandas as pd + +SIZE = 50_000 +WARMUP = 5 +ITERATIONS = 50 + +import random +random.seed(42) +a = pd.Series([random.choice(["x","y","z"]) for _ in range(SIZE)]) +b = pd.Series([random.choice(["p","q","r","s"]) for _ in range(SIZE)]) + +for _ in range(WARMUP): + pd.crosstab(a, b) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + pd.crosstab(a, b) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) 
+print(json.dumps({"function":"crosstab","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_cut.py b/benchmarks/pandas/bench_cut.py new file mode 100644 index 00000000..b6254397 --- /dev/null +++ b/benchmarks/pandas/bench_cut.py @@ -0,0 +1,21 @@ +"""Benchmark: pd.cut() — bin a Series into discrete intervals.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i) for i in range(SIZE)]) + +for _ in range(WARMUP): + pd.cut(s, bins=10) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + pd.cut(s, bins=10) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"cut","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_dataframe_astype.py b/benchmarks/pandas/bench_dataframe_astype.py new file mode 100644 index 00000000..f2f685f0 --- /dev/null +++ b/benchmarks/pandas/bench_dataframe_astype.py @@ -0,0 +1,21 @@ +"""Benchmark: DataFrame.astype() — cast column dtypes.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({"a":[float(i) for i in range(SIZE)],"b":[i for i in range(SIZE)]}) + +for _ in range(WARMUP): + df.astype({"a": "float32", "b": "int32"}) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.astype({"a": "float32", "b": "int32"}) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"dataframe_astype","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_dataframe_head_tail.py b/benchmarks/pandas/bench_dataframe_head_tail.py new file mode 100644 index 00000000..7f7891f6 --- /dev/null +++ b/benchmarks/pandas/bench_dataframe_head_tail.py @@ -0,0 +1,23 @@ +"""Benchmark: 
DataFrame.head() and .tail() — slice first/last N rows.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({"a":[float(i) for i in range(SIZE)],"b":[i*2 for i in range(SIZE)],"c":[str(i) for i in range(SIZE)]}) + +for _ in range(WARMUP): + df.head(100) + df.tail(100) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.head(100) + df.tail(100) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"dataframe_head_tail","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_diff.py b/benchmarks/pandas/bench_diff.py new file mode 100644 index 00000000..72ff53a5 --- /dev/null +++ b/benchmarks/pandas/bench_diff.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.diff() — first discrete difference.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i*1.1+0.5) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.diff() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.diff() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"diff","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_drop_duplicates.py b/benchmarks/pandas/bench_drop_duplicates.py new file mode 100644 index 00000000..eafc3158 --- /dev/null +++ b/benchmarks/pandas/bench_drop_duplicates.py @@ -0,0 +1,21 @@ +"""Benchmark: DataFrame.drop_duplicates() — remove duplicate rows.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({"a":[i % 1000 for i in range(SIZE)],"b":[i % 500 for i in range(SIZE)]}) + +for _ in range(WARMUP): + df.drop_duplicates() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.drop_duplicates() 
+ times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"drop_duplicates","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_duplicated.py b/benchmarks/pandas/bench_duplicated.py new file mode 100644 index 00000000..e5eb52d3 --- /dev/null +++ b/benchmarks/pandas/bench_duplicated.py @@ -0,0 +1,21 @@ +"""Benchmark: DataFrame.duplicated() — detect duplicate rows.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({"a":[i % 1000 for i in range(SIZE)],"b":[i % 500 for i in range(SIZE)]}) + +for _ in range(WARMUP): + df.duplicated() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.duplicated() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"duplicated","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_expanding_mean.py b/benchmarks/pandas/bench_expanding_mean.py new file mode 100644 index 00000000..7effcf51 --- /dev/null +++ b/benchmarks/pandas/bench_expanding_mean.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.expanding().mean() — expanding window mean.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i*1.1+0.5) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.expanding().mean() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.expanding().mean() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"expanding_mean","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_interpolate.py b/benchmarks/pandas/bench_interpolate.py new file mode 100644 index 00000000..ab3e81d9 --- /dev/null +++ 
b/benchmarks/pandas/bench_interpolate.py @@ -0,0 +1,23 @@ +"""Benchmark: Series.interpolate() — linear interpolation over NaN values.""" +import json, time +import pandas as pd +import math + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +data = [float(i) if i % 5 != 0 else math.nan for i in range(SIZE)] +s = pd.Series(data) + +for _ in range(WARMUP): + s.interpolate(method="linear") + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.interpolate(method="linear") + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"interpolate","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_isin.py b/benchmarks/pandas/bench_isin.py new file mode 100644 index 00000000..6340ccb8 --- /dev/null +++ b/benchmarks/pandas/bench_isin.py @@ -0,0 +1,22 @@ +"""Benchmark: Series.isin() — membership test.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([i % 5000 for i in range(SIZE)]) +test_set = list(range(0, 2500)) + +for _ in range(WARMUP): + s.isin(test_set) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.isin(test_set) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"isin","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_melt.py b/benchmarks/pandas/bench_melt.py new file mode 100644 index 00000000..b01c66d1 --- /dev/null +++ b/benchmarks/pandas/bench_melt.py @@ -0,0 +1,23 @@ +"""Benchmark: DataFrame.melt — unpivots wide-format DataFrame to long-format.""" +import json, time +import pandas as pd + +SIZE = 10_000 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({f"col{i}": [float(j*i+0.5) for j in range(SIZE)] for i in range(1, 6)}) +id_vars = ["col1"] +value_vars = ["col2", "col3", "col4", "col5"] + +for _ in 
range(WARMUP): + df.melt(id_vars=id_vars, value_vars=value_vars) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.melt(id_vars=id_vars, value_vars=value_vars) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"melt","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_nlargest.py b/benchmarks/pandas/bench_nlargest.py new file mode 100644 index 00000000..d02e1145 --- /dev/null +++ b/benchmarks/pandas/bench_nlargest.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.nlargest() — get the n largest values.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i*1.1+0.5) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.nlargest(100) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.nlargest(100) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"nlargest","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_pct_change.py b/benchmarks/pandas/bench_pct_change.py new file mode 100644 index 00000000..70673422 --- /dev/null +++ b/benchmarks/pandas/bench_pct_change.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.pct_change() — percentage change between elements.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i*1.1+1.0) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.pct_change() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.pct_change() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"pct_change","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_qcut.py 
b/benchmarks/pandas/bench_qcut.py new file mode 100644 index 00000000..ad958a17 --- /dev/null +++ b/benchmarks/pandas/bench_qcut.py @@ -0,0 +1,21 @@ +"""Benchmark: pd.qcut() — quantile-based binning.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i) for i in range(SIZE)]) + +for _ in range(WARMUP): + pd.qcut(s, q=10) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + pd.qcut(s, q=10) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"qcut","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_rank.py b/benchmarks/pandas/bench_rank.py new file mode 100644 index 00000000..11f3cb78 --- /dev/null +++ b/benchmarks/pandas/bench_rank.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.rank() — rank values with average tie-breaking.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i % 1000) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.rank() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.rank() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"rank","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_rolling_std.py b/benchmarks/pandas/bench_rolling_std.py new file mode 100644 index 00000000..88522698 --- /dev/null +++ b/benchmarks/pandas/bench_rolling_std.py @@ -0,0 +1,22 @@ +"""Benchmark: Series.rolling().std() — rolling standard deviation.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WINDOW = 20 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i*1.1+0.5) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.rolling(WINDOW).std() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + 
s.rolling(WINDOW).std() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"rolling_std","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_series_abs.py b/benchmarks/pandas/bench_series_abs.py new file mode 100644 index 00000000..9d1163f0 --- /dev/null +++ b/benchmarks/pandas/bench_series_abs.py @@ -0,0 +1,21 @@ +"""Benchmark: Series.abs() — element-wise absolute value.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i - 50000) for i in range(SIZE)]) + +for _ in range(WARMUP): + s.abs() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.abs() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"series_abs","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_series_map.py b/benchmarks/pandas/bench_series_map.py new file mode 100644 index 00000000..c7ffd0ff --- /dev/null +++ b/benchmarks/pandas/bench_series_map.py @@ -0,0 +1,22 @@ +"""Benchmark: Series.map() with a dictionary lookup.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([i % 1000 for i in range(SIZE)]) +lookup = {i: float(i * 2.5) for i in range(1000)} + +for _ in range(WARMUP): + s.map(lookup) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.map(lookup) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"series_map","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_series_nunique.py b/benchmarks/pandas/bench_series_nunique.py new file mode 100644 index 00000000..db67b43c --- /dev/null +++ b/benchmarks/pandas/bench_series_nunique.py @@ 
-0,0 +1,21 @@ +"""Benchmark: Series.nunique() — count unique values.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([i % 1000 for i in range(SIZE)]) + +for _ in range(WARMUP): + s.nunique() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.nunique() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"series_nunique","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_stack.py b/benchmarks/pandas/bench_stack.py new file mode 100644 index 00000000..4a9b4b87 --- /dev/null +++ b/benchmarks/pandas/bench_stack.py @@ -0,0 +1,22 @@ +"""Benchmark: DataFrame.stack() — pivot innermost column level to row index.""" +import json, time +import pandas as pd + +ROWS = 1_000 +COLS = 20 +WARMUP = 5 +ITERATIONS = 50 + +df = pd.DataFrame({f"c{j}": [float(i*j+0.5) for i in range(ROWS)] for j in range(1, COLS+1)}) + +for _ in range(WARMUP): + df.stack() + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + df.stack() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"stack","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_unstack.py b/benchmarks/pandas/bench_unstack.py new file mode 100644 index 00000000..4b2dca2a --- /dev/null +++ b/benchmarks/pandas/bench_unstack.py @@ -0,0 +1,24 @@ +"""Benchmark: DataFrame.unstack() — pivot innermost index level to columns.""" +import json, time +import pandas as pd + +ROWS = 500 +COLS = 10 +WARMUP = 5 +ITERATIONS = 50 + +import numpy as np +idx = pd.MultiIndex.from_product([range(ROWS), range(COLS)], names=["row","col"]) +s = pd.Series([float(i) for i in range(ROWS * COLS)], index=idx) + +for _ in range(WARMUP): + s.unstack() + +times = [] +for _ in range(ITERATIONS): + t0 = 
time.perf_counter() + s.unstack() + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"unstack","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/pandas/bench_where.py b/benchmarks/pandas/bench_where.py new file mode 100644 index 00000000..096f6b48 --- /dev/null +++ b/benchmarks/pandas/bench_where.py @@ -0,0 +1,22 @@ +"""Benchmark: Series.where() — conditional replacement.""" +import json, time +import pandas as pd + +SIZE = 100_000 +WARMUP = 5 +ITERATIONS = 50 + +s = pd.Series([float(i) for i in range(SIZE)]) +cond = s > 50000.0 + +for _ in range(WARMUP): + s.where(cond, other=0.0) + +times = [] +for _ in range(ITERATIONS): + t0 = time.perf_counter() + s.where(cond, other=0.0) + times.append((time.perf_counter() - t0) * 1000) + +total_ms = sum(times) +print(json.dumps({"function":"where","mean_ms":round(total_ms/ITERATIONS,3),"iterations":ITERATIONS,"total_ms":round(total_ms,3)})) diff --git a/benchmarks/results.json b/benchmarks/results.json index c883f334..17a01c1c 100644 --- a/benchmarks/results.json +++ b/benchmarks/results.json @@ -1,5 +1,27 @@ { "benchmarks": [ + { + "function": "between", + "tsb": null, + "pandas": { + "function": "between", + "mean_ms": 0.115, + "iterations": 50, + "total_ms": 5.771 + }, + "ratio": null + }, + { + "function": "clip", + "tsb": null, + "pandas": { + "function": "clip", + "mean_ms": 0.706, + "iterations": 50, + "total_ms": 35.277 + }, + "ratio": null + }, { "function": "concat", "tsb": null, @@ -11,6 +33,50 @@ }, "ratio": null }, + { + "function": "corr", + "tsb": null, + "pandas": { + "function": "corr", + "mean_ms": 0.29, + "iterations": 50, + "total_ms": 14.512 + }, + "ratio": null + }, + { + "function": "cov", + "tsb": null, + "pandas": { + "function": "cov", + "mean_ms": 0.221, + "iterations": 50, + "total_ms": 11.046 + }, + "ratio": null + }, + { + "function": "crosstab", + "tsb": null, + "pandas": { 
+ "function": "crosstab", + "mean_ms": 12.885, + "iterations": 50, + "total_ms": 644.272 + }, + "ratio": null + }, + { + "function": "cut", + "tsb": null, + "pandas": { + "function": "cut", + "mean_ms": 1.459, + "iterations": 50, + "total_ms": 72.96 + }, + "ratio": null + }, { "function": "dataframe_apply", "tsb": null, @@ -22,6 +88,17 @@ }, "ratio": null }, + { + "function": "dataframe_astype", + "tsb": null, + "pandas": { + "function": "dataframe_astype", + "mean_ms": 0.509, + "iterations": 50, + "total_ms": 25.454 + }, + "ratio": null + }, { "function": "dataframe_creation", "tsb": null, @@ -55,6 +132,17 @@ }, "ratio": null }, + { + "function": "dataframe_head_tail", + "tsb": null, + "pandas": { + "function": "dataframe_head_tail", + "mean_ms": 0.059, + "iterations": 50, + "total_ms": 2.925 + }, + "ratio": null + }, { "function": "dataframe_rename", "tsb": null, @@ -88,6 +176,39 @@ }, "ratio": null }, + { + "function": "diff", + "tsb": null, + "pandas": { + "function": "diff", + "mean_ms": 0.143, + "iterations": 50, + "total_ms": 7.175 + }, + "ratio": null + }, + { + "function": "drop_duplicates", + "tsb": null, + "pandas": { + "function": "drop_duplicates", + "mean_ms": 3.303, + "iterations": 50, + "total_ms": 165.161 + }, + "ratio": null + }, + { + "function": "duplicated", + "tsb": null, + "pandas": { + "function": "duplicated", + "mean_ms": 3.22, + "iterations": 50, + "total_ms": 160.983 + }, + "ratio": null + }, { "function": "ewm_mean", "tsb": null, @@ -99,6 +220,17 @@ }, "ratio": null }, + { + "function": "expanding_mean", + "tsb": null, + "pandas": { + "function": "expanding_mean", + "mean_ms": 1.11, + "iterations": 50, + "total_ms": 55.505 + }, + "ratio": null + }, { "function": "groupby_mean", "tsb": null, @@ -110,6 +242,39 @@ }, "ratio": null }, + { + "function": "interpolate", + "tsb": null, + "pandas": { + "function": "interpolate", + "mean_ms": 3.356, + "iterations": 50, + "total_ms": 167.813 + }, + "ratio": null + }, + { + "function": "isin", + 
"tsb": null, + "pandas": { + "function": "isin", + "mean_ms": 0.673, + "iterations": 50, + "total_ms": 33.666 + }, + "ratio": null + }, + { + "function": "melt", + "tsb": null, + "pandas": { + "function": "melt", + "mean_ms": 2.551, + "iterations": 50, + "total_ms": 127.542 + }, + "ratio": null + }, { "function": "merge", "tsb": null, @@ -121,6 +286,28 @@ }, "ratio": null }, + { + "function": "nlargest", + "tsb": null, + "pandas": { + "function": "nlargest", + "mean_ms": 0.717, + "iterations": 50, + "total_ms": 35.865 + }, + "ratio": null + }, + { + "function": "pct_change", + "tsb": null, + "pandas": { + "function": "pct_change", + "mean_ms": 0.193, + "iterations": 50, + "total_ms": 9.668 + }, + "ratio": null + }, { "function": "pivot_table", "tsb": null, @@ -132,6 +319,28 @@ }, "ratio": null }, + { + "function": "qcut", + "tsb": null, + "pandas": { + "function": "qcut", + "mean_ms": 2.569, + "iterations": 50, + "total_ms": 128.474 + }, + "ratio": null + }, + { + "function": "rank", + "tsb": null, + "pandas": { + "function": "rank", + "mean_ms": 3.057, + "iterations": 50, + "total_ms": 152.835 + }, + "ratio": null + }, { "function": "read_csv", "tsb": null, @@ -154,6 +363,28 @@ }, "ratio": null }, + { + "function": "rolling_std", + "tsb": null, + "pandas": { + "function": "rolling_std", + "mean_ms": 3.437, + "iterations": 50, + "total_ms": 171.832 + }, + "ratio": null + }, + { + "function": "series_abs", + "tsb": null, + "pandas": { + "function": "series_abs", + "mean_ms": 0.037, + "iterations": 50, + "total_ms": 1.857 + }, + "ratio": null + }, { "function": "series_arithmetic", "tsb": null, @@ -198,6 +429,28 @@ }, "ratio": null }, + { + "function": "series_map", + "tsb": null, + "pandas": { + "function": "series_map", + "mean_ms": 0.821, + "iterations": 50, + "total_ms": 41.036 + }, + "ratio": null + }, + { + "function": "series_nunique", + "tsb": null, + "pandas": { + "function": "series_nunique", + "mean_ms": 0.426, + "iterations": 50, + "total_ms": 21.3 + }, + 
"ratio": null + }, { "function": "series_shift", "tsb": null, @@ -241,7 +494,40 @@ "total_ms": 92.12644899997713 }, "ratio": null + }, + { + "function": "stack", + "tsb": null, + "pandas": { + "function": "stack", + "mean_ms": 0.337, + "iterations": 50, + "total_ms": 16.831 + }, + "ratio": null + }, + { + "function": "unstack", + "tsb": null, + "pandas": { + "function": "unstack", + "mean_ms": 0.398, + "iterations": 50, + "total_ms": 19.887 + }, + "ratio": null + }, + { + "function": "where", + "tsb": null, + "pandas": { + "function": "where", + "mean_ms": 0.23, + "iterations": 50, + "total_ms": 11.504 + }, + "ratio": null } ], - "timestamp": "2026-04-12T15:46:00Z" -} \ No newline at end of file + "timestamp": "2026-04-12T17:15:00Z" +} diff --git a/benchmarks/tsb/bench_between.ts b/benchmarks/tsb/bench_between.ts new file mode 100644 index 00000000..4e06570c --- /dev/null +++ b/benchmarks/tsb/bench_between.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.between() — element-wise range check. 
+ * Outputs JSON: {"function": "between", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.0) }); + +for (let i = 0; i < WARMUP; i++) { + s.between(25000.0, 75000.0); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.between(25000.0, 75000.0); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "between", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_clip.ts b/benchmarks/tsb/bench_clip.ts new file mode 100644 index 00000000..77ce3688 --- /dev/null +++ b/benchmarks/tsb/bench_clip.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.clip() — clip values to a range. 
+ * Outputs JSON: {"function": "clip", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.0) }); + +for (let i = 0; i < WARMUP; i++) { + s.clip({ lower: 10000.0, upper: 90000.0 }); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.clip({ lower: 10000.0, upper: 90000.0 }); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "clip", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_corr.ts b/benchmarks/tsb/bench_corr.ts new file mode 100644 index 00000000..39821d71 --- /dev/null +++ b/benchmarks/tsb/bench_corr.ts @@ -0,0 +1,37 @@ +/** + * Benchmark: DataFrame.corr — pairwise correlation of numeric columns. 
+ * Outputs JSON: {"function": "corr", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const SIZE = 10_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const df = new DataFrame({ + a: Array.from({ length: SIZE }, (_, i) => i * 1.1), + b: Array.from({ length: SIZE }, (_, i) => i * 0.7 + 0.3), + c: Array.from({ length: SIZE }, (_, i) => i * -0.5 + 100), +}); + +for (let i = 0; i < WARMUP; i++) { + df.corr(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.corr(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "corr", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_cov.ts b/benchmarks/tsb/bench_cov.ts new file mode 100644 index 00000000..af60be69 --- /dev/null +++ b/benchmarks/tsb/bench_cov.ts @@ -0,0 +1,37 @@ +/** + * Benchmark: DataFrame.cov — pairwise covariance of numeric columns. 
+ * Outputs JSON: {"function": "cov", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const SIZE = 10_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const df = new DataFrame({ + a: Array.from({ length: SIZE }, (_, i) => i * 1.1), + b: Array.from({ length: SIZE }, (_, i) => i * 0.7 + 0.3), + c: Array.from({ length: SIZE }, (_, i) => i * -0.5 + 100), +}); + +for (let i = 0; i < WARMUP; i++) { + df.cov(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.cov(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "cov", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_crosstab.ts b/benchmarks/tsb/bench_crosstab.ts new file mode 100644 index 00000000..24b2fde7 --- /dev/null +++ b/benchmarks/tsb/bench_crosstab.ts @@ -0,0 +1,42 @@ +/** + * Benchmark: crosstab() — compute a cross-tabulation. 
+ * Outputs JSON: {"function": "crosstab", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series, crosstab } from "../../src/index.ts"; + +const SIZE = 50_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const choices_a = ["x", "y", "z"]; +const choices_b = ["p", "q", "r", "s"]; +let seed = 42; +function rand(): number { + seed = (seed * 1664525 + 1013904223) & 0x7fffffff; + return seed; +} + +const a = new Series({ data: Array.from({ length: SIZE }, () => choices_a[rand() % 3]) }); +const b = new Series({ data: Array.from({ length: SIZE }, () => choices_b[rand() % 4]) }); + +for (let i = 0; i < WARMUP; i++) { + crosstab(a, b); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + crosstab(a, b); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "crosstab", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_cut.ts b/benchmarks/tsb/bench_cut.ts new file mode 100644 index 00000000..e205b6d4 --- /dev/null +++ b/benchmarks/tsb/bench_cut.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: cut() — bin a Series into discrete intervals. 
+ * Outputs JSON: {"function": "cut", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series, cut } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.0) }); + +for (let i = 0; i < WARMUP; i++) { + cut(s, { bins: 10 }); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + cut(s, { bins: 10 }); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "cut", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_dataframe_apply.ts b/benchmarks/tsb/bench_dataframe_apply.ts index 32a99a68..3e0a0020 100644 --- a/benchmarks/tsb/bench_dataframe_apply.ts +++ b/benchmarks/tsb/bench_dataframe_apply.ts @@ -13,12 +13,12 @@ const b = Float64Array.from({ length: ROWS }, (_, i) => i * 2.0); const df = new DataFrame({ a, b }); for (let i = 0; i < WARMUP; i++) { - df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 }); + df.apply((row) => (row.a as number) + (row.b as number), { axis: 1 }); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - df.apply((row) => (row["a"] as number) + (row["b"] as number), { axis: 1 }); + df.apply((row) => (row.a as number) + (row.b as number), { axis: 1 }); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_dataframe_astype.ts b/benchmarks/tsb/bench_dataframe_astype.ts new file mode 100644 index 00000000..39a34529 --- /dev/null +++ b/benchmarks/tsb/bench_dataframe_astype.ts @@ -0,0 +1,36 @@ +/** + * Benchmark: DataFrame.astype() — cast column dtypes. 
+ * Outputs JSON: {"function": "dataframe_astype", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const df = new DataFrame({ + a: Array.from({ length: SIZE }, (_, i) => i * 1.0), + b: Array.from({ length: SIZE }, (_, i) => i), +}); + +for (let i = 0; i < WARMUP; i++) { + df.astype({ a: "float32", b: "int32" }); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.astype({ a: "float32", b: "int32" }); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "dataframe_astype", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_dataframe_dropna.ts b/benchmarks/tsb/bench_dataframe_dropna.ts index e4fef46b..87fd867a 100644 --- a/benchmarks/tsb/bench_dataframe_dropna.ts +++ b/benchmarks/tsb/bench_dataframe_dropna.ts @@ -7,8 +7,8 @@ const ROWS = 100_000; const WARMUP = 5; const ITERATIONS = 20; -const a = Float64Array.from({ length: ROWS }, (_, i) => (i % 10 === 0 ? NaN : i * 1.1)); -const b = Float64Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? NaN : i * 2.2)); +const a = Float64Array.from({ length: ROWS }, (_, i) => (i % 10 === 0 ? Number.NaN : i * 1.1)); +const b = Float64Array.from({ length: ROWS }, (_, i) => (i % 7 === 0 ? 
Number.NaN : i * 2.2)); const df = new DataFrame({ a, b }); for (let i = 0; i < WARMUP; i++) { diff --git a/benchmarks/tsb/bench_dataframe_filter.ts b/benchmarks/tsb/bench_dataframe_filter.ts index 57d78bd7..efbcc1cd 100644 --- a/benchmarks/tsb/bench_dataframe_filter.ts +++ b/benchmarks/tsb/bench_dataframe_filter.ts @@ -11,12 +11,12 @@ const vals = Float64Array.from({ length: ROWS }, (_, i) => i * 0.1); const df = new DataFrame({ value: vals }); for (let i = 0; i < WARMUP; i++) { - df.filter((row) => (row["value"] as number) > 5000); + df.filter((row) => (row.value as number) > 5000); } const start = performance.now(); for (let i = 0; i < ITERATIONS; i++) { - df.filter((row) => (row["value"] as number) > 5000); + df.filter((row) => (row.value as number) > 5000); } const total = performance.now() - start; diff --git a/benchmarks/tsb/bench_dataframe_head_tail.ts b/benchmarks/tsb/bench_dataframe_head_tail.ts new file mode 100644 index 00000000..b903c6ab --- /dev/null +++ b/benchmarks/tsb/bench_dataframe_head_tail.ts @@ -0,0 +1,39 @@ +/** + * Benchmark: DataFrame.head() and .tail() — slice first/last N rows. 
+ * Outputs JSON: {"function": "dataframe_head_tail", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const df = new DataFrame({ + a: Array.from({ length: SIZE }, (_, i) => i * 1.0), + b: Array.from({ length: SIZE }, (_, i) => i * 2), + c: Array.from({ length: SIZE }, (_, i) => String(i)), +}); + +for (let i = 0; i < WARMUP; i++) { + df.head(100); + df.tail(100); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.head(100); + df.tail(100); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "dataframe_head_tail", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_diff.ts b/benchmarks/tsb/bench_diff.ts new file mode 100644 index 00000000..b65b42ac --- /dev/null +++ b/benchmarks/tsb/bench_diff.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.diff() — first discrete difference. 
+ * Outputs JSON: {"function": "diff", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.1 + 0.5) }); + +for (let i = 0; i < WARMUP; i++) { + s.diff(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.diff(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "diff", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_drop_duplicates.ts b/benchmarks/tsb/bench_drop_duplicates.ts new file mode 100644 index 00000000..bfc65bc6 --- /dev/null +++ b/benchmarks/tsb/bench_drop_duplicates.ts @@ -0,0 +1,36 @@ +/** + * Benchmark: DataFrame.drop_duplicates() — remove duplicate rows. 
+ * Outputs JSON: {"function": "drop_duplicates", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const df = new DataFrame({ + a: Array.from({ length: SIZE }, (_, i) => i % 1000), + b: Array.from({ length: SIZE }, (_, i) => i % 500), +}); + +for (let i = 0; i < WARMUP; i++) { + df.drop_duplicates(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.drop_duplicates(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "drop_duplicates", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_duplicated.ts b/benchmarks/tsb/bench_duplicated.ts new file mode 100644 index 00000000..054e80e9 --- /dev/null +++ b/benchmarks/tsb/bench_duplicated.ts @@ -0,0 +1,36 @@ +/** + * Benchmark: DataFrame.duplicated() — detect duplicate rows. 
+ * Outputs JSON: {"function": "duplicated", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const df = new DataFrame({ + a: Array.from({ length: SIZE }, (_, i) => i % 1000), + b: Array.from({ length: SIZE }, (_, i) => i % 500), +}); + +for (let i = 0; i < WARMUP; i++) { + df.duplicated(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.duplicated(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "duplicated", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_expanding_mean.ts b/benchmarks/tsb/bench_expanding_mean.ts new file mode 100644 index 00000000..8f06c721 --- /dev/null +++ b/benchmarks/tsb/bench_expanding_mean.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.expanding().mean() — expanding window mean. 
+ * Outputs JSON: {"function": "expanding_mean", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.1 + 0.5) }); + +for (let i = 0; i < WARMUP; i++) { + s.expanding().mean(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.expanding().mean(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "expanding_mean", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_interpolate.ts b/benchmarks/tsb/bench_interpolate.ts new file mode 100644 index 00000000..cc1dc495 --- /dev/null +++ b/benchmarks/tsb/bench_interpolate.ts @@ -0,0 +1,34 @@ +/** + * Benchmark: Series.interpolate() — linear interpolation over NaN values. + * Outputs JSON: {"function": "interpolate", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const data = Array.from({ length: SIZE }, (_, i) => (i % 5 === 0 ? 
Number.NaN : i * 1.0)); +const s = new Series({ data }); + +for (let i = 0; i < WARMUP; i++) { + s.interpolate({ method: "linear" }); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.interpolate({ method: "linear" }); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "interpolate", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_isin.ts b/benchmarks/tsb/bench_isin.ts new file mode 100644 index 00000000..2f3741b1 --- /dev/null +++ b/benchmarks/tsb/bench_isin.ts @@ -0,0 +1,34 @@ +/** + * Benchmark: Series.isin() — membership test. + * Outputs JSON: {"function": "isin", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 5000) }); +const testSet = Array.from({ length: 2500 }, (_, i) => i); + +for (let i = 0; i < WARMUP; i++) { + s.isin(testSet); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.isin(testSet); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "isin", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_melt.ts b/benchmarks/tsb/bench_melt.ts new file mode 100644 index 00000000..fc8a3e1f --- /dev/null +++ b/benchmarks/tsb/bench_melt.ts @@ -0,0 +1,37 @@ +/** + * Benchmark: DataFrame.melt — unpivots wide-format DataFrame to long-format. 
+ * Outputs JSON: {"function": "melt", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const SIZE = 10_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const cols: Record<string, number[]> = {}; +for (let i = 1; i <= 5; i++) { + cols[`col${i}`] = Array.from({ length: SIZE }, (_, j) => j * i + 0.5); +} +const df = new DataFrame(cols); + +for (let i = 0; i < WARMUP; i++) { + df.melt({ idVars: ["col1"], valueVars: ["col2", "col3", "col4", "col5"] }); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.melt({ idVars: ["col1"], valueVars: ["col2", "col3", "col4", "col5"] }); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "melt", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_nlargest.ts b/benchmarks/tsb/bench_nlargest.ts new file mode 100644 index 00000000..609dce74 --- /dev/null +++ b/benchmarks/tsb/bench_nlargest.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.nlargest() — get the n largest values. 
+ * Outputs JSON: {"function": "nlargest", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.1 + 0.5) }); + +for (let i = 0; i < WARMUP; i++) { + s.nlargest(100); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.nlargest(100); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "nlargest", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_pct_change.ts b/benchmarks/tsb/bench_pct_change.ts new file mode 100644 index 00000000..5c142bc7 --- /dev/null +++ b/benchmarks/tsb/bench_pct_change.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.pct_change() — percentage change between elements. 
+ * Outputs JSON: {"function": "pct_change", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.1 + 1.0) }); + +for (let i = 0; i < WARMUP; i++) { + s.pct_change(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.pct_change(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "pct_change", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_qcut.ts b/benchmarks/tsb/bench_qcut.ts new file mode 100644 index 00000000..5746722b --- /dev/null +++ b/benchmarks/tsb/bench_qcut.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: qcut() — quantile-based binning. 
+ * Outputs JSON: {"function": "qcut", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series, qcut } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.0) }); + +for (let i = 0; i < WARMUP; i++) { + qcut(s, { q: 10 }); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + qcut(s, { q: 10 }); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "qcut", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_rank.ts b/benchmarks/tsb/bench_rank.ts new file mode 100644 index 00000000..93c245b9 --- /dev/null +++ b/benchmarks/tsb/bench_rank.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.rank() — rank values with average tie-breaking. 
+ * Outputs JSON: {"function": "rank", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => (i % 1000) * 1.0) }); + +for (let i = 0; i < WARMUP; i++) { + s.rank(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.rank(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "rank", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_rolling_std.ts b/benchmarks/tsb/bench_rolling_std.ts new file mode 100644 index 00000000..67bc1335 --- /dev/null +++ b/benchmarks/tsb/bench_rolling_std.ts @@ -0,0 +1,34 @@ +/** + * Benchmark: Series.rolling().std() — rolling standard deviation. 
+ * Outputs JSON: {"function": "rolling_std", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WINDOW = 20; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.1 + 0.5) }); + +for (let i = 0; i < WARMUP; i++) { + s.rolling(WINDOW).std(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.rolling(WINDOW).std(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "rolling_std", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_series_abs.ts b/benchmarks/tsb/bench_series_abs.ts new file mode 100644 index 00000000..1034fdba --- /dev/null +++ b/benchmarks/tsb/bench_series_abs.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.abs() — element-wise absolute value. 
+ * Outputs JSON: {"function": "series_abs", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => (i - 50000) * 1.0) }); + +for (let i = 0; i < WARMUP; i++) { + s.abs(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.abs(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "series_abs", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_series_fillna.ts b/benchmarks/tsb/bench_series_fillna.ts index 3e658b01..86c259fb 100644 --- a/benchmarks/tsb/bench_series_fillna.ts +++ b/benchmarks/tsb/bench_series_fillna.ts @@ -8,7 +8,7 @@ const WARMUP = 5; const ITERATIONS = 20; // Create series with every 5th value as NaN -const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 5 === 0 ? NaN : i * 1.1)); +const data = Float64Array.from({ length: ROWS }, (_, i) => (i % 5 === 0 ? Number.NaN : i * 1.1)); const s = new Series(data); for (let i = 0; i < WARMUP; i++) { diff --git a/benchmarks/tsb/bench_series_map.ts b/benchmarks/tsb/bench_series_map.ts new file mode 100644 index 00000000..899cd0bb --- /dev/null +++ b/benchmarks/tsb/bench_series_map.ts @@ -0,0 +1,34 @@ +/** + * Benchmark: Series.map() with a dictionary lookup. 
+ * Outputs JSON: {"function": "series_map", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 1000) }); +const lookup = new Map(Array.from({ length: 1000 }, (_, i) => [i, i * 2.5])); + +for (let i = 0; i < WARMUP; i++) { + s.map(lookup); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.map(lookup); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "series_map", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_series_nunique.ts b/benchmarks/tsb/bench_series_nunique.ts new file mode 100644 index 00000000..3a40da23 --- /dev/null +++ b/benchmarks/tsb/bench_series_nunique.ts @@ -0,0 +1,33 @@ +/** + * Benchmark: Series.nunique() — count unique values. 
+ * Outputs JSON: {"function": "series_nunique", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 1000) }); + +for (let i = 0; i < WARMUP; i++) { + s.nunique(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.nunique(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "series_nunique", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_stack.ts b/benchmarks/tsb/bench_stack.ts new file mode 100644 index 00000000..6a4bb944 --- /dev/null +++ b/benchmarks/tsb/bench_stack.ts @@ -0,0 +1,38 @@ +/** + * Benchmark: DataFrame.stack() — pivot innermost column level to row index. 
+ * Outputs JSON: {"function": "stack", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { DataFrame } from "../../src/index.ts"; + +const ROWS = 1_000; +const COLS = 20; +const WARMUP = 5; +const ITERATIONS = 50; + +const cols: Record<string, number[]> = {}; +for (let j = 1; j <= COLS; j++) { + cols[`c${j}`] = Array.from({ length: ROWS }, (_, i) => i * j + 0.5); +} +const df = new DataFrame(cols); + +for (let i = 0; i < WARMUP; i++) { + df.stack(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + df.stack(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "stack", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_unstack.ts b/benchmarks/tsb/bench_unstack.ts new file mode 100644 index 00000000..9bebfac0 --- /dev/null +++ b/benchmarks/tsb/bench_unstack.ts @@ -0,0 +1,39 @@ +/** + * Benchmark: Series.unstack() — pivot innermost MultiIndex level to columns. 
+ * Outputs JSON: {"function": "unstack", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const ROWS = 500; +const COLS = 10; +const WARMUP = 5; +const ITERATIONS = 50; + +const data = Array.from({ length: ROWS * COLS }, (_, i) => i * 1.0); +const index = Array.from( + { length: ROWS * COLS }, + (_, i) => [Math.floor(i / COLS), i % COLS] as [number, number], +); +const s = new Series({ data, index }); + +for (let i = 0; i < WARMUP; i++) { + s.unstack(); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.unstack(); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "unstack", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/benchmarks/tsb/bench_where.ts b/benchmarks/tsb/bench_where.ts new file mode 100644 index 00000000..14843151 --- /dev/null +++ b/benchmarks/tsb/bench_where.ts @@ -0,0 +1,34 @@ +/** + * Benchmark: Series.where() — conditional replacement. 
+ * Outputs JSON: {"function": "where", "mean_ms": ..., "iterations": ..., "total_ms": ...} + */ +import { Series } from "../../src/index.ts"; + +const SIZE = 100_000; +const WARMUP = 5; +const ITERATIONS = 50; + +const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i * 1.0) }); +const cond = s.gt(50000.0); + +for (let i = 0; i < WARMUP; i++) { + s.where(cond, 0.0); +} + +const times: number[] = []; +for (let i = 0; i < ITERATIONS; i++) { + const start = performance.now(); + s.where(cond, 0.0); + times.push(performance.now() - start); +} + +const totalMs = times.reduce((a, b) => a + b, 0); +const meanMs = totalMs / ITERATIONS; +console.log( + JSON.stringify({ + function: "where", + mean_ms: Math.round(meanMs * 1000) / 1000, + iterations: ITERATIONS, + total_ms: Math.round(totalMs * 1000) / 1000, + }), +); diff --git a/biome.json b/biome.json index 5e64d5ce..29a04fd7 100644 --- a/biome.json +++ b/biome.json @@ -69,6 +69,20 @@ "rules": { "nursery": { "noSecrets": "off" + }, + "complexity": { + "useLiteralKeys": "off" + } + } + } + }, + { + "include": ["benchmarks/**"], + "linter": { + "rules": { + "suspicious": { + "noConsole": "off", + "noConsoleLog": "off" } } } diff --git a/src/core/api_types.ts b/src/core/api_types.ts index 860d2050..5b78bbd7 100644 --- a/src/core/api_types.ts +++ b/src/core/api_types.ts @@ -23,8 +23,8 @@ * @module */ -import { Dtype } from "./dtype.ts"; import type { DtypeName } from "../types.ts"; +import { Dtype } from "./dtype.ts"; // ─── internal helper ────────────────────────────────────────────────────────── @@ -95,14 +95,19 @@ export function isListLike(val: unknown): boolean { return false; } // Has Symbol.iterator and is not a plain number/boolean/bigint/symbol - if (typeof val === "number" || typeof val === "boolean" || typeof val === "bigint" || typeof val === "symbol") { + if ( + typeof val === "number" || + typeof val === "boolean" || + typeof val === "bigint" || + typeof val === "symbol" + ) { return false; } if 
(typeof val === "object" || typeof val === "function") { if (Symbol.iterator in (val as object)) { return true; } - const len = (val as Record<string, unknown>)["length"]; + const len = (val as { readonly length?: unknown }).length; if (typeof len === "number" && len >= 0 && Number.isInteger(len)) { return true; } @@ -134,7 +139,7 @@ export function isArrayLike(val: unknown): boolean { if (typeof val !== "object" && typeof val !== "function") { return false; } - const len = (val as Record<string, unknown>)["length"]; + const len = (val as { readonly length?: unknown }).length; return typeof len === "number" && len >= 0 && Number.isInteger(len); } @@ -192,7 +197,7 @@ export function isIterator(val: unknown): boolean { if (typeof val !== "object" && typeof val !== "function") { return false; } - return typeof (val as Record<string, unknown>)["next"] === "function"; + return typeof (val as { readonly next?: unknown }).next === "function"; } /** diff --git a/src/core/attrs.ts b/src/core/attrs.ts index 81c6be1c..771f5325 100644 --- a/src/core/attrs.ts +++ b/src/core/attrs.ts @@ -227,7 +227,9 @@ export function setAttr(obj: object, key: string, value: unknown): void { */ export function deleteAttr(obj: object, key: string): void { const existing = registry.get(obj); - if (existing === undefined) return; + if (existing === undefined) { + return; + } const { [key]: _removed, ...rest } = existing; if (Object.keys(rest).length === 0) { registry.delete(obj); diff --git a/src/core/frame.ts b/src/core/frame.ts index 1e260fe0..578cc4b8 100644 --- a/src/core/frame.ts +++ b/src/core/frame.ts @@ -100,12 +100,19 @@ export class DataFrame { * Low-level constructor. Prefer the static factory methods for typical use. * * @param columns - Ordered map of column name → Series (all same length and index). - * @param index - Row index (must match each Series' length). + * @param index - Row index (must match each Series' length). Defaults to a + * `RangeIndex` derived from the first Series when omitted. 
*/ - constructor(columns: ReadonlyMap>, index: Index