Skip to content
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_between.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Series.between() — element-wise range check."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: a Series holding the floats 0.0, 1.0, ..., SIZE - 1.
s = pd.Series([float(i) for i in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.between(25000.0, 75000.0)

# Timed runs: one wall-clock sample, in milliseconds, per call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.between(25000.0, 75000.0)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "between",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_clip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Series.clip() — clip values to a range."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: a Series holding the floats 0.0, 1.0, ..., SIZE - 1.
s = pd.Series([float(i) for i in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.clip(lower=10000.0, upper=90000.0)

# Timed runs: each sample is the wall-clock duration of one clip() call, in ms.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.clip(lower=10000.0, upper=90000.0)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "clip",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_corr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: DataFrame.corr — pairwise correlation of numeric columns."""
import json, time
import pandas as pd

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 10_000
WARMUP = 5
ITERATIONS = 50

# Input: three float columns, each a linear function of the row index.
df = pd.DataFrame(
    {
        "a": [float(i * 1.1) for i in range(SIZE)],
        "b": [float(i * 0.7 + 0.3) for i in range(SIZE)],
        "c": [float(i * -0.5 + 100) for i in range(SIZE)],
    }
)

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    df.corr()

# Timed runs: one wall-clock sample, in milliseconds, per corr() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    df.corr()
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "corr",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_cov.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: DataFrame.cov — pairwise covariance of numeric columns."""
import json, time
import pandas as pd

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 10_000
WARMUP = 5
ITERATIONS = 50

# Input: three float columns, each a linear function of the row index.
df = pd.DataFrame(
    {
        "a": [float(i * 1.1) for i in range(SIZE)],
        "b": [float(i * 0.7 + 0.3) for i in range(SIZE)],
        "c": [float(i * -0.5 + 100) for i in range(SIZE)],
    }
)

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    df.cov()

# Timed runs: one wall-clock sample, in milliseconds, per cov() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    df.cov()
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "cov",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
24 changes: 24 additions & 0 deletions benchmarks/pandas/bench_crosstab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Benchmark: pd.crosstab() — compute a cross-tabulation."""
import json, time
import pandas as pd

import random

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 50_000
WARMUP = 5
ITERATIONS = 50

# Fixed seed so the same pseudo-random labels are generated on every run.
random.seed(42)
# Two label Series: `a` draws from 3 categories, `b` from 4.
a = pd.Series([random.choice(["x", "y", "z"]) for _ in range(SIZE)])
b = pd.Series([random.choice(["p", "q", "r", "s"]) for _ in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    pd.crosstab(a, b)

# Timed runs: one wall-clock sample, in milliseconds, per crosstab() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    pd.crosstab(a, b)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "crosstab",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_cut.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: pd.cut() — bin a Series into discrete intervals."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: a Series holding the floats 0.0, 1.0, ..., SIZE - 1.
s = pd.Series([float(i) for i in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    pd.cut(s, bins=10)

# Timed runs: one wall-clock sample, in milliseconds, per cut() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    pd.cut(s, bins=10)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "cut",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_dataframe_astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: DataFrame.astype() — cast column dtypes."""
import json, time
import pandas as pd

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: column "a" holds floats 0.0..SIZE-1, column "b" the ints 0..SIZE-1.
df = pd.DataFrame(
    {
        "a": [float(i) for i in range(SIZE)],
        "b": [i for i in range(SIZE)],
    }
)

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    df.astype({"a": "float32", "b": "int32"})

# Timed runs: one wall-clock sample, in milliseconds, per astype() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    df.astype({"a": "float32", "b": "int32"})
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "dataframe_astype",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
23 changes: 23 additions & 0 deletions benchmarks/pandas/bench_dataframe_head_tail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Benchmark: DataFrame.head() and .tail() — slice first/last N rows."""
import json, time
import pandas as pd

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: one float column, one int column and one string column.
df = pd.DataFrame(
    {
        "a": [float(i) for i in range(SIZE)],
        "b": [i * 2 for i in range(SIZE)],
        "c": [str(i) for i in range(SIZE)],
    }
)

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    df.head(100)
    df.tail(100)

# Timed runs: each sample covers one head() plus one tail() call, in ms.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    df.head(100)
    df.tail(100)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "dataframe_head_tail",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Series.diff() — first discrete difference."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: a float Series whose i-th value is i * 1.1 + 0.5.
s = pd.Series([float(i * 1.1 + 0.5) for i in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.diff()

# Timed runs: one wall-clock sample, in milliseconds, per diff() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.diff()
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "diff",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_drop_duplicates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: DataFrame.drop_duplicates() — remove duplicate rows."""
import json, time
import pandas as pd

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: column "a" cycles through 0..999 and column "b" through 0..499,
# so the frame contains many repeated rows.
df = pd.DataFrame(
    {
        "a": [i % 1000 for i in range(SIZE)],
        "b": [i % 500 for i in range(SIZE)],
    }
)

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    df.drop_duplicates()

# Timed runs: one wall-clock sample, in milliseconds, per call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    df.drop_duplicates()
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "drop_duplicates",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_duplicated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: DataFrame.duplicated() — detect duplicate rows."""
import json, time
import pandas as pd

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: column "a" cycles through 0..999 and column "b" through 0..499,
# so the frame contains many repeated rows.
df = pd.DataFrame(
    {
        "a": [i % 1000 for i in range(SIZE)],
        "b": [i % 500 for i in range(SIZE)],
    }
)

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    df.duplicated()

# Timed runs: one wall-clock sample, in milliseconds, per duplicated() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    df.duplicated()
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "duplicated",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_expanding_mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Series.expanding().mean() — expanding window mean."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: a float Series whose i-th value is i * 1.1 + 0.5.
s = pd.Series([float(i * 1.1 + 0.5) for i in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.expanding().mean()

# Timed runs: each sample covers building the expanding window and
# computing its mean, in milliseconds.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.expanding().mean()
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "expanding_mean",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
23 changes: 23 additions & 0 deletions benchmarks/pandas/bench_interpolate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Benchmark: Series.interpolate() — linear interpolation over NaN values."""
import json, time
import pandas as pd
import math

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: every index divisible by 5 (including 0) is NaN; all other
# positions hold float(i).
data = [float(i) if i % 5 != 0 else math.nan for i in range(SIZE)]
s = pd.Series(data)

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.interpolate(method="linear")

# Timed runs: one wall-clock sample, in milliseconds, per interpolate() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.interpolate(method="linear")
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "interpolate",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
22 changes: 22 additions & 0 deletions benchmarks/pandas/bench_isin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Benchmark: Series.isin() — membership test."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: values cycle through 0..4999; the lookup list holds 0..2499.
s = pd.Series([i % 5000 for i in range(SIZE)])
test_set = list(range(0, 2500))

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.isin(test_set)

# Timed runs: one wall-clock sample, in milliseconds, per isin() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.isin(test_set)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "isin",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
23 changes: 23 additions & 0 deletions benchmarks/pandas/bench_melt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Benchmark: DataFrame.melt — unpivots wide-format DataFrame to long-format."""
import json, time
import pandas as pd

# Benchmark parameters: row count, untimed warm-up calls, timed calls.
SIZE = 10_000
WARMUP = 5
ITERATIONS = 50

# Input: five float columns col1..col5, where col<i> holds j * i + 0.5
# for each row index j.
df = pd.DataFrame(
    {f"col{i}": [float(j * i + 0.5) for j in range(SIZE)] for i in range(1, 6)}
)
# col1 stays as the identifier column; the other four are unpivoted.
id_vars = ["col1"]
value_vars = ["col2", "col3", "col4", "col5"]

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    df.melt(id_vars=id_vars, value_vars=value_vars)

# Timed runs: one wall-clock sample, in milliseconds, per melt() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    df.melt(id_vars=id_vars, value_vars=value_vars)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "melt",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_nlargest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Series.nlargest() — get the n largest values."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: a float Series whose i-th value is i * 1.1 + 0.5.
s = pd.Series([float(i * 1.1 + 0.5) for i in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.nlargest(100)

# Timed runs: one wall-clock sample, in milliseconds, per nlargest() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.nlargest(100)
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "nlargest",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_pct_change.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Series.pct_change() — percentage change between elements."""
import json, time
import pandas as pd

# Benchmark parameters: input length, untimed warm-up calls, timed calls.
SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Input: a float Series whose i-th value is i * 1.1 + 1.0; the first value
# is 1.0, so no element is zero.
s = pd.Series([float(i * 1.1 + 1.0) for i in range(SIZE)])

# Untimed warm-up calls before measurement starts.
for _ in range(WARMUP):
    s.pct_change()

# Timed runs: one wall-clock sample, in milliseconds, per pct_change() call.
times = []
for _ in range(ITERATIONS):
    start = time.perf_counter()
    s.pct_change()
    times.append((time.perf_counter() - start) * 1000)

# Report the mean and total time as a single JSON line on stdout.
total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "pct_change",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
Loading