Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
46f4b4c
Iteration 135: Add 8 benchmark pairs (388 total, +8 vs best 380)
github-actions[bot] Apr 16, 2026
9191c69
Iteration 136: Add 8 benchmark pairs (396 total, +8 vs 388)
github-actions[bot] Apr 16, 2026
3165c28
Iteration 137: Add 8 benchmark pairs (404 total, +8 vs 396)
github-actions[bot] Apr 16, 2026
26437c0
Iteration 138: Add 8 benchmark pairs (412 total, +8 vs 404)
github-actions[bot] Apr 16, 2026
bae766a
Iteration 139: Add 8 benchmark pairs (420 total, +8 vs 412)
github-actions[bot] Apr 17, 2026
081cb30
Iteration 142: Add 9 benchmark pairs (429 total, +9 vs 420)
github-actions[bot] Apr 17, 2026
a84aca2
Iteration 143: Add 8 benchmark pairs (437 total, +8 vs 429)
github-actions[bot] Apr 17, 2026
5899214
Iteration 144: Add 8 benchmark pairs (445 total, +8 vs best 437)
github-actions[bot] Apr 17, 2026
8568c4a
Iteration 145: Add 9 benchmark pairs (454 total, +9 vs best 445)
github-actions[bot] Apr 17, 2026
b854980
Iteration 147: Add 8 benchmark pairs (462 total, +2 vs best 460)
github-actions[bot] Apr 17, 2026
e3c731c
Iteration 148: Add 6 benchmark pairs (468 total, +6 vs best 462)
github-actions[bot] Apr 17, 2026
e4521f9
Iteration 150: Add 5 benchmark pairs (473 total, +5 vs best 468)
github-actions[bot] Apr 17, 2026
952d479
Iteration 151: Add 5 benchmark pairs (478 total, +5 vs best 473)
github-actions[bot] Apr 17, 2026
3e89725
Iteration 153: Add 5 benchmark pairs (483 total, +5 vs best 478)
github-actions[bot] Apr 17, 2026
86d054b
Iteration 154: Add 5 benchmark pairs (488 total, +5 vs best 483)
github-actions[bot] Apr 17, 2026
313b4f3
Iteration 155: DataFrameExpanding std/var/sum/count/median/apply + TZ…
github-actions[bot] Apr 17, 2026
df6dab9
Iteration 156: 5 new benchmark pairs
github-actions[bot] Apr 17, 2026
ba235be
Iteration 157: 5 new benchmark pairs (503 total, +5 vs best 498)
github-actions[bot] Apr 17, 2026
5b7ea6d
Iteration 158: 5 new benchmark pairs (508 total, +5 vs best 503)
github-actions[bot] Apr 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions benchmarks/pandas/bench_align_dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
Benchmark: DataFrame.align — align two 10k-row DataFrames on inner/outer/left join.
Outputs JSON: {"function": "align_dataframe", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import numpy as np
import pandas as pd

SIZE = 10_000
WARMUP = 5
ITERATIONS = 30

JOINS = ("inner", "outer", "left")

# Two frames with partially overlapping integer indexes (multiples of 2 vs 3)
# and partially overlapping column sets (shared "y"/"z").
frame_left = pd.DataFrame(
    {
        "x": [float(i) for i in range(SIZE)],
        "y": [2.0 * i for i in range(SIZE)],
        "z": [3.0 * i for i in range(SIZE)],
    },
    index=[2 * i for i in range(SIZE)],
)
frame_right = pd.DataFrame(
    {
        "y": [10.0 * i for i in range(SIZE)],
        "z": [20.0 * i for i in range(SIZE)],
        "w": [30.0 * i for i in range(SIZE)],
    },
    index=[3 * i for i in range(SIZE)],
)


def _one_pass() -> None:
    """Run one align per join strategy."""
    for how in JOINS:
        frame_left.align(frame_right, join=how)


for _ in range(WARMUP):
    _one_pass()

start = time.perf_counter()
for _ in range(ITERATIONS):
    _one_pass()
total = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "align_dataframe",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
36 changes: 36 additions & 0 deletions benchmarks/pandas/bench_align_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Benchmark: Series.align — align two 50k-element Series on inner/outer/left join.
Outputs JSON: {"function": "align_series", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import numpy as np
import pandas as pd

SIZE = 50_000
WARMUP = 5
ITERATIONS = 30

# Partially overlapping integer indexes: multiples of 2 vs multiples of 3.
series_left = pd.Series([float(i) for i in range(SIZE)], index=[2 * i for i in range(SIZE)])
series_right = pd.Series([2.0 * i for i in range(SIZE)], index=[3 * i for i in range(SIZE)])


def _one_pass() -> None:
    """Align once per join strategy."""
    for how in ("inner", "outer", "left"):
        series_left.align(series_right, join=how)


for _ in range(WARMUP):
    _one_pass()

start = time.perf_counter()
for _ in range(ITERATIONS):
    _one_pass()
total = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "align_series",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
24 changes: 24 additions & 0 deletions benchmarks/pandas/bench_argsort_scalars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Benchmark: np.argsort / np.searchsorted — sort/search utilities on 100k-element arrays."""
import json
import time
import numpy as np

SIZE = 100_000
WARMUP = 3
ITERATIONS = 20

# Oscillating data so argsort sees a non-trivial ordering.
arr = np.sin(np.arange(SIZE) * 0.001) * SIZE
sorted_arr = np.sort(arr)
# 1000 query points spanning [-SIZE, SIZE).
queries = (np.arange(1000) - 500) * SIZE / 500


def _one_pass() -> None:
    """One full argsort plus a batch searchsorted lookup."""
    np.argsort(arr)
    np.searchsorted(sorted_arr, queries)


for _ in range(WARMUP):
    _one_pass()

t0 = time.perf_counter()
for _ in range(ITERATIONS):
    _one_pass()
total = (time.perf_counter() - t0) * 1000

print(json.dumps({"function": "argsort_scalars", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total}))
25 changes: 25 additions & 0 deletions benchmarks/pandas/bench_bdate_range.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Benchmark: pd.bdate_range — generate business-day DatetimeIndex with 1000 periods.
Outputs JSON: {"function": "bdate_range", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import pandas as pd

WARMUP = 5
ITERATIONS = 100
START = "2020-01-01"
PERIODS = 1000

for _ in range(WARMUP):
    pd.bdate_range(start=START, periods=PERIODS)

t0 = time.perf_counter()
for _ in range(ITERATIONS):
    pd.bdate_range(start=START, periods=PERIODS)
total = (time.perf_counter() - t0) * 1000

print(json.dumps({
    "function": "bdate_range",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
36 changes: 36 additions & 0 deletions benchmarks/pandas/bench_cast_scalar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Benchmark: Python type coercion equivalents — int(), float(), str(), bool() conversions.
Outputs JSON: {"function": "cast_scalar", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time

SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

int_values = [i % 1000 for i in range(SIZE)]
float_values = [i * 0.5 for i in range(SIZE)]
str_values = [str(i % 1000) for i in range(SIZE)]
bool_values = [i % 2 == 0 for i in range(SIZE)]


def _one_pass() -> None:
    """Apply all four scalar casts to every element once."""
    for fv, iv, sv, bv in zip(float_values, int_values, str_values, bool_values):
        int(fv)
        float(iv)
        int(sv)
        int(bv)


for _ in range(WARMUP):
    _one_pass()

samples = []
for _ in range(ITERATIONS):
    t0 = time.perf_counter()
    _one_pass()
    samples.append((time.perf_counter() - t0) * 1000)

total_ms = sum(samples)
mean_ms = total_ms / ITERATIONS
print(json.dumps({"function": "cast_scalar", "mean_ms": round(mean_ms, 3), "iterations": ITERATIONS, "total_ms": round(total_ms, 3)}))
42 changes: 42 additions & 0 deletions benchmarks/pandas/bench_cat_codes_accessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""
Benchmark: pd.Categorical.codes / categories / ordered — category accessor properties
on a 100k-element categorical Series.
Outputs JSON: {"function": "cat_codes_accessor", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import numpy as np
import pandas as pd

SIZE = 100_000
CATS = 50
WARMUP = 5
ITERATIONS = 30

category_labels = [f"cat_{i}" for i in range(CATS)]
values = [category_labels[i % CATS] for i in range(SIZE)]
categorical = pd.Categorical(values, categories=category_labels)
series = pd.Series(categorical)


def _touch_accessors() -> None:
    """Read each .cat accessor property once."""
    _ = series.cat.codes
    _ = series.cat.categories
    _ = series.cat.ordered
    _ = len(series.cat.categories)


for _ in range(WARMUP):
    _touch_accessors()

samples = []
for _ in range(ITERATIONS):
    t0 = time.perf_counter()
    _touch_accessors()
    samples.append((time.perf_counter() - t0) * 1000)

total_ms = sum(samples)
print(json.dumps({
    "function": "cat_codes_accessor",
    "mean_ms": round(total_ms / ITERATIONS, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}))
41 changes: 41 additions & 0 deletions benchmarks/pandas/bench_categorical_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Benchmark: pandas.CategoricalIndex — creation, get_loc, add_categories, set operations on 100k elements.
Outputs JSON: {"function": "categorical_index", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import numpy as np
import pandas as pd

SIZE = 100_000
WARMUP = 5
ITERATIONS = 30

CATS = ["alpha", "beta", "gamma", "delta", "epsilon"]
n_cats = len(CATS)
labels = [CATS[i % n_cats] for i in range(SIZE)]
ci = pd.CategoricalIndex(labels)
# Second index is half-sized and phase-shifted so set ops have real work to do.
labels2 = [CATS[(i + 2) % n_cats] for i in range(SIZE // 2)]
ci2 = pd.CategoricalIndex(labels2)


def _one_pass() -> None:
    """Exercise construction, label lookup, category mutation, and set ops."""
    pd.CategoricalIndex(labels)
    ci.get_loc("beta")
    ci.add_categories(["zeta"])
    ci.union(ci2)
    ci.intersection(ci2)


for _ in range(WARMUP):
    _one_pass()

t0 = time.perf_counter()
for _ in range(ITERATIONS):
    _one_pass()
total = (time.perf_counter() - t0) * 1000

print(json.dumps({
    "function": "categorical_index",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
41 changes: 41 additions & 0 deletions benchmarks/pandas/bench_clip_series_bounds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Benchmark: Series.clip(lower=, upper=) / DataFrame.clip(lower=, upper=) — element-wise clip bounds.
Outputs JSON: {"function": "clip_series_bounds", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import numpy as np
import pandas as pd

SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

# Centered ramp so values fall on both sides of the clip bounds.
series = pd.Series(np.arange(SIZE) - SIZE / 2)
series_lower = pd.Series(np.full(SIZE, -10000.0))
series_upper = pd.Series(np.full(SIZE, 10000.0))

frame = pd.DataFrame({
    "a": np.arange(SIZE) - SIZE / 2,
    "b": np.sin(np.arange(SIZE) * 0.01) * 100,
})
frame_lower = pd.DataFrame({"a": np.full(SIZE, -10000.0), "b": np.full(SIZE, -50.0)})
frame_upper = pd.DataFrame({"a": np.full(SIZE, 10000.0), "b": np.full(SIZE, 50.0)})


def _one_pass() -> None:
    """Clip the Series and the DataFrame against element-wise bounds."""
    series.clip(lower=series_lower, upper=series_upper)
    frame.clip(lower=frame_lower, upper=frame_upper)


for _ in range(WARMUP):
    _one_pass()

t0 = time.perf_counter()
for _ in range(ITERATIONS):
    _one_pass()
total = (time.perf_counter() - t0) * 1000

print(json.dumps({
    "function": "clip_series_bounds",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
36 changes: 36 additions & 0 deletions benchmarks/pandas/bench_combine_first_dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Benchmark: DataFrame.combine_first — fill NaN values from another DataFrame (union of indexes).
Mirrors tsb bench_combine_first_dataframe.ts.
"""
import json
import time
import numpy as np
import pandas as pd

SIZE = 5_000
WARMUP = 5
ITERATIONS = 30

# Primary frame: every 3rd "a" and every 5th "b" is missing (None -> NaN).
df_primary = pd.DataFrame(
    {
        "a": [None if i % 3 == 0 else i * 1.5 for i in range(SIZE)],
        "b": [None if i % 5 == 0 else i * 0.5 for i in range(SIZE)],
    },
    index=list(range(SIZE)),
)

# Fallback frame: fully populated, longer index, plus an extra column "c",
# so combine_first must both fill NaNs and extend rows/columns.
n_fallback = SIZE + 500
df_fallback = pd.DataFrame(
    {
        "a": [i * 2.0 for i in range(n_fallback)],
        "b": [i * 1.0 for i in range(n_fallback)],
        "c": [i * 0.1 for i in range(n_fallback)],
    },
    index=list(range(n_fallback)),
)

for _ in range(WARMUP):
    df_primary.combine_first(df_fallback)

t0 = time.perf_counter()
for _ in range(ITERATIONS):
    df_primary.combine_first(df_fallback)
total = (time.perf_counter() - t0) * 1000

print(json.dumps({
    "function": "combine_first_dataframe",
    "mean_ms": round(total / ITERATIONS, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total, 3),
}))
37 changes: 37 additions & 0 deletions benchmarks/pandas/bench_concat_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""
Benchmark: pandas concat with join="inner" and ignore_index=True options.
Outputs JSON: {"function": "concat_options", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""
import json
import time
import pandas as pd

ROWS = 50_000
WARMUP = 5
ITERATIONS = 20

# Frames share columns "a"/"b"; "c" and "d" are exclusive to one side,
# so inner vs outer joins produce different column sets.
frame_left = pd.DataFrame({
    "a": [i * 1.0 for i in range(ROWS)],
    "b": [i * 2.0 for i in range(ROWS)],
    "c": [i * 3.0 for i in range(ROWS)],
})
frame_right = pd.DataFrame({
    "a": [i * 1.5 for i in range(ROWS)],
    "b": [i * 2.5 for i in range(ROWS)],
    "d": [i * 4.0 for i in range(ROWS)],
})


def _one_pass() -> None:
    """Concatenate with both join strategies, resetting the index."""
    pd.concat([frame_left, frame_right], join="inner", ignore_index=True)
    pd.concat([frame_left, frame_right], join="outer", ignore_index=True)


for _ in range(WARMUP):
    _one_pass()

samples = []
for _ in range(ITERATIONS):
    t0 = time.perf_counter()
    _one_pass()
    samples.append((time.perf_counter() - t0) * 1000)

total_ms = sum(samples)
mean_ms = total_ms / ITERATIONS
print(json.dumps({"function": "concat_options", "mean_ms": round(mean_ms, 3), "iterations": ITERATIONS, "total_ms": round(total_ms, 3)}))
24 changes: 24 additions & 0 deletions benchmarks/pandas/bench_cut_interval_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Benchmark: cutIntervalIndex / qcutIntervalIndex — pd.cut/qcut returning IntervalIndex on 100k-element Series."""
import json
import time
import numpy as np
import pandas as pd

SIZE = 100_000
WARMUP = 5
ITERATIONS = 30

# Repeating 0..99.9 ramp so both equal-width and quantile bins are well-populated.
values = pd.Series((np.arange(SIZE) % 1000) * 0.1)


def _one_pass() -> None:
    """Equal-width cut into 20 bins plus decile qcut (dropping duplicate edges)."""
    pd.cut(values, 20, retbins=False)
    pd.qcut(values, 10, duplicates="drop")


for _ in range(WARMUP):
    _one_pass()

t0 = time.perf_counter()
for _ in range(ITERATIONS):
    _one_pass()
total = (time.perf_counter() - t0) * 1000

print(json.dumps({"function": "cut_interval_index", "mean_ms": total / ITERATIONS, "iterations": ITERATIONS, "total_ms": total}))
Loading
Loading