Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
5af8b59
Iteration 47: Add 15 new benchmark pairs
github-actions[bot] Apr 13, 2026
650cc47
Iteration 57: Rebuild 97 pairs from hashed branches + 60 new (iter52-…
github-actions[bot] Apr 14, 2026
eb4c4fd
Iteration 60: Add 29 new benchmark pairs (157 recovered + 29 new = 18…
github-actions[bot] Apr 14, 2026
bd235a2
Iteration 68: Add 48 new benchmark pairs (186 → 234 total)
github-actions[bot] Apr 14, 2026
818729d
Iteration 72: Add 17 new benchmark pairs (251 total, +5 vs best 246)
github-actions[bot] Apr 14, 2026
8f1c852
Iteration 74: Add 8 new benchmark pairs (259 total, +3 vs best 256)
github-actions[bot] Apr 14, 2026
dfa2c7c
Iteration 75: Add 6 new benchmark pairs (265 total)
github-actions[bot] Apr 14, 2026
05a64d7
Iteration 80: Add 18 new benchmark pairs (269 total, +18 vs best 251)
github-actions[bot] Apr 14, 2026
4e51e6d
Iteration 94: Add 12 new benchmark pairs (281 total, +12 vs best 269)
github-actions[bot] Apr 15, 2026
541e868
Iteration 95: Add 12 new benchmark pairs (293 total, +12 vs best 281)
github-actions[bot] Apr 15, 2026
311c284
Iteration 97: Add 12 new benchmark pairs (305 total, +12 vs best 293)
github-actions[bot] Apr 15, 2026
bee51f7
Iteration 99: Add 12 new benchmark pairs (317 total, +12 vs best 305)
github-actions[bot] Apr 15, 2026
f79269f
Iteration 102: Add 9 new benchmark pairs (326 total, +9 vs best 317)
github-actions[bot] Apr 15, 2026
62b943a
Iteration 106: Add 6 new benchmark pairs (332 total, +6 vs best 326)
github-actions[bot] Apr 15, 2026
67d0546
Iteration 114: value_type_checks + dtype_predicates benchmarks
github-actions[bot] Apr 15, 2026
8bbcb23
Iteration 115: Add 6 new benchmark pairs (340 total, +6 vs best 334)
github-actions[bot] Apr 15, 2026
a0d31b4
Iteration 116: Add 5 benchmark pairs (345 total, +5 vs best 340)
github-actions[bot] Apr 15, 2026
0fbd161
Iteration 127: Add 8 benchmark pairs (353 total, re-establishing from…
github-actions[bot] Apr 16, 2026
14687fd
Merge origin/main to resolve PR conflicts
Copilot Apr 16, 2026
e75b53e
Iteration 130: Add 8 benchmark pairs (364 total, +8 vs best 356)
github-actions[bot] Apr 16, 2026
a92abcc
Iteration 131: Add 8 benchmark pairs (372 total, +8 vs best 364)
github-actions[bot] Apr 16, 2026
28a1a07
Iteration 132: Add 8 benchmark pairs (380 total, +8 vs best 372)
github-actions[bot] Apr 16, 2026
a06fe02
chore: trigger CI rerun after action_required runs
Copilot Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
17 changes: 17 additions & 0 deletions benchmarks/pandas/bench_apply_dataframe_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Benchmark: DataFrame.map formatter on 10k-row DataFrame"""
import json
import time

import pandas as pd

ROWS = 10_000
WARMUP = 3
ITERATIONS = 10

# Two float columns, so every call formats 2 * ROWS cells.
df = pd.DataFrame(
    {
        "a": [i * 1.234 for i in range(ROWS)],
        "b": [i * 5.678 for i in range(ROWS)],
    }
)

# DataFrame.map only exists on newer pandas releases; on older ones the same
# element-wise operation is spelled DataFrame.applymap. Pick whichever is
# available so the script does not crash with AttributeError.
format_cells = df.map if hasattr(pd.DataFrame, "map") else df.applymap

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    format_cells(lambda v: f"{v:.2f}")

# All ITERATIONS calls are timed as one span and reported in milliseconds.
start = time.perf_counter()
for _ in range(ITERATIONS):
    format_cells(lambda v: f"{v:.2f}")
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "apply_dataframe_formatter",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
17 changes: 17 additions & 0 deletions benchmarks/pandas/bench_apply_series_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Benchmark: apply formatter to 100k-element pandas Series"""
import json
import time

import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Float input: i * 1.234 for i in [0, ROWS).
s = pd.Series([i * 1.234 for i in range(ROWS)])

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    s.map(lambda v: f"{v:.2f}")

# All ITERATIONS calls are timed as one span and reported in milliseconds.
start = time.perf_counter()
for _ in range(ITERATIONS):
    s.map(lambda v: f"{v:.2f}")
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "apply_series_formatter",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
18 changes: 18 additions & 0 deletions benchmarks/pandas/bench_arange_linspace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Benchmark: np.arange and np.linspace generating 100k-element arrays"""
import json
import time

import numpy as np

N = 100_000
WARMUP = 3
ITERATIONS = 10

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    np.arange(0, N, 1)
    np.linspace(0, 1, N)

# Each timed iteration builds one arange and one linspace array of N elements;
# the arrays are discarded immediately.
start = time.perf_counter()
for _ in range(ITERATIONS):
    np.arange(0, N, 1)
    np.linspace(0, 1, N)
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "arange_linspace",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_astype_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: Series.astype() — cast Series dtype."""
import json
import time

import pandas as pd

SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

float_series = pd.Series([i * 1.5 for i in range(SIZE)])
int_series = pd.Series(list(range(SIZE)))

# Untimed warm-up: the same three casts that the measured loop performs.
for _ in range(WARMUP):
    float_series.astype("int32")
    int_series.astype("float64")
    int_series.astype("str")

# Each iteration is timed on its own; per-iteration durations are kept in
# milliseconds and summed afterwards.
times = []
for _ in range(ITERATIONS):
    t0 = time.perf_counter()
    float_series.astype("int32")
    int_series.astype("float64")
    int_series.astype("str")
    times.append((time.perf_counter() - t0) * 1000)

total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "astype_series",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
30 changes: 30 additions & 0 deletions benchmarks/pandas/bench_attrs_advanced.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Benchmark: pandas Series attrs advanced — individual attr get/set/delete/copy/merge"""
import json
import time

import pandas as pd

WARMUP = 3
ITERATIONS = 1_000

s = pd.Series(range(1_000))
s2 = pd.Series(range(1_000))

# Untimed warm-up: one pass over the same attrs operations as the timed loop.
for _ in range(WARMUP):
    s.attrs["unit"] = "meters"
    _ = s.attrs.get("unit")
    _ = bool(s.attrs)
    s2.attrs.update(dict(s.attrs))
    s.attrs.update({"version": 1})
    s.attrs.pop("unit", None)
    s.attrs.clear()

# Each timed iteration sets, reads, copies, merges, pops and finally clears
# entries of `s.attrs`, so `s.attrs` is empty again when the iteration ends.
start = time.perf_counter()
for i in range(ITERATIONS):
    s.attrs["unit"] = "meters"
    _ = s.attrs.get("unit")
    _ = bool(s.attrs)
    s2.attrs.update(dict(s.attrs))
    s.attrs.update({"version": i})
    s.attrs.pop("unit", None)
    s.attrs.clear()
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "attrs_advanced",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
15 changes: 15 additions & 0 deletions benchmarks/pandas/bench_attrs_count_keys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Benchmark: len() and key listing on a pandas Series' attrs dict."""
import json
import time

import pandas as pd

N = 100_000
s = pd.Series(range(N))
s.attrs = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8}
WARMUP = 3
ITERS = 10_000

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    _ = len(s.attrs)
    _ = list(s.attrs.keys())

# Each timed iteration counts the attrs entries and materialises the key list.
t0 = time.perf_counter()
for _ in range(ITERS):
    _ = len(s.attrs)
    _ = list(s.attrs.keys())
total = (time.perf_counter() - t0) * 1000

print(
    json.dumps(
        {
            "function": "attrs_count_keys",
            "mean_ms": total / ITERS,
            "iterations": ITERS,
            "total_ms": total,
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_attrs_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: update/read/overwrite Series attrs and copy the Series."""
import json
import time

import pandas as pd

N = 10_000
s = pd.Series(range(N))
attrs_data = {"unit": "meters", "created": "2024-01-01", "source": "sensor-1", "version": 2}
WARMUP = 3
ITERS = 100

# Untimed warm-up: same sequence of operations as the timed loop.
for _ in range(WARMUP):
    s.attrs.update(attrs_data)
    _ = dict(s.attrs)
    s.attrs["version"] = 99
    s2 = s.copy()
    s2.attrs.update({"extra": "x"})

# Each timed iteration merges `attrs_data` into `s.attrs`, snapshots it,
# overwrites "version" with the loop index, copies the Series and adds one
# more key to the copy's attrs.
t0 = time.perf_counter()
for i in range(ITERS):
    s.attrs.update(attrs_data)
    _ = dict(s.attrs)
    s.attrs["version"] = i
    s2 = s.copy()
    s2.attrs.update({"extra": "x"})
total = (time.perf_counter() - t0) * 1000

print(
    json.dumps(
        {
            "function": "attrs_ops",
            "mean_ms": total / ITERS,
            "iterations": ITERS,
            "total_ms": total,
        }
    )
)
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_between.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Series.between() — element-wise range check."""
import json
import time

import pandas as pd

SIZE = 100_000
WARMUP = 5
ITERATIONS = 50

s = pd.Series([float(i) for i in range(SIZE)])

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    s.between(25000.0, 75000.0)

# Each iteration is timed on its own; per-iteration durations are kept in
# milliseconds and summed afterwards.
times = []
for _ in range(ITERATIONS):
    t0 = time.perf_counter()
    s.between(25000.0, 75000.0)
    times.append((time.perf_counter() - t0) * 1000)

total_ms = sum(times)
print(
    json.dumps(
        {
            "function": "between",
            "mean_ms": round(total_ms / ITERATIONS, 3),
            "iterations": ITERATIONS,
            "total_ms": round(total_ms, 3),
        }
    )
)
23 changes: 23 additions & 0 deletions benchmarks/pandas/bench_cat_add_remove_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Benchmark: cat_add_remove_categories — pandas CategoricalIndex add_categories/remove_categories on 100k-element Series"""
import json
import time

import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

cats = ["a", "b", "c", "d"]
# NOTE: `s` is a bare pd.Categorical array (not a Series or CategoricalIndex),
# so add_categories/remove_categories are called on it directly rather than
# through a `.cat` accessor.
s = pd.Categorical([cats[i % len(cats)] for i in range(ROWS)], categories=cats)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    _ = s.add_categories(["e", "f"])
    _ = s.remove_categories(["d"])

# Each timed iteration adds two categories and removes one; `s` itself is
# never reassigned, so every iteration starts from the same four categories.
start = time.perf_counter()
for _ in range(ITERATIONS):
    _ = s.add_categories(["e", "f"])
    _ = s.remove_categories(["d"])
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_add_remove_categories",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
20 changes: 20 additions & 0 deletions benchmarks/pandas/bench_cat_cross_tab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Benchmark: pd.crosstab on two 100k-element categorical Series"""
import json
import time

import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10
cats1 = ["a", "b", "c", "d"]
cats2 = ["x", "y", "z"]
# Build the inputs as real categoricals. Plain string lists would produce
# object-dtype Series, which is not what a "categorical Series" benchmark is
# meant to measure. Category order is pinned to the declaration order.
s1 = pd.Series(
    pd.Categorical([cats1[i % len(cats1)] for i in range(ROWS)], categories=cats1)
)
s2 = pd.Series(
    pd.Categorical([cats2[i % len(cats2)] for i in range(ROWS)], categories=cats2)
)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    pd.crosstab(s1, s2)

# All ITERATIONS calls are timed as one span and reported in milliseconds.
start = time.perf_counter()
for _ in range(ITERATIONS):
    pd.crosstab(s1, s2)
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_cross_tab",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
22 changes: 22 additions & 0 deletions benchmarks/pandas/bench_cat_equal_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Benchmark: compare categorical categories equality (10k iterations)"""
import json
import time

import pandas as pd

WARMUP = 3
ITERATIONS = 10
cats1 = ["cat_0", "cat_1", "cat_2"]
cats2 = ["cat_0", "cat_1", "cat_2"]
c1 = pd.CategoricalDtype(categories=cats1)
c2 = pd.CategoricalDtype(categories=cats2)
# Number of comparisons performed inside one outer iteration.
REPS = 10_000

# Untimed warm-up: WARMUP batches of REPS comparisons.
for _ in range(WARMUP):
    for _rep in range(REPS):
        set(c1.categories) == set(c2.categories)

# One timed "iteration" is a batch of REPS order-insensitive comparisons, so
# the reported mean_ms is per batch rather than per single comparison.
start = time.perf_counter()
for _ in range(ITERATIONS):
    for _rep in range(REPS):
        set(c1.categories) == set(c2.categories)
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_equal_categories",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
18 changes: 18 additions & 0 deletions benchmarks/pandas/bench_cat_freq_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Benchmark: value_counts on 100k-element categorical Series"""
import json
import time

import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10
cats = ["low", "med", "high", "ultra"]
# Build a real categorical Series. A plain list of strings would give an
# object-dtype Series, so value_counts would not be measured on categorical
# data. Category order is pinned to the declaration order.
s = pd.Series(
    pd.Categorical([cats[i % len(cats)] for i in range(ROWS)], categories=cats)
)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    s.value_counts()

# All ITERATIONS calls are timed as one span and reported in milliseconds.
start = time.perf_counter()
for _ in range(ITERATIONS):
    s.value_counts()
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_freq_table",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
20 changes: 20 additions & 0 deletions benchmarks/pandas/bench_cat_recode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Benchmark: catRecode on 100k-element categorical Series"""
import json
import time

import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10
cats = ["a", "b", "c"]
data = [cats[i % 3] for i in range(ROWS)]
s = pd.Series(pd.Categorical(data))
# Old category label -> new category label.
rmap = {"a": "x", "b": "y", "c": "z"}

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    s.cat.rename_categories(rmap)

# The result is discarded and `s` is not reassigned, so every call renames
# the same original categories.
start = time.perf_counter()
for _ in range(ITERATIONS):
    s.cat.rename_categories(rmap)
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_recode",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
29 changes: 29 additions & 0 deletions benchmarks/pandas/bench_cat_remove_unused.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Benchmark: cat_remove_unused — pd.Categorical.remove_unused_categories() on 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

cats = ["a", "b", "c"]
data = [cats[i % len(cats)] for i in range(ROWS)]
# The dtype declares three extra categories ("x", "y", "z") that never occur
# in `data`; these are what remove_unused_categories() has to drop.
cat_type = pd.CategoricalDtype(categories=["a", "b", "c", "x", "y", "z"])
s = pd.Series(data, dtype=cat_type)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    s.cat.remove_unused_categories()

# The result is discarded and `s` is not reassigned, so every call sees the
# same three unused categories.
start = time.perf_counter()
for _ in range(ITERATIONS):
    s.cat.remove_unused_categories()
total = (time.perf_counter() - start) * 1000

print(json.dumps({
    "function": "cat_remove_unused",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
23 changes: 23 additions & 0 deletions benchmarks/pandas/bench_cat_rename_set_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Benchmark: cat_rename_set_categories — pandas Categorical rename_categories/set_categories on 100k-element Series"""
import json
import time

import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

cats = ["a", "b", "c", "d"]
# NOTE: `s` is a bare pd.Categorical array, so the methods below are called
# on it directly rather than through a Series `.cat` accessor.
s = pd.Categorical([cats[i % len(cats)] for i in range(ROWS)], categories=cats)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    _ = s.rename_categories({"a": "alpha", "b": "beta"})
    _ = s.set_categories(["a", "b", "c", "d", "e"])

# Each timed iteration renames two of the four categories and then sets a
# five-category list (the original four plus "e"); results are discarded.
start = time.perf_counter()
for _ in range(ITERATIONS):
    _ = s.rename_categories({"a": "alpha", "b": "beta"})
    _ = s.set_categories(["a", "b", "c", "d", "e"])
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_rename_set_categories",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
25 changes: 25 additions & 0 deletions benchmarks/pandas/bench_cat_reorder_as_ordered.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Benchmark: cat_reorder_as_ordered — pandas Categorical reorder_categories/as_ordered/as_unordered on 100k-element Series"""
import json
import time

import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

cats = ["a", "b", "c", "d"]
# NOTE: `s` is a bare pd.Categorical array, so the methods below are called
# on it directly rather than through a Series `.cat` accessor.
s = pd.Categorical([cats[i % len(cats)] for i in range(ROWS)], categories=cats)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    _ = s.reorder_categories(["d", "c", "b", "a"])
    _ = s.as_ordered()
    _ = s.as_unordered()

# Each timed iteration reverses the category order and toggles the ordered
# flag both ways; results are discarded and `s` is never reassigned.
start = time.perf_counter()
for _ in range(ITERATIONS):
    _ = s.reorder_categories(["d", "c", "b", "a"])
    _ = s.as_ordered()
    _ = s.as_unordered()
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_reorder_as_ordered",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
24 changes: 24 additions & 0 deletions benchmarks/pandas/bench_cat_set_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Benchmark: categorical set operations (union, intersect, diff)"""
import json
import time

import pandas as pd

# NOTE: ROWS is not used by this benchmark; only the category lists below
# determine the amount of work.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10
# Two 500-label category lists: cat_0..cat_499 and cat_250..cat_749.
cats1 = [f"cat_{i}" for i in range(500)]
cats2 = [f"cat_{i+250}" for i in range(500)]
c1 = pd.CategoricalDtype(categories=cats1)
c2 = pd.CategoricalDtype(categories=cats2)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    set(c1.categories) | set(c2.categories)
    set(c1.categories) & set(c2.categories)
    set(c1.categories) - set(c2.categories)

# Each timed iteration rebuilds Python sets from both category indexes three
# times (union, intersection, difference); the set construction is therefore
# part of what is measured.
start = time.perf_counter()
for _ in range(ITERATIONS):
    set(c1.categories) | set(c2.categories)
    set(c1.categories) & set(c2.categories)
    set(c1.categories) - set(c2.categories)
total = (time.perf_counter() - start) * 1000

print(
    json.dumps(
        {
            "function": "cat_set_ops",
            "mean_ms": total / ITERATIONS,
            "iterations": ITERATIONS,
            "total_ms": total,
        }
    )
)
Loading
Loading