githubnext · github-actions · Apr 13, 2026
diff --git a/benchmarks/pandas/bench_expanding_mean.py b/benchmarks/pandas/bench_expanding_mean.py
@@ -0,0 +1,43 @@
+"""
+Benchmark: Expanding mean
+
+Times Series.expanding().mean() on a numeric Series of SIZE floats.
+Outputs JSON: {"function": "expanding_mean", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+SIZE = 50_000  # length of the benchmarked Series
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+# The input is built once, outside the timed region.
+series = pd.Series([i * 1.1 + 0.5 for i in range(SIZE)])
+
+
+def timed_pass() -> float:
+    """Run one expanding-mean pass and return its duration in milliseconds."""
+    began = time.perf_counter()
+    series.expanding().mean()
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    series.expanding().mean()
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "expanding_mean",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_melt.py b/benchmarks/pandas/bench_melt.py
@@ -0,0 +1,54 @@
+"""
+Benchmark: DataFrame melt (unpivot)
+
+Times DataFrame.melt() turning a wide frame of ROWS rows into long format.
+Outputs JSON: {"function": "melt", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+ROWS = 10_000  # rows in the wide input frame
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+
+def make_frame() -> pd.DataFrame:
+    """Build the wide input: an "id" column plus value columns "a", "b" and "c"."""
+    index = range(ROWS)
+    return pd.DataFrame({
+        "id": list(index),
+        "a": [i * 1.1 for i in index],
+        "b": [i * 2.2 for i in index],
+        "c": [i * 3.3 for i in index],
+    })
+
+
+wide = make_frame()
+
+
+def timed_pass() -> float:
+    """Melt the frame once and return the duration in milliseconds."""
+    began = time.perf_counter()
+    wide.melt(id_vars=["id"], value_vars=["a", "b", "c"])
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    wide.melt(id_vars=["id"], value_vars=["a", "b", "c"])
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "melt",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_nlargest.py b/benchmarks/pandas/bench_nlargest.py
@@ -0,0 +1,44 @@
+"""
+Benchmark: Series nlargest
+
+Times Series.nlargest(N) on a Series of SIZE integers.
+Outputs JSON: {"function": "nlargest", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+SIZE = 100_000  # length of the benchmarked Series
+N = 100  # how many of the largest values are requested
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+# Values are (i * 7919) % SIZE, built once outside the timed region.
+series = pd.Series([(i * 7919) % SIZE for i in range(SIZE)])
+
+
+def timed_pass() -> float:
+    """Run one nlargest(N) pass and return its duration in milliseconds."""
+    began = time.perf_counter()
+    series.nlargest(N)
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    series.nlargest(N)
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "nlargest",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_pearson_corr.py b/benchmarks/pandas/bench_pearson_corr.py
@@ -0,0 +1,44 @@
+"""
+Benchmark: Pearson correlation
+
+Times Series.corr(other, method="pearson") between two Series of SIZE floats.
+Outputs JSON: {"function": "pearson_corr", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+SIZE = 100_000  # length of each benchmarked Series
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+# Both inputs are linear in the index i and are built once, outside the timed region.
+left = pd.Series([i * 1.1 + 0.5 for i in range(SIZE)])
+right = pd.Series([i * 0.9 - 0.3 for i in range(SIZE)])
+
+
+def timed_pass() -> float:
+    """Compute the correlation once and return the duration in milliseconds."""
+    began = time.perf_counter()
+    left.corr(right, method="pearson")
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    left.corr(right, method="pearson")
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "pearson_corr",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_rank.py b/benchmarks/pandas/bench_rank.py
@@ -0,0 +1,43 @@
+"""
+Benchmark: Series rank
+
+Times Series.rank(method="average") on a Series of SIZE floats that contains ties.
+Outputs JSON: {"function": "rank", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+SIZE = 100_000  # length of the benchmarked Series
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+# i // 3 maps runs of consecutive indices to the same value, so the input has ties.
+series = pd.Series([float((i // 3) * 1.5) for i in range(SIZE)])
+
+
+def timed_pass() -> float:
+    """Rank the Series once and return the duration in milliseconds."""
+    began = time.perf_counter()
+    series.rank(method="average")
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    series.rank(method="average")
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "rank",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_read_json.py b/benchmarks/pandas/bench_read_json.py
@@ -0,0 +1,47 @@
+"""
+Benchmark: DataFrame read_json
+
+Times pd.read_json() parsing a JSON array of ROWS record objects.
+Outputs JSON: {"function": "read_json", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+import io
+
+import pandas as pd
+
+ROWS = 5_000  # number of records in the JSON payload
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+# The JSON text is produced once, outside the timed region.
+payload = json.dumps([
+    {"id": i, "x": i * 1.1, "y": i * 2.2, "label": f"item_{i % 100}"}
+    for i in range(ROWS)
+])
+
+
+def timed_pass() -> float:
+    """Parse the payload once and return the duration in milliseconds."""
+    began = time.perf_counter()
+    pd.read_json(io.StringIO(payload))
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    pd.read_json(io.StringIO(payload))
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "read_json",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_to_csv.py b/benchmarks/pandas/bench_to_csv.py
@@ -0,0 +1,50 @@
+"""
+Benchmark: DataFrame to_csv
+
+Times DataFrame.to_csv(index=False) on a frame of ROWS rows.
+Outputs JSON: {"function": "to_csv", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+import io  # NOTE(review): io is not referenced anywhere in this script
+
+import pandas as pd
+
+ROWS = 10_000  # rows in the serialized frame
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+# The frame is built once, outside the timed region.
+index = range(ROWS)
+frame = pd.DataFrame({
+    "id": list(index),
+    "x": [i * 1.1 for i in index],
+    "y": [i * 2.2 for i in index],
+    "label": [f"item_{i % 100}" for i in index],
+})
+
+
+def timed_pass() -> float:
+    """Serialize the frame once and return the duration in milliseconds."""
+    began = time.perf_counter()
+    frame.to_csv(index=False)
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    frame.to_csv(index=False)
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "to_csv",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_zscore.py b/benchmarks/pandas/bench_zscore.py
@@ -0,0 +1,49 @@
+"""
+Benchmark: Series zscore (z-score normalization)
+
+Times (s - s.mean()) / s.std(ddof=1) on a numeric Series of SIZE floats.
+Outputs JSON: {"function": "zscore", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+SIZE = 100_000  # length of the benchmarked Series
+WARMUP = 5  # untimed passes run before measuring
+ITERATIONS = 50  # timed passes that feed the report
+
+# The input is built once, outside the timed region.
+values = pd.Series([i * 1.1 + 0.5 for i in range(SIZE)])
+
+
+def zscore(series: pd.Series) -> pd.Series:
+    """Return series minus its mean, divided by its standard deviation (ddof=1)."""
+    centred = series - series.mean()
+    return centred / series.std(ddof=1)
+
+
+def timed_pass() -> float:
+    """Run one zscore pass and return its duration in milliseconds."""
+    began = time.perf_counter()
+    zscore(values)
+    finished = time.perf_counter()
+    return (finished - began) * 1000
+
+
+# Warm-up: the same call as the timed pass, with nothing recorded.
+for _ in range(WARMUP):
+    zscore(values)
+
+samples_ms: "list[float]" = [timed_pass() for _ in range(ITERATIONS)]
+total_ms = sum(samples_ms)
+mean_ms = total_ms / ITERATIONS
+
+report = {
+    "function": "zscore",
+    "mean_ms": round(mean_ms, 3),
+    "iterations": ITERATIONS,
+    "total_ms": round(total_ms, 3),
+}
+print(json.dumps(report))