Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions benchmarks/pandas/bench_expanding_mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Benchmark: Expanding mean

Computes the expanding mean of a large numeric Series.
Outputs JSON: {"function": "expanding_mean", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time

import pandas as pd

# Benchmark parameters.
SIZE = 50_000  # number of elements in the input Series
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics

# Deterministic input: a float ramp that starts at 0.5 and grows by a
# factor of 1.1 per position.
data = [0.5 + 1.1 * position for position in range(SIZE)]
s = pd.Series(data)

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    s.expanding().mean()

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    s.expanding().mean()
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "expanding_mean",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
47 changes: 47 additions & 0 deletions benchmarks/pandas/bench_melt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Benchmark: DataFrame melt (unpivot)

Creates a wide DataFrame and melts it into long format.
Outputs JSON: {"function": "melt", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time

import pandas as pd

# Benchmark parameters.
ROWS = 10_000  # rows in the wide input frame
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics


def make_frame() -> pd.DataFrame:
    """Build the wide input frame used by the melt benchmark.

    The frame has ``ROWS`` rows and four columns: an integer ``id`` column
    holding the row number, and float columns ``a``, ``b`` and ``c`` holding
    the row number scaled by 1.1, 2.2 and 3.3 respectively.
    """
    row_numbers = range(ROWS)
    columns = {"id": list(row_numbers)}
    for name, scale in (("a", 1.1), ("b", 2.2), ("c", 3.3)):
        columns[name] = [n * scale for n in row_numbers]
    return pd.DataFrame(columns)


# Input frame is built once, outside the timed region.
df = make_frame()

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    df.melt(id_vars=["id"], value_vars=["a", "b", "c"])

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    df.melt(id_vars=["id"], value_vars=["a", "b", "c"])
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "melt",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
39 changes: 39 additions & 0 deletions benchmarks/pandas/bench_nlargest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
Benchmark: Series nlargest

Returns the N largest values from a large numeric Series.
Outputs JSON: {"function": "nlargest", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time

import pandas as pd

# Benchmark parameters.
SIZE = 100_000  # number of elements in the input Series
N = 100  # how many of the largest values are requested
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics

# Deterministic, scrambled integers: 7919 shares no factor with SIZE, so the
# multiples taken modulo SIZE hit every value in [0, SIZE) exactly once.
data = [(7919 * position) % SIZE for position in range(SIZE)]
s = pd.Series(data)

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    s.nlargest(N)

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    s.nlargest(N)
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "nlargest",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
38 changes: 38 additions & 0 deletions benchmarks/pandas/bench_pearson_corr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Benchmark: Pearson correlation

Computes the Pearson correlation coefficient between two large numeric Series.
Outputs JSON: {"function": "pearson_corr", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time

import pandas as pd

# Benchmark parameters.
SIZE = 100_000  # number of elements in each input Series
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics

# Two deterministic float inputs; each is a linear function of the position.
x_values = [position * 1.1 + 0.5 for position in range(SIZE)]
y_values = [position * 0.9 - 0.3 for position in range(SIZE)]
x = pd.Series(x_values)
y = pd.Series(y_values)

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    x.corr(y, method="pearson")

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    x.corr(y, method="pearson")
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "pearson_corr",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
38 changes: 38 additions & 0 deletions benchmarks/pandas/bench_rank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Benchmark: Series rank

Ranks a large numeric Series using average tie-breaking.
Outputs JSON: {"function": "rank", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time

import pandas as pd

# Benchmark parameters.
SIZE = 100_000  # number of elements in the input Series
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics

# Deterministic float input with ties: integer division by 3 makes values
# repeat in runs of up to three equal entries. Multiplying by 1.5 already
# yields a float, so no explicit conversion is needed.
data = [(position // 3) * 1.5 for position in range(SIZE)]
s = pd.Series(data)

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    s.rank(method="average")

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    s.rank(method="average")
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "rank",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
42 changes: 42 additions & 0 deletions benchmarks/pandas/bench_read_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""
Benchmark: DataFrame read_json

Parses a JSON string into a DataFrame (records orient).
Outputs JSON: {"function": "read_json", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time
import io

import pandas as pd

# Benchmark parameters.
ROWS = 5_000  # number of records in the JSON payload
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics


def _make_record(row: int) -> dict:
    """Return the flat record for row number *row*.

    ``label`` cycles through ``item_0`` .. ``item_99`` as *row* increases.
    """
    return {
        "id": row,
        "x": row * 1.1,
        "y": row * 2.2,
        "label": f"item_{row % 100}",
    }


# The payload is serialized once, outside the timed region.
records = [_make_record(row) for row in range(ROWS)]
json_str = json.dumps(records)

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    pd.read_json(io.StringIO(json_str))

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
# Wrapping the payload in a fresh StringIO happens inside the timed region.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    pd.read_json(io.StringIO(json_str))
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "read_json",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
43 changes: 43 additions & 0 deletions benchmarks/pandas/bench_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
Benchmark: DataFrame to_csv

Serializes a large DataFrame to a CSV string.
Outputs JSON: {"function": "to_csv", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time
import io

import pandas as pd

# Benchmark parameters.
ROWS = 10_000  # rows in the frame being serialized
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics

# Deterministic input frame: an integer id, two float columns scaled from the
# row number, and a string label that cycles through item_0 .. item_99.
_row_numbers = list(range(ROWS))
df = pd.DataFrame(
    {
        "id": _row_numbers,
        "x": [n * 1.1 for n in _row_numbers],
        "y": [n * 2.2 for n in _row_numbers],
        "label": [f"item_{n % 100}" for n in _row_numbers],
    }
)

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    df.to_csv(index=False)

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    df.to_csv(index=False)
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "to_csv",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
41 changes: 41 additions & 0 deletions benchmarks/pandas/bench_zscore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Benchmark: Series zscore (z-score normalization)

Computes the z-score of a large numeric Series.
Outputs JSON: {"function": "zscore", "mean_ms": ..., "iterations": ..., "total_ms": ...}
"""

import json
import time

import pandas as pd

# Benchmark parameters.
SIZE = 100_000  # number of elements in the input Series
WARMUP = 5  # untimed calls made before measurement starts
ITERATIONS = 50  # timed calls that feed the reported statistics

# Deterministic input: a float ramp that starts at 0.5 and grows by a
# factor of 1.1 per position.
data = [0.5 + 1.1 * position for position in range(SIZE)]
s = pd.Series(data)

def zscore(series: pd.Series) -> pd.Series:
    """Standardize *series*: subtract its mean, then divide by its std.

    The standard deviation is computed with ``ddof=1``. The result keeps the
    index of *series*.
    """
    centered = series - series.mean()
    sample_std = series.std(ddof=1)
    return centered / sample_std

# Warm-up phase: exercise the operation WARMUP times; nothing is recorded.
for _ in range(WARMUP):
    zscore(s)

# Measurement phase: one perf_counter sample per call, kept in milliseconds.
times: "list[float]" = []
for _ in range(ITERATIONS):
    began = time.perf_counter()
    zscore(s)
    finished = time.perf_counter()
    elapsed_s = finished - began
    times.append(elapsed_s * 1000)

total_ms = sum(times)
mean_ms = total_ms / ITERATIONS

# Emit a single JSON object on stdout; timings are rounded to 3 decimals.
report = {
    "function": "zscore",
    "mean_ms": round(mean_ms, 3),
    "iterations": ITERATIONS,
    "total_ms": round(total_ms, 3),
}
print(json.dumps(report))
Loading