Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions benchmarks/pandas/bench_dataframe_corr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Benchmark: DataFrame correlation matrix on 10k-row x 5-column DataFrame"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 10_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time ``DataFrame.corr()`` on a five-column float DataFrame.

    Args:
        rows: Number of rows in the generated DataFrame.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    index = np.arange(rows)
    df = pd.DataFrame({
        "A": np.sin(index * 0.01),
        "B": np.cos(index * 0.01),
        "C": np.sin(index * 0.02),
        "D": np.cos(index * 0.02),
        "E": np.sin(index * 0.03),
    })

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        df.corr()

    start = time.perf_counter()
    for _ in range(iterations):
        df.corr()
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "dataframe_corr",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_expanding_mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: expanding mean on 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time ``Series.expanding().mean()`` on a generated float Series.

    Args:
        rows: Number of elements in the generated Series.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    data = np.sin(np.arange(rows) * 0.01)
    s = pd.Series(data)

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        s.expanding().mean()

    start = time.perf_counter()
    for _ in range(iterations):
        s.expanding().mean()
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "expanding_mean",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
29 changes: 29 additions & 0 deletions benchmarks/pandas/bench_melt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Benchmark: melt (wide to long) on 10k-row DataFrame"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 10_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time ``DataFrame.melt(value_vars=["A", "B", "C"])`` on a generated DataFrame.

    Args:
        rows: Number of rows in the generated three-column DataFrame.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    index = np.arange(rows)
    df = pd.DataFrame({
        "A": index * 0.1,
        "B": index * 0.2,
        "C": index * 0.3,
    })

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        df.melt(value_vars=["A", "B", "C"])

    start = time.perf_counter()
    for _ in range(iterations):
        df.melt(value_vars=["A", "B", "C"])
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "melt",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_min_max_normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: min-max normalization on 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time the expression ``(s - s.min()) / (s.max() - s.min())`` on a Series.

    Args:
        rows: Number of elements in the generated Series.
        warmup: Number of untimed evaluations made before the measurement starts.
        iterations: Number of timed evaluations; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    data = np.sin(np.arange(rows) * 0.01) * 100 + 50
    s = pd.Series(data)

    # Untimed evaluations executed before the clock starts.
    for _ in range(warmup):
        (s - s.min()) / (s.max() - s.min())

    start = time.perf_counter()
    for _ in range(iterations):
        # The expression is kept exactly as measured originally (s.min() is
        # evaluated twice) so the benchmark keeps timing the same work.
        (s - s.min()) / (s.max() - s.min())
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "min_max_normalize",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
28 changes: 28 additions & 0 deletions benchmarks/pandas/bench_pearson_corr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Benchmark: Pearson correlation between two 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time ``Series.corr()`` between two generated float Series.

    Args:
        rows: Number of elements in each generated Series.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    index = np.arange(rows)
    sa = pd.Series(np.sin(index * 0.01))
    sb = pd.Series(np.cos(index * 0.01))

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        sa.corr(sb)

    start = time.perf_counter()
    for _ in range(iterations):
        sa.corr(sb)
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "pearson_corr",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_rolling_std.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: rolling standard deviation with window=100 on 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(
    rows: int = ROWS,
    warmup: int = WARMUP,
    iterations: int = ITERATIONS,
    window: int = 100,
) -> dict:
    """Time ``Series.rolling(window).std()`` on a generated float Series.

    Args:
        rows: Number of elements in the generated Series.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.
        window: Window size passed to ``Series.rolling``.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    data = np.sin(np.arange(rows) * 0.01)
    s = pd.Series(data)

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        s.rolling(window).std()

    start = time.perf_counter()
    for _ in range(iterations):
        s.rolling(window).std()
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "rolling_std",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_rolling_sum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: rolling sum with window=100 on 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(
    rows: int = ROWS,
    warmup: int = WARMUP,
    iterations: int = ITERATIONS,
    window: int = 100,
) -> dict:
    """Time ``Series.rolling(window).sum()`` on a generated float Series.

    Args:
        rows: Number of elements in the generated Series.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.
        window: Window size passed to ``Series.rolling``.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    data = np.sin(np.arange(rows) * 0.01)
    s = pd.Series(data)

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        s.rolling(window).sum()

    start = time.perf_counter()
    for _ in range(iterations):
        s.rolling(window).sum()
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "rolling_sum",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_series_nlargest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: nlargest on 100k-element Series (top 1000)"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(
    rows: int = ROWS,
    warmup: int = WARMUP,
    iterations: int = ITERATIONS,
    top_n: int = 1000,
) -> dict:
    """Time ``Series.nlargest(top_n)`` on a generated float Series.

    Args:
        rows: Number of elements in the generated Series.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.
        top_n: Count passed to ``Series.nlargest``.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    data = np.sin(np.arange(rows) * 0.01) * 1000
    s = pd.Series(data)

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        s.nlargest(top_n)

    start = time.perf_counter()
    for _ in range(iterations):
        s.nlargest(top_n)
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "series_nlargest",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_series_rank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: Series rank on 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time ``Series.rank()`` on a generated float Series.

    Args:
        rows: Number of elements in the generated Series.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    data = np.sin(np.arange(rows) * 0.01) * 1000
    s = pd.Series(data)

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        s.rank()

    start = time.perf_counter()
    for _ in range(iterations):
        s.rank()
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "series_rank",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
30 changes: 30 additions & 0 deletions benchmarks/pandas/bench_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Benchmark: to_csv — serialize a 10k-row DataFrame to CSV string"""
import io  # not referenced in this script; kept so file-level imports are unchanged
import json
import time

import numpy as np
import pandas as pd

ROWS = 10_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time ``DataFrame.to_csv(index=False)`` on a generated three-column DataFrame.

    Args:
        rows: Number of rows in the generated DataFrame.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    df = pd.DataFrame({
        "id": np.arange(rows, dtype=float),
        "value": np.arange(rows) * 1.1,
        "score": np.sin(np.arange(rows) * 0.01),
    })

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        df.to_csv(index=False)

    start = time.perf_counter()
    for _ in range(iterations):
        df.to_csv(index=False)
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "to_csv",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
29 changes: 29 additions & 0 deletions benchmarks/pandas/bench_to_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Benchmark: to_json — serialize a 10k-row DataFrame to JSON string"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 10_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time ``DataFrame.to_json(orient="records")`` on a generated DataFrame.

    Args:
        rows: Number of rows in the generated three-column DataFrame.
        warmup: Number of untimed calls made before the measurement starts.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    df = pd.DataFrame({
        "id": np.arange(rows, dtype=float),
        "value": np.arange(rows) * 1.1,
        "score": np.sin(np.arange(rows) * 0.01),
    })

    # Untimed calls executed before the clock starts.
    for _ in range(warmup):
        df.to_json(orient="records")

    start = time.perf_counter()
    for _ in range(iterations):
        df.to_json(orient="records")
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "to_json",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
26 changes: 26 additions & 0 deletions benchmarks/pandas/bench_zscore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Benchmark: zscore normalization on 100k-element Series"""
import json
import time

import numpy as np
import pandas as pd

ROWS = 100_000
WARMUP = 3
ITERATIONS = 10


def run_benchmark(rows: int = ROWS, warmup: int = WARMUP, iterations: int = ITERATIONS) -> dict:
    """Time the expression ``(s - s.mean()) / s.std()`` on a generated Series.

    Args:
        rows: Number of elements in the generated Series.
        warmup: Number of untimed evaluations made before the measurement starts.
        iterations: Number of timed evaluations; must be at least 1.

    Returns:
        A dict with the keys ``function``, ``mean_ms``, ``iterations`` and
        ``total_ms`` (times in milliseconds).

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # mean_ms divides by iterations, so zero would raise ZeroDivisionError.
        raise ValueError("iterations must be at least 1")

    data = np.sin(np.arange(rows) * 0.01) * 100 + 50
    s = pd.Series(data)

    # Untimed evaluations executed before the clock starts.
    for _ in range(warmup):
        (s - s.mean()) / s.std()

    start = time.perf_counter()
    for _ in range(iterations):
        (s - s.mean()) / s.std()
    total = (time.perf_counter() - start) * 1000  # seconds -> milliseconds

    return {
        "function": "zscore",
        "mean_ms": total / iterations,
        "iterations": iterations,
        "total_ms": total,
    }


if __name__ == "__main__":
    # Emit a single JSON record on stdout when executed as a script.
    print(json.dumps(run_benchmark()))
35 changes: 35 additions & 0 deletions benchmarks/tsb/bench_dataframe_corr.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/**
* Benchmark: DataFrame correlation matrix on 10k-row x 5-column DataFrame
*/
import { DataFrame, dataFrameCorr } from "../../src/index.js";

const ROWS = 10_000;
const WARMUP = 3;
const ITERATIONS = 10;

/** Build one ROWS-long Float64Array whose i-th entry is `fn(i * step)`. */
const wave = (fn: (x: number) => number, step: number): Float64Array =>
  Float64Array.from({ length: ROWS }, (_, i) => fn(i * step));

// Five sine/cosine columns, A through E, used as the correlation input.
const df = new DataFrame({
  A: wave(Math.sin, 0.01),
  B: wave(Math.cos, 0.01),
  C: wave(Math.sin, 0.02),
  D: wave(Math.cos, 0.02),
  E: wave(Math.sin, 0.03),
});

// Untimed passes executed before the clock starts.
for (let pass = WARMUP; pass > 0; pass -= 1) {
  dataFrameCorr(df);
}

// Timed passes: only the aggregate wall-clock duration is recorded.
const startedAt = performance.now();
for (let run = 0; run < ITERATIONS; run += 1) {
  dataFrameCorr(df);
}
const elapsedMs = performance.now() - startedAt;

// Emit a single JSON record on stdout.
const report = {
  function: "dataframe_corr",
  mean_ms: elapsedMs / ITERATIONS,
  iterations: ITERATIONS,
  total_ms: elapsedMs,
};
console.log(JSON.stringify(report));
Loading