Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_cat_from_codes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: Categorical from codes on 100k-element array"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Five category labels; the integer codes cycle 0..4 across all ROWS entries.
labels = ["apple", "banana", "cherry", "date", "elderberry"]
code_array = np.arange(ROWS) % len(labels)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    pd.Categorical.from_codes(code_array, categories=labels)

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    pd.Categorical.from_codes(code_array, categories=labels)
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "cat_from_codes",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_cut.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: cut (bin into 10 bins) on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Values repeat every 10,000 rows and are scaled by 0.01 (0.00 .. 99.99).
values = (np.arange(ROWS) % 10000) * 0.01
series = pd.Series(values)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    pd.cut(series, 10)

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    pd.cut(series, 10)
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "cut",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
22 changes: 22 additions & 0 deletions benchmarks/pandas/bench_dataframe_cov.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Benchmark: DataFrame covariance matrix on 1000x10 DataFrame"""
import json, time
import numpy as np
import pandas as pd

# Frame shape and repetition counts for this benchmark.
ROWS = 1_000
COLS = 10
WARMUP = 3
ITERATIONS = 10

# One sine column per index, each shifted in phase by its column number.
columns = {}
for idx in range(COLS):
    columns[f"col{idx}"] = np.sin(np.arange(ROWS) * 0.01 + idx)
frame = pd.DataFrame(columns)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    frame.cov()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    frame.cov()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "dataframe_cov",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_ewm_std.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: ewm std (alpha=0.1) on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.ewm(alpha=0.1).std()

# Measured loop: the ewm object is rebuilt on every call, as part of the work.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.ewm(alpha=0.1).std()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "ewm_std",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_ewm_var.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: ewm var (alpha=0.1) on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.ewm(alpha=0.1).var()

# Measured loop: the ewm object is rebuilt on every call, as part of the work.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.ewm(alpha=0.1).var()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "ewm_var",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_expanding_std.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: expanding std on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.expanding().std()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.expanding().std()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "expanding_std",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_expanding_sum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: expanding sum on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.expanding().sum()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.expanding().sum()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "expanding_sum",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_expanding_var.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: expanding var on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.expanding().var()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.expanding().var()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "expanding_var",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
29 changes: 29 additions & 0 deletions benchmarks/pandas/bench_insert_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Benchmark: DataFrame insert column on 10000x3 DataFrame"""
import json, time
import numpy as np
import pandas as pd

# Frame length and repetition counts for this benchmark.
ROWS = 10_000
WARMUP = 3
ITERATIONS = 20

# Column to insert; built once up front so its construction is never timed.
new_col = np.arange(ROWS, dtype=float) * 4


def make_df():
    """Return a fresh ROWS x 3 float DataFrame with columns "a", "b", "c"."""
    return pd.DataFrame({
        "a": np.arange(ROWS, dtype=float),
        "b": np.arange(ROWS, dtype=float) * 2,
        "c": np.arange(ROWS, dtype=float) * 3,
    })


# Untimed warm-up rounds before measuring.
for _ in range(WARMUP):
    df = make_df()
    df.insert(1, "new_col", new_col)

# Time ONLY the insert() call. Each round needs a fresh frame because insert()
# modifies the frame in place, so "new_col" would already exist on a second
# call. Building that frame is setup rather than the operation under test, so
# it stays outside the timer; otherwise the reported figure would also include
# the cost of make_df().
# NOTE(review): if a paired benchmark in another language times its setup too,
# confirm both sides measure the same region.
total = 0.0
for _ in range(ITERATIONS):
    df = make_df()
    start = time.perf_counter()
    df.insert(1, "new_col", new_col)
    total += (time.perf_counter() - start) * 1000  # seconds -> milliseconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
print(json.dumps({
    "function": "insert_column",
    "mean_ms": total / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": total,
}))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_qcut.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: qcut (10 quantile bins) on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Values repeat every 10,000 rows and are scaled by 0.01 (0.00 .. 99.99).
values = (np.arange(ROWS) % 10000) * 0.01
series = pd.Series(values)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    pd.qcut(series, 10, duplicates="drop")

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    pd.qcut(series, 10, duplicates="drop")
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "qcut",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_rolling_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: rolling count with window=100 on 100k-element Series (with NaNs)"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: the float sequence 0..ROWS-1 with every tenth position replaced by NaN.
every_tenth = np.arange(ROWS) % 10 == 0
values = np.where(every_tenth, np.nan, np.arange(ROWS, dtype=float))
series = pd.Series(values)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.rolling(100).count()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.rolling(100).count()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "rolling_count",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_rolling_kurt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: rolling kurt with window=100 on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.rolling(100).kurt()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.rolling(100).kurt()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "rolling_kurt",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_rolling_max.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: rolling max with window=100 on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: cos(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.cos(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.rolling(100).max()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.rolling(100).max()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "rolling_max",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_rolling_median.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: rolling median with window=100 on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.1 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.1)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.rolling(100).median()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.rolling(100).median()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "rolling_median",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_rolling_min.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: rolling min with window=100 on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.rolling(100).min()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.rolling(100).min()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "rolling_min",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_rolling_quantile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: rolling quantile (0.75) with window=100 on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.rolling(100).quantile(0.75)

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.rolling(100).quantile(0.75)
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "rolling_quantile",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
21 changes: 21 additions & 0 deletions benchmarks/pandas/bench_rolling_sem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Benchmark: rolling SEM with window=100 on 100k-element Series"""
import json, time
import numpy as np
import pandas as pd

# Problem size and repetition counts for this benchmark.
ROWS = 100_000
WARMUP = 3
ITERATIONS = 10

# Input: sin(0.01 * i) for i in 0..ROWS-1, wrapped in a Series.
samples = np.sin(np.arange(ROWS) * 0.01)
series = pd.Series(samples)

# Untimed warm-up calls before the measured loop.
for _ in range(WARMUP):
    series.rolling(100).sem()

# Measured loop: a single timer interval spans all ITERATIONS calls.
began = time.perf_counter()
for _ in range(ITERATIONS):
    series.rolling(100).sem()
finished = time.perf_counter()
elapsed_ms = (finished - began) * 1000  # perf_counter() reports seconds

# Report the total and the per-call mean, both in milliseconds, as one JSON line.
report = {
    "function": "rolling_sem",
    "mean_ms": elapsed_ms / ITERATIONS,
    "iterations": ITERATIONS,
    "total_ms": elapsed_ms,
}
print(json.dumps(report))
Loading