diff --git a/.github/workflows/persistence-performance.yml b/.github/workflows/persistence-performance.yml
new file mode 100644
index 00000000..6cc6aa7c
--- /dev/null
+++ b/.github/workflows/persistence-performance.yml
@@ -0,0 +1,26 @@
+name: Persistence performance checks
+
+on:
+  schedule:
+    - cron: '0 9 * * 0'  # every sunday at 9:00 UTC
+  workflow_dispatch:
+
+jobs:
+  check-persistence-performance:
+
+    runs-on: ubuntu-latest
+    if: "github.repository == 'skops-dev/skops'"
+
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+      - name: Install requirements
+        run: |
+          pip install .[tests]
+          pip --version
+          pip list
+      - name: Run persistence performance checks
+        run: python scripts/check_persistence_performance.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0bca976d..d95e8edf 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.4.0
     hooks:
       - id: check-yaml
         exclude: .github/conda/meta.yaml
@@ -10,11 +10,11 @@ repos:
       - id: check-case-conflict
       - id: check-merge-conflict
   - repo: https://github.com/psf/black
-    rev: 22.6.0
+    rev: 23.1.0
     hooks:
       - id: black
   - repo: https://github.com/pycqa/flake8
-    rev: 4.0.1
+    rev: 6.0.0
     hooks:
       - id: flake8
         types: [file, python]
@@ -23,7 +23,7 @@
     hooks:
       - id: isort
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.0.0
+    rev: v1.0.1
     hooks:
       - id: mypy
         args: [--config-file=pyproject.toml]
diff --git a/scripts/check_persistence_performance.py b/scripts/check_persistence_performance.py
new file mode 100644
index 00000000..5f6d2ca1
--- /dev/null
+++ b/scripts/check_persistence_performance.py
@@ -0,0 +1,117 @@
+"""Check that the performance of skops persistence is not too slow
+
+Load each (fitted) estimator and persist it with pickle and with skops. Measure
+the time it takes and record it. Report the estimators that were slowest. If
+skops is much slower than pickle (in absolute terms), raise an error to make the
+GH action fail.
+
+"""
+
+from __future__ import annotations
+
+import pickle
+import timeit
+import warnings
+from typing import Any
+
+import pandas as pd
+from sklearn.utils._tags import _safe_tags
+from sklearn.utils._testing import set_random_state
+
+import skops.io as sio
+from skops.io.tests.test_persist import (
+    _get_check_estimator_ids,
+    _tested_estimators,
+    get_input,
+)
+
+ATOL = 1  # seconds absolute difference allowed at max
+NUM_REPS = 10  # number of times the check is repeated
+TOPK = 10  # number of slowest estimators reported
+
+
+def check_persist_performance() -> None:
+    """Run all performance checks on all estimators and print results.
+
+    For each estimator, record how long it takes to dump+load with pickle and
+    with skops. If any estimator takes much longer (in absolute time) with skops
+    than pickle, raise a RuntimeError. Print the worst results to stdout.
+
+    """
+    results: dict[str, list[Any]] = {"name": [], "pickle (s)": [], "skops (s)": []}
+    for estimator in _tested_estimators():
+        set_random_state(estimator, random_state=0)
+
+        X, y = get_input(estimator)
+        tags = _safe_tags(estimator)
+        if tags.get("requires_fit", True):
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", module="sklearn")
+                if y is not None:
+                    estimator.fit(X, y)
+                else:
+                    estimator.fit(X)
+
+        name = _get_check_estimator_ids(estimator)
+        time_pickle, time_skops = run_check(estimator, number=NUM_REPS)
+
+        results["name"].append(name)
+        results["pickle (s)"].append(time_pickle)
+        results["skops (s)"].append(time_skops)
+
+    format_result(results, topk=TOPK)
+
+
+def run_check(estimator, number: int) -> tuple[float, float]:
+    """Run performance check with the given estimator for pickle and skops.
+
+    The test is run multiple times to get more robust results, ``number``
+    indicates how often it is run.
+
+    """
+
+    def run_pickle():
+        pickle.loads(pickle.dumps(estimator))
+
+    def run_skops():
+        sio.loads(sio.dumps(estimator), trusted=True)
+
+    time_pickle = timeit.timeit(run_pickle, number=number) / number
+    time_skops = timeit.timeit(run_skops, number=number) / number
+
+    return time_pickle, time_skops
+
+
+def format_result(results: dict[str, list[Any]], topk: int) -> None:
+    """Report results from performance checks.
+
+    Print the ``topk`` slowest results. If any estimator takes much longer (in
+    absolute time) with skops than pickle, raise a RuntimeError.
+
+    """
+    df = pd.DataFrame(results)
+    df = df.assign(
+        abs_diff=df["skops (s)"] - df["pickle (s)"],
+        rel_diff=df["skops (s)"] / df["pickle (s)"],
+        too_slow=lambda d: d["abs_diff"] > ATOL,
+    )
+
+    df = df.sort_values(["abs_diff"], ascending=False).reset_index(drop=True)
+    print(f"{topk} largest differences:")
+    print(df.head(topk))
+
+    df_slow = df.query("too_slow")
+    if df_slow.empty:
+        print("No estimator was found to be unacceptably slow")
+        return
+
+    print(
+        f"Found {len(df_slow)} estimator(s) that are at least {ATOL:.1f} sec slower "
+        "with skops:"
+    )
+    print(", ".join(df_slow["name"].tolist()))
+    raise RuntimeError("Skops persistence too slow")
+
+
+if __name__ == "__main__":
+    check_persist_performance()