diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
index cfd785f47ebc..b55ee7fed73c 100644
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2022 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +16,7 @@
 import copy
 import os
 from dataclasses import dataclass
-from typing import Any, Optional
+from typing import Any
 
 import yaml
 
@@ -33,7 +32,13 @@
     "NETWORK_DEBUG_REPORT": True,
 }
 # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
-COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None, "random-order-bucket": "module", "random-order-seed": "${CIRCLE_BUILD_NUM:-0}"}
+COMMON_PYTEST_OPTIONS = {
+    "max-worker-restart": 0,
+    "vvv": None,
+    "rsfE": None,
+    "random-order-bucket": "module",
+    "random-order-seed": "${CIRCLE_BUILD_NUM:-0}",
+}
 DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
 
 # Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
@@ -60,14 +65,18 @@ class EmptyJob:
     job_name = "empty"
 
     def to_dict(self):
-        steps = [{"run": 'ls -la'}]
+        steps = [{"run": "ls -la"}]
         if self.job_name == "collection_job":
             steps.extend(
                 [
                     "checkout",
                     {"run": "pip install requests || true"},
-                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
-                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
+                    {
+                        "run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""
+                    },
+                    {
+                        "run": "python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true"
+                    },
                     {"store_artifacts": {"path": "outputs"}},
                     {"run": 'echo "All required jobs have now completed"'},
                 ]
@@ -86,15 +95,15 @@ class CircleCIJob:
     additional_env: dict[str, Any] = None
     docker_image: list[dict[str, str]] = None
     install_steps: list[str] = None
-    marker: Optional[str] = None
-    parallelism: Optional[int] = 0
+    marker: str | None = None
+    parallelism: int | None = 0
     pytest_num_workers: int = 8
     pytest_options: dict[str, Any] = None
-    resource_class: Optional[str] = "xlarge"
-    tests_to_run: Optional[list[str]] = None
-    num_test_files_per_worker: Optional[int] = 10
+    resource_class: str | None = "xlarge"
+    tests_to_run: list[str] | None = None
+    num_test_files_per_worker: int | None = 10
     # This should be only used for doctest job!
-    command_timeout: Optional[int] = None
+    command_timeout: int | None = None
 
     def __post_init__(self):
         # Deal with defaults for mutable attributes.
@@ -106,7 +115,10 @@ def __post_init__(self):
         else:
             # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
             print(os.environ.get("GIT_COMMIT_MESSAGE"))
-            if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
+            if (
+                "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "")
+                or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci"
+            ):
                 self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
             print(f"Using {self.docker_image} docker image")
         if self.install_steps is None:
@@ -120,7 +132,7 @@ def __post_init__(self):
             if isinstance(self.tests_to_run, str):
                 self.tests_to_run = [self.tests_to_run]
         else:
-            test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt")
+            test_file = os.path.join("test_preparation", f"{self.job_name}_test_list.txt")
             print("Looking for ", test_file)
             if os.path.exists(test_file):
                 with open(test_file, encoding="utf-8") as f:
@@ -139,7 +151,7 @@ def to_dict(self):
         # fmt: on
 
         # Do not run tests decorated by @is_flaky on pull requests
-        env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
+        env["RUN_FLAKY"] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
         env.update(self.additional_env)
 
         job = {
@@ -150,51 +162,90 @@ def to_dict(self):
             job["resource_class"] = self.resource_class
 
         all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
-        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
+        pytest_flags = [
+            f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}"
+            for key, value in all_options.items()
+        ]
         pytest_flags.append(
             f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
        )
-        # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues
+        # Examples special case: we need to download NLTK files in advance to avoid concurrency issues
         timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
         marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
         junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
         joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
         repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
-        parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
+        parallel = f" << pipeline.parameters.{self.job_name}_parallelism >> "
         steps = [
             "checkout",
             {"attach_workspace": {"at": "test_preparation"}},
             {"run": "apt-get update && apt-get install -y curl"},
             {"run": " && ".join(self.install_steps)},
-            {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"},
-            {"run": {
+            {
+                "run": {
+                    "name": "Download NLTK files",
+                    "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """,
+                }
+                if "example" in self.name
+                else "echo Skipping"
+            },
+            {
+                "run": {
                     "name": "Show installed libraries and their size",
-                    "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}
+                    "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true""",
+                }
             },
-            {"run": {
-                    "name": "Show installed libraries and their versions",
-                    "command": """pip list --format=freeze | tee installed.txt || true"""}
+            {
+                "run": {
+                    "name": "Show installed libraries and their versions",
+                    "command": """pip list --format=freeze | tee installed.txt || true""",
+                }
             },
-            {"run": {
-                    "name": "Show biggest libraries",
-                    "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
+            {
+                "run": {
+                    "name": "Show biggest libraries",
+                    "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true""",
+                }
             },
             {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
-            {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
-            {"run": {"name": "Split tests across parallel nodes: show current parallel tests",
-                     "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
-                     }
+            {
+                "run": {
+                    "name": "Get files to test",
+                    "command": f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"'
+                    if self.name != "pr_documentation_tests"
+                    else 'echo "Skipped"',
+                }
+            },
+            {
+                "run": {
+                    "name": "Split tests across parallel nodes: show current parallel tests",
+                    "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'"
+                    if self.parallelism
+                    else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt",
+                }
             },
             # During the CircleCI docker images build time, we might already (or not) download the data.
             # If it's done already, the files are inside the directory `/test_data/`.
-            {"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}},
-            {"run": {"name": "download and unzip hub cache", "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/'}},
-            {"run": {
-                "name": "Run tests",
-                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
+            {
+                "run": {
+                    "name": "fetch hub objects before pytest",
+                    "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py",
+                }
+            },
+            {
+                "run": {
+                    "name": "download and unzip hub cache",
+                    "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/',
+                }
             },
-            {"run":
-                {
+            {
+                "run": {
+                    "name": "Run tests",
+                    "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)",
+                }
+            },
+            {
+                "run": {
                     "name": "Check for test crashes",
                     "when": "always",
                     "command": """if [ ! -f tests_output.txt ]; then
@@ -206,12 +257,30 @@ def to_dict(self):
                         exit 1
                     else
                         echo "Tests output file exists and no worker crashes detected"
-                    fi"""
+                    fi""",
                 },
             },
-            {"run": {"name": "Expand to show skipped tests", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
-            {"run": {"name": "Failed tests: show reasons", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
-            {"run": {"name": "Errors", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
+            {
+                "run": {
+                    "name": "Expand to show skipped tests",
+                    "when": "always",
+                    "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip",
+                }
+            },
+            {
+                "run": {
+                    "name": "Failed tests: show reasons",
+                    "when": "always",
+                    "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail",
+                }
+            },
+            {
+                "run": {
+                    "name": "Errors",
+                    "when": "always",
+                    "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors",
+                }
+            },
             {"store_test_results": {"path": "test-results"}},
             {"store_artifacts": {"path": "test-results/junit.xml"}},
             {"store_artifacts": {"path": "reports"}},
@@ -227,7 +296,11 @@ def to_dict(self):
 
     @property
     def job_name(self):
-        return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}"
+        return (
+            self.name
+            if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name)
+            else f"tests_{self.name}"
+        )
 
 
 # JOBS
@@ -263,7 +336,7 @@ def job_name(self):
 pipelines_torch_job = CircleCIJob(
     "pipelines_torch",
     additional_env={"RUN_PIPELINE_TESTS": True},
-    docker_image=[{"image":"huggingface/transformers-torch-light"}],
+    docker_image=[{"image": "huggingface/transformers-torch-light"}],
     marker="is_pipeline_test",
     parallelism=4,
 )
@@ -277,7 +350,7 @@ def job_name(self):
 examples_torch_job = CircleCIJob(
     "examples_torch",
     additional_env={"OMP_NUM_THREADS": 8},
-    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
+    docker_image=[{"image": "huggingface/transformers-examples-torch"}],
     # TODO @ArthurZucker remove this once docker is easier to build
     install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
     pytest_num_workers=4,
@@ -285,14 +358,14 @@ def job_name(self):
 
 exotic_models_job = CircleCIJob(
     "exotic_models",
-    docker_image=[{"image":"huggingface/transformers-exotic-models"}],
+    docker_image=[{"image": "huggingface/transformers-exotic-models"}],
     parallelism=4,
     pytest_options={"durations": 100},
 )
 
 repo_utils_job = CircleCIJob(
     "repo_utils",
-    docker_image=[{"image":"huggingface/transformers-consistency"}],
+    docker_image=[{"image": "huggingface/transformers-consistency"}],
     pytest_num_workers=4,
     resource_class="large",
 )
@@ -333,7 +406,7 @@ def job_name(self):
 command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
 doc_test_job = CircleCIJob(
     "pr_documentation_tests",
-    docker_image=[{"image":"huggingface/transformers-consistency"}],
+    docker_image=[{"image": "huggingface/transformers-consistency"}],
     additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
     install_steps=[
         # Add an empty file to keep the test step running correctly even no file is selected to be tested.
@@ -341,7 +414,7 @@ def job_name(self):
         "touch dummy.py",
         command,
         "cat pr_documentation_tests_temp.txt",
-        "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt"
+        "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt",
     ],
     tests_to_run="$(cat pr_documentation_tests.txt)",  # noqa
     pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
@@ -349,7 +422,7 @@ def job_name(self):
     pytest_num_workers=1,
 )
 
-REGULAR_TESTS = [torch_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
+REGULAR_TESTS = [torch_job, tokenization_job, processor_job, generate_job, non_model_job]  # fmt: skip
 EXAMPLES_TESTS = [examples_torch_job]
 PIPELINE_TESTS = [pipelines_torch_job]
 REPO_UTIL_TESTS = [repo_utils_job]
@@ -363,13 +436,16 @@ def create_circleci_config(folder=None):
     if folder is None:
         folder = os.getcwd()
     os.environ["test_preparation_dir"] = folder
-    jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )]
+    jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation", f"{k.job_name}_test_list.txt"))]
     print("The following jobs will be run ", jobs)
 
     if len(jobs) == 0:
         jobs = [EmptyJob()]
     else:
-        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+        print(
+            "Full list of job name inputs",
+            {j.job_name + "_test_list": {"type": "string", "default": ""} for j in jobs},
+        )
         # Add a job waiting all the test jobs and aggregate their test summary files at the end
         collection_job = EmptyJob()
         collection_job.job_name = "collection_job"
@@ -386,19 +462,26 @@ def create_circleci_config(folder=None):
         "GHA_Event": {"type": "string", "default": ""},
         "GHA_Meta": {"type": "string", "default": ""},
         "tests_to_run": {"type": "string", "default": ""},
-        **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
-        **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
+        **{j.job_name + "_test_list": {"type": "string", "default": ""} for j in jobs},
+        **{j.job_name + "_parallelism": {"type": "integer", "default": 1} for j in jobs},
     },
-        "jobs": {j.job_name: j.to_dict() for j in jobs}
+        "jobs": {j.job_name: j.to_dict() for j in jobs},
     }
     if "CIRCLE_TOKEN" in os.environ:
         # For private forked repo. (e.g. new model addition)
-        config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}}
+        config["workflows"] = {
+            "version": 2,
+            "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]},
+        }
     else:
         # For public repo. (e.g. `transformers`)
         config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
     with open(os.path.join(folder, "generated_config.yml"), "w", encoding="utf-8") as f:
-        f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))
+        f.write(
+            yaml.dump(config, sort_keys=False, default_flow_style=False)
+            .replace("' << pipeline", " << pipeline")
+            .replace(">> '", " >>")
+        )
 
 
 if __name__ == "__main__":
diff --git a/utils/check_types.py b/utils/check_types.py
index 2400863bc2d3..5befe597dd97 100644
--- a/utils/check_types.py
+++ b/utils/check_types.py
@@ -18,12 +18,14 @@
         "src/transformers/utils/**/*.py",
         "src/transformers/generation/**/*.py",
         "src/transformers/quantizers/**/*.py",
+        ".circleci/create_circleci_config.py",
     ],
     "check_args": [
         "src/transformers/_typing.py",
         "src/transformers/utils",
         "src/transformers/generation",
         "src/transformers/quantizers",
+        ".circleci/create_circleci_config.py",
     ],
     "fix_args": None,
 }
diff --git a/utils/checkers.py b/utils/checkers.py
index 76028f6b74e6..15583cd77c5a 100644
--- a/utils/checkers.py
+++ b/utils/checkers.py
@@ -156,6 +156,7 @@ def _discover_checkers() -> tuple[dict, dict]:
             "src/**/*.py",
             "utils/**/*.py",
             "scripts/**/*.py",
+            ".circleci/create_circleci_config.py",
             "benchmark/**/*.py",
             "benchmark_v2/**/*.py",
             "setup.py",
@@ -167,6 +168,7 @@ def _discover_checkers() -> tuple[dict, dict]:
             "src/**/*.py",
             "utils/**/*.py",
             "scripts/**/*.py",
+            ".circleci/create_circleci_config.py",
             "benchmark/**/*.py",
             "benchmark_v2/**/*.py",
             "setup.py",
@@ -387,7 +389,18 @@ def run_imports_checker(fix=False, line_callback=None):
     return 0, output
 
 
-RUFF_TARGETS = ["examples", "tests", "src", "utils", "scripts", "benchmark", "benchmark_v2", "setup.py", "conftest.py"]
+RUFF_TARGETS = [
+    "examples",
+    "tests",
+    "src",
+    "utils",
+    "scripts",
+    ".circleci/create_circleci_config.py",
+    "benchmark",
+    "benchmark_v2",
+    "setup.py",
+    "conftest.py",
+]
 
 
 def run_ruff_check(fix=False, line_callback=None):
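
A quick way to sanity-check the reformatted generator end to end is to load it by path (".circleci" is not an importable package name), point it at a scratch folder, and re-parse the YAML it emits. The sketch below is illustrative rather than part of the change: it assumes it runs from the repository root, and that any test_preparation/*_test_list.txt inputs were already written by the tests fetcher; without them the generator falls back to the single "empty" job, as the EmptyJob fallback in create_circleci_config above shows.

# Hypothetical local smoke test (not part of this patch): regenerate the
# CircleCI config and confirm the emitted YAML still parses. Assumes the
# repository root as the working directory; if test_preparation/ holds no
# *_test_list.txt files, only the fallback "empty" job is emitted.
import importlib.util
import os

import yaml

spec = importlib.util.spec_from_file_location("create_circleci_config", ".circleci/create_circleci_config.py")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

os.makedirs("test_preparation", exist_ok=True)
# Writes generated_config.yml into the given folder, per the function above.
module.create_circleci_config(folder="test_preparation")

with open(os.path.join("test_preparation", "generated_config.yml"), encoding="utf-8") as f:
    config = yaml.safe_load(f)

print("jobs:", sorted(config["jobs"]))

If the YAML round-trips and the job list matches the test-list files present, the formatting-only edits above left the generator's behavior intact.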