# *****************************COPYRIGHT*******************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file COPYRIGHT.txt
# which you should have received as part of this distribution.
# *****************************COPYRIGHT*******************************
"""
Clone sources for a rose-stem run for use with git bdiff module in scripts
"""

import re
import shlex
import subprocess
from pathlib import Path
from shutil import rmtree
from typing import Optional

# Prefixes that may precede "<user>/<repo>" in a GitHub source string. The
# https form appears when sources have been rewritten to use tokens.
GITHUB_PREFIXES = ("git@github.com:", "https://github.com/")

# A full 40 character commit hash, optionally surrounded by whitespace
HASH_PATTERN = re.compile(r"^\s*([0-9a-f]{40})\s*$")


def _quote(value: object) -> str:
    """
    Quote a value so that run_command passes it on as a single argument.
    An empty or None value gives an empty string, so the argument is dropped.
    """
    return shlex.quote(str(value)) if value else ""


def _strip_github_prefix(source: str) -> str:
    """
    Remove a leading GitHub ssh or https prefix, leaving "<user>/<repo>"
    """
    for prefix in GITHUB_PREFIXES:
        if source.startswith(prefix):
            return source[len(prefix):]
    return source


def _remove_existing(loc: Path) -> None:
    """
    Remove whatever currently exists at loc, if anything
    """
    # rmtree only handles real directories, so unlink files and symlinks
    # (including broken symlinks, for which exists() is False)
    if loc.is_symlink() or loc.is_file():
        loc.unlink()
    elif loc.exists():
        rmtree(loc)


def run_command(
    command: str, rval: bool = False
) -> Optional[subprocess.CompletedProcess]:
    """
    Run a subprocess command and optionally return the result object

    The command is tokenised with shlex.split and run without a shell, so an
    argument containing whitespace must be quoted (eg. with shlex.quote) to be
    passed on as a single argument.

    Inputs:
        - command, str with command to run
        - rval, bool, if True return the result object, otherwise return None
    Outputs:
        - result object from subprocess.run if rval is True, otherwise None
    Raises:
        - RuntimeError if the command exits with a non-zero return code
        - subprocess.TimeoutExpired if the command runs for longer than 300s
    """
    args = shlex.split(command)
    result = subprocess.run(
        args,
        capture_output=True,
        text=True,
        timeout=300,
        shell=False,
        check=False,
    )
    if result.returncode:
        print(result.stdout, end="\n\n\n")
        raise RuntimeError(
            f"[FAIL] Issue found running command {args}\n\n{result.stderr}"
        )
    return result if rval else None


def get_mirror_fetch_ref(source: str, repo_ref: str, parent: str) -> str:
    """
    Determine the ref to fetch from a local git mirror

    Inputs:
        - source, str, the repository source, eg. git@github.com:user/repo.git
        - repo_ref, str, the branch, tag or commit hash requested
        - parent, str, the upstream repository, eg. UpstreamUser/repo.git
    Outputs:
        - repo_ref on its own if it is a full commit hash or if the source
          belongs to the upstream user, otherwise "<user>/<repo_ref>"
    """
    ref = repo_ref.strip()

    # A hash identifies the commit on its own so the fork user isn't needed
    if HASH_PATTERN.match(ref):
        return ref

    user = _strip_github_prefix(source).split("/")[0]
    upstream_user = _strip_github_prefix(parent).split("/")[0]

    # Compare whole user names - a substring test would treat a fork owned by
    # "Met" as being the upstream "MetOffice". GitHub user names are not case
    # sensitive, so neither is the comparison.
    if not user or user.casefold() == upstream_user.casefold():
        return ref
    return f"{user}/{ref}"


def clone_repo_mirror(
    source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path
) -> None:
    """
    Clone a repo source using a local git mirror.
    Assume the mirror is set up as per the Met Office

    Inputs:
        - source, str, the repository source the ref belongs to
        - repo_ref, str, ref to checkout. If empty the clone is left as is
        - parent, str, the upstream repository, used to identify forks
        - mirror_loc, Path to the local mirror of the parent repository
        - loc, Path to clone into. Anything already there is removed
    """

    # Remove if this clone already exists
    _remove_existing(loc)

    run_command(f"git clone {_quote(mirror_loc)} {_quote(loc)}")

    # If not provided a ref, return
    if not repo_ref or not repo_ref.strip():
        return

    fetch = get_mirror_fetch_ref(source, repo_ref, parent)
    commands = (
        f"git -C {_quote(loc)} fetch origin {_quote(fetch)}",
        f"git -C {_quote(loc)} checkout FETCH_HEAD",
    )
    for command in commands:
        run_command(command)


def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None:
    """
    Clone the repo and checkout the provided ref
    Only if a remote source

    Inputs:
        - repo_source, str, the repository to fetch from
        - repo_ref, str, ref to fetch and checkout. If empty, the fetch is run
          without an explicit ref
        - loc, Path to clone into. Anything already there is removed
    """

    # Remove if this clone already exists
    _remove_existing(loc)

    # Create a clean clone location
    loc.mkdir(parents=True)

    ref = repo_ref.strip() if repo_ref else ""
    target = _quote(loc)
    commands = (
        f"git -C {target} init",
        f"git -C {target} remote add origin {_quote(repo_source)}",
        f"git -C {target} fetch origin {_quote(ref)}",
        f"git -C {target} checkout FETCH_HEAD",
    )
    for command in commands:
        run_command(command)


def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None:
    """
    Rsync a local git clone and checkout the provided ref

    Inputs:
        - repo_source, str, location of the clone to copy
        - repo_ref, str, ref to checkout after copying. If empty the copy is
          left as is
        - loc, Path to copy into. Anything already there is removed
    """

    # Remove if this clone already exists
    _remove_existing(loc)

    # Create a clean clone location
    loc.mkdir(parents=True)

    # Exactly one trailing slash is required so that rsync copies the contents
    # of the source directory rather than the directory itself
    source = repo_source.rstrip("/") + "/"
    run_command(f"rsync -av {_quote(source)} {_quote(loc)}")

    if repo_ref and repo_ref.strip():
        run_command(f"git -C {_quote(loc)} checkout {_quote(repo_ref.strip())}")
#!/usr/bin/env python3
# *****************************COPYRIGHT*******************************
# (C) Crown copyright Met Office. All rights reserved.
# For further details please refer to the file COPYRIGHT.txt
# which you should have received as part of this distribution.
# *****************************COPYRIGHT*******************************
"""
Clone sources for a rose-stem run for use with git bdiff module in scripts
Only intended for use with rose-stem suites that have provided appropriate environment
variables
"""

import os
from ast import literal_eval
from datetime import datetime
from pathlib import Path
from typing import Dict

from get_git_sources import clone_repo, clone_repo_mirror, sync_repo

SSH_PREFIX = "git@github.com:"
HTTPS_PREFIX = "https://github.com/"


def set_https(dependencies: Dict) -> Dict:
    """
    Change sources in a dependencies dictionary to use https instead of ssh

    Inputs:
        - dependencies, dict mapping each dependency name to a dict with at
          least a "source" entry
    Outputs:
        - the same dictionary, modified in place, with any source starting with
          "git@github.com:" rewritten to start with "https://github.com/"
    """

    print("Modifying Dependencies")
    for values in dependencies.values():
        source = values["source"]
        if source.startswith(SSH_PREFIX):
            values["source"] = HTTPS_PREFIX + source[len(SSH_PREFIX):]

    return dependencies


def env_flag(name: str) -> bool:
    """
    Return True only if the environment variable name is set to exactly "True"
    """
    return os.environ.get(name, "False") == "True"


def is_git_remote(source: str) -> bool:
    """
    Return True if source names a git repository to clone rather than a
    directory to rsync, ie. it ends in ".git"
    """
    # Test the suffix rather than searching the whole string, otherwise a local
    # path with ".git" somewhere inside it (eg. a ".github" directory) would be
    # cloned instead of synced
    return source.rstrip("/").endswith(".git")


def main() -> None:
    """
    Extract every dependency into its own directory under SOURCE_DIRECTORY

    Reads the environment variables:
        - SOURCE_DIRECTORY, directory the sources are extracted into
        - DEPENDENCIES, python literal dict of dependency name to a dict with
          "source" and "ref" entries, plus "parent" if mirrors are used
        - USE_TOKENS, if "True" ssh GitHub sources are switched to https
        - USE_MIRRORS, if "True" git sources are cloned from a local mirror
        - GIT_MIRROR_LOC, location of the mirrors, only read if USE_MIRRORS
    """

    clone_loc = Path(os.environ["SOURCE_DIRECTORY"])

    dependencies: Dict = literal_eval(os.environ["DEPENDENCIES"])

    if env_flag("USE_TOKENS"):
        dependencies = set_https(dependencies)

    use_mirrors = env_flag("USE_MIRRORS")

    for dependency, values in dependencies.items():

        source = values["source"]
        ref = values["ref"]

        print(
            f"Extracting {dependency} at time {datetime.now()} "
            f"using source {source} and ref {ref}"
        )

        loc = clone_loc / dependency

        if not is_git_remote(source):
            sync_repo(source, ref, loc)
        elif use_mirrors:
            parent = values["parent"]
            mirror_loc = Path(os.environ["GIT_MIRROR_LOC"]) / parent
            clone_repo_mirror(source, ref, parent, mirror_loc, loc)
        else:
            clone_repo(source, ref, loc)


if __name__ == "__main__":
    main()
def find_unknown_dependency(self, dependency: str) -> str:
    """
    The primary dependency may be unset in the dependencies file. In this case
    find it from the scheduler log of the suite, which is searched for a
    "<DEPENDENCY> SOURCE CLONE=<location>" entry.
    NOTE(review): the location is presumably written to the log from
    CYLC_WORKFLOW_SRC_DIR by the workflow definition - confirm

    Inputs:
        - dependency, str, name of the dependency. It is upper-cased before
          being searched for
    Outputs:
        - str, the location from the first matching log entry, without any
          trailing slashes
    Raises:
        - RuntimeError if the log contains no entry for the dependency
        - FileNotFoundError if the scheduler log does not exist
    """

    # Escape the name so that it is matched literally. The lookbehind stops a
    # name matching on the end of a longer one, eg. UM inside JULES_UM
    name = re.escape(dependency.upper())
    pattern = re.compile(rf"(?<!\w){name} SOURCE CLONE=(\S+)")
    log_file = self.suite_path / "log" / "scheduler" / "log"
    # Undecodable bytes elsewhere in the log shouldn't stop the search
    with open(log_file, "r", encoding="utf-8", errors="replace") as log:
        for line in log:
            if match := pattern.search(line):
                return match.group(1).rstrip("/")
    raise RuntimeError(f"Unable to find source for dependency {dependency}")