From d5eadf420f71138527570281f36e349644362038 Mon Sep 17 00:00:00 2001 From: James Bruten Date: Mon, 20 Oct 2025 13:56:39 +0100 Subject: [PATCH 1/7] move files around --- {bdiff => github_scripts}/__init__.py | 0 github_scripts/get_git_sources.py | 114 ++++++++++++++++++ {bdiff => github_scripts}/git_bdiff.py | 0 github_scripts/rose_stem_extract_source.py | 47 ++++++++ .../suite_data.py | 17 +-- .../suite_report_git.py | 0 {bdiff => github_scripts}/tests/__init__.py | 0 .../tests/test_git_bdiff.py | 0 8 files changed, 162 insertions(+), 16 deletions(-) rename {bdiff => github_scripts}/__init__.py (100%) create mode 100644 github_scripts/get_git_sources.py rename {bdiff => github_scripts}/git_bdiff.py (100%) create mode 100755 github_scripts/rose_stem_extract_source.py rename {suite_report_git => github_scripts}/suite_data.py (96%) rename {suite_report_git => github_scripts}/suite_report_git.py (100%) rename {bdiff => github_scripts}/tests/__init__.py (100%) rename {bdiff => github_scripts}/tests/test_git_bdiff.py (100%) diff --git a/bdiff/__init__.py b/github_scripts/__init__.py similarity index 100% rename from bdiff/__init__.py rename to github_scripts/__init__.py diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py new file mode 100644 index 00000000..a1058522 --- /dev/null +++ b/github_scripts/get_git_sources.py @@ -0,0 +1,114 @@ +# *****************************COPYRIGHT******************************* +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file COPYRIGHT.txt +# which you should have received as part of this distribution. 
+# *****************************COPYRIGHT******************************* +""" +Clone sources for a rose-stem run for use with git bdiff module in scripts +""" + +import re +import subprocess +from typing import Optional +from pathlib import Path +from shutil import rmtree + + +def run_command( + command: str, rval: bool = False +) -> Optional[subprocess.CompletedProcess]: + """ + Run a subprocess command and return the result object + Inputs: + - command, str with command to run + Outputs: + - result object from subprocess.run + """ + command = command.split() + result = subprocess.run( + command, + capture_output=True, + text=True, + timeout=300, + shell=False, + check=False, + ) + if result.returncode: + print(result.stdout, end="\n\n\n") + raise RuntimeError( + f"[FAIL] Issue found running command {command}\n\n{result.stderr}" + ) + if rval: + return result + + +def clone_repo_mirror( + source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path +) -> None: + """ + Clone a repo source using a local git mirror. + Assume the mirror is set up as per the Met Office + """ + + # Remove if this clone already exists + if loc.exists(): + rmtree(loc) + + command = f"git clone {mirror_loc} {loc}" + run_command(command) + + # If not provided a ref, return + if not repo_ref: + return + + source = source.removeprefix("git@github.com:") + user = source.split("/")[0] + # Check that the user is different to the Upstream User + if user in parent.split("/")[0]: + user = None + + # If the ref is a hash then we don't need the fork user as part of the fetch. 
+ # Equally, if the user is the Upstream User, it's not needed + if re.match(r"^\s*([0-9a-f]{40})\s*$", repo_ref) or not user: + fetch = repo_ref + else: + fetch = f"{user}/{repo_ref}" + commands = ( + f"git -C {loc} fetch origin {fetch}", + f"git -C {loc} checkout FETCH_HEAD", + ) + for command in commands: + run_command(command) + + +def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None: + """ + Clone the repo and checkout the provided ref + Only if a remote source + """ + + # Remove if this clone already exists + if loc.exists(): + rmtree(loc) + + commands = ( + f"git -C {loc} init", + f"git -C {loc} remote add origin {repo_source}" + f"git -C {loc} fetch origin {repo_ref}", + f"git -C {loc} checkout FETCH_HEAD" + ) + for command in commands: + run_command(command) + + +def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: + """ + Rsync a local git clone and checkout the provided ref + """ + + # Trailing slash required for rsync + command = f"rsync -av {repo_source}/ {loc}" + run_command(command) + if repo_ref: + command = f"git -C {loc} checkout {repo_ref}" + run_command(command) diff --git a/bdiff/git_bdiff.py b/github_scripts/git_bdiff.py similarity index 100% rename from bdiff/git_bdiff.py rename to github_scripts/git_bdiff.py diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py new file mode 100755 index 00000000..f7a8b810 --- /dev/null +++ b/github_scripts/rose_stem_extract_source.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# *****************************COPYRIGHT******************************* +# (C) Crown copyright Met Office. All rights reserved. +# For further details please refer to the file COPYRIGHT.txt +# which you should have received as part of this distribution. 
+# *****************************COPYRIGHT******************************* +""" +Clone sources for a rose-stem run for use with git bdiff module in scripts +Only intended for use with rose-stem suites that have provided appropriate environment +variables +""" + +import os +from datetime import datetime +from pathlib import Path +from ast import literal_eval +from get_git_sources import clone_repo, clone_repo_mirror, sync_repo +from typing import Dict + + +def main() -> None: + + clone_loc = Path(os.environ["SOURCE_DIRECTORY"]) + clone_loc.mkdir(parents=True) + + dependencies: Dict = literal_eval(os.environ["DEPENDENCIES"]) + + for dependency, values in dependencies.items(): + + print(f"Extracting {dependency} at time {datetime.now()}") + + loc = clone_loc / dependency + + if ".git" in values["source"]: + if os.environ["USE_MIRRORS"] == "True": + mirror_loc = Path(os.environ["GIT_MIRROR_LOC"]) / values["parent"] + clone_repo_mirror( + values["source"], values["ref"], values["parent"], mirror_loc, loc + ) + else: + clone_repo(values["source"], values["ref"], loc) + else: + sync_repo(values["source"], values["ref"], loc) + + +if __name__ == "__main__": + main() diff --git a/suite_report_git/suite_data.py b/github_scripts/suite_data.py similarity index 96% rename from suite_report_git/suite_data.py rename to github_scripts/suite_data.py index 145dc1ab..699bdcaa 100644 --- a/suite_report_git/suite_data.py +++ b/github_scripts/suite_data.py @@ -8,9 +8,6 @@ Class containing helper methods for gathering data needed for a SuiteReport object """ -import sys - -sys.path.append("../") import re import shutil import sqlite3 @@ -19,19 +16,7 @@ from collections import defaultdict from pathlib import Path from typing import Dict, List, Optional, Set, Union - -try: - from bdiff.git_bdiff import GitBDiff, GitInfo -except ImportError: - try: - from git_bdiff import GitBDiff, GitInfo - except ImportError as err: - raise ImportError( - "Unable to import from git_bdiff module. 
This is included in the same " - "repository as this script and included with a relative import. Ensure " - "this script is being called from the correct place." - ) from err - +from git_bdiff import GitBDiff, GitInfo class SuiteData: """ diff --git a/suite_report_git/suite_report_git.py b/github_scripts/suite_report_git.py similarity index 100% rename from suite_report_git/suite_report_git.py rename to github_scripts/suite_report_git.py diff --git a/bdiff/tests/__init__.py b/github_scripts/tests/__init__.py similarity index 100% rename from bdiff/tests/__init__.py rename to github_scripts/tests/__init__.py diff --git a/bdiff/tests/test_git_bdiff.py b/github_scripts/tests/test_git_bdiff.py similarity index 100% rename from bdiff/tests/test_git_bdiff.py rename to github_scripts/tests/test_git_bdiff.py From 0f624bd85ebdffd38cc68b0dff2aa853dae90797 Mon Sep 17 00:00:00 2001 From: James Bruten Date: Tue, 21 Oct 2025 11:47:28 +0100 Subject: [PATCH 2/7] update suite_report --- github_scripts/get_git_sources.py | 12 ++++++- github_scripts/rose_stem_extract_source.py | 1 - github_scripts/suite_data.py | 42 ++++++++-------------- github_scripts/suite_report_git.py | 2 +- 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index a1058522..99bb4ae5 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -91,9 +91,12 @@ def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None: if loc.exists(): rmtree(loc) + # Create a clean clone location + loc.mkdir(parents=True) + commands = ( f"git -C {loc} init", - f"git -C {loc} remote add origin {repo_source}" + f"git -C {loc} remote add origin {repo_source}", f"git -C {loc} fetch origin {repo_ref}", f"git -C {loc} checkout FETCH_HEAD" ) @@ -106,6 +109,13 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: Rsync a local git clone and checkout the provided ref """ + # Remove if this clone already 
exists + if loc.exists(): + rmtree(loc) + + # Create a clean clone location + loc.mkdir(parents=True) + # Trailing slash required for rsync command = f"rsync -av {repo_source}/ {loc}" run_command(command) diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index f7a8b810..8c9277e4 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -21,7 +21,6 @@ def main() -> None: clone_loc = Path(os.environ["SOURCE_DIRECTORY"]) - clone_loc.mkdir(parents=True) dependencies: Dict = literal_eval(os.environ["DEPENDENCIES"]) diff --git a/github_scripts/suite_data.py b/github_scripts/suite_data.py index 699bdcaa..55c7a951 100644 --- a/github_scripts/suite_data.py +++ b/github_scripts/suite_data.py @@ -17,6 +17,8 @@ from pathlib import Path from typing import Dict, List, Optional, Set, Union from git_bdiff import GitBDiff, GitInfo +from get_git_sources import clone_repo, sync_repo + class SuiteData: """ @@ -202,20 +204,9 @@ def clone_sources(self) -> None: for dependency, data in self.dependencies.items(): loc = self.temp_directory / dependency if data["source"].endswith(".git"): - commands = [ - f"git clone {data['source']} {loc}", - f"git -C {loc} checkout {data['ref']}", - ] - for command in commands: - self.run_command(command) + clone_repo(data['source'], data['ref'], loc) else: - source = data["source"] - if not source.endswith("/"): - source = source + "/" - command = ( - f'rsync -e "ssh -o StrictHostKeyChecking=no" -avl {source} {loc}' - ) - self.run_command(command, shell=True) + sync_repo(data['source'], data['ref'], loc) def determine_primary_source(self) -> str: """ @@ -283,21 +274,18 @@ def read_rose_conf(self) -> Dict[str, str]: def find_unknown_dependency(self, dependency: str) -> str: """ - TEMPORARY The primary dependency may be unset in the dependencies file. In this case find - it from the *_SOURCE variable in the rose-suite.conf. 
- TODO: Once cylc provides the location of the source code itself, this method - should be changed to use that instead, as then the _SOURCE variable will be - removed - """ - - var = f"{dependency.upper()}_SOURCE".replace('"', "") - if var not in self.rose_data: - raise RuntimeError(f"Cant determine source for {dependency}") - rval = self.rose_data[var] - if "$ROSE_ORIG_HOST" in rval: - rval = rval.replace("$ROSE_ORIG_HOST", self.rose_data["ROSE_ORIG_HOST"]) - return rval + it from the CYLC_WORKFLOW_SRC_DIR variable that gets set in the + flow-processed.cylc file + """ + + log_file = self.suite_path / "log" / "scheduler" / "log" + with open(log_file, "r") as f: + for line in f: + line = line.strip() + if re.search(f"{dependency.upper()} SOURCE CLONE=", line): + return line.split("=")[1].rstrip("/") + raise RuntimeError(f"Unable to find source for dependency {dependency}") def read_dependencies(self) -> Dict[str, Dict]: """ diff --git a/github_scripts/suite_report_git.py b/github_scripts/suite_report_git.py index 7b96e53a..7fc5e28d 100755 --- a/github_scripts/suite_report_git.py +++ b/github_scripts/suite_report_git.py @@ -373,8 +373,8 @@ def main() -> None: args = parse_args() + suite_report = SuiteReport(args.suite_path) try: - suite_report = SuiteReport(args.suite_path) suite_report.create_log() suite_report.write_log(args.log_path) finally: From 96a4755766a63e6c46b94b49a6656f6e2c811a6f Mon Sep 17 00:00:00 2001 From: James Bruten Date: Tue, 21 Oct 2025 11:52:27 +0100 Subject: [PATCH 3/7] black --- github_scripts/get_git_sources.py | 2 +- github_scripts/suite_data.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 99bb4ae5..8519ed0b 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -98,7 +98,7 @@ def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None: f"git -C {loc} init", f"git -C {loc} remote add origin 
{repo_source}", f"git -C {loc} fetch origin {repo_ref}", - f"git -C {loc} checkout FETCH_HEAD" + f"git -C {loc} checkout FETCH_HEAD", ) for command in commands: run_command(command) diff --git a/github_scripts/suite_data.py b/github_scripts/suite_data.py index 55c7a951..ab94e30a 100644 --- a/github_scripts/suite_data.py +++ b/github_scripts/suite_data.py @@ -204,9 +204,9 @@ def clone_sources(self) -> None: for dependency, data in self.dependencies.items(): loc = self.temp_directory / dependency if data["source"].endswith(".git"): - clone_repo(data['source'], data['ref'], loc) + clone_repo(data["source"], data["ref"], loc) else: - sync_repo(data['source'], data['ref'], loc) + sync_repo(data["source"], data["ref"], loc) def determine_primary_source(self) -> str: """ From 9165ebfb33743b616b34895df67fde17e5fb2c44 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 21 Oct 2025 14:30:35 +0100 Subject: [PATCH 4/7] Update github_scripts/get_git_sources.py Co-authored-by: Sam Clarke-Green <74185251+t00sa@users.noreply.github.com> --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 8519ed0b..cc857cf1 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -69,7 +69,7 @@ def clone_repo_mirror( # If the ref is a hash then we don't need the fork user as part of the fetch. 
# Equally, if the user is the Upstream User, it's not needed - if re.match(r"^\s*([0-9a-f]{40})\s*$", repo_ref) or not user: + if not user or re.match(r"^\s*([0-9a-f]{40})\s*$", repo_ref): fetch = repo_ref else: fetch = f"{user}/{repo_ref}" From 43b1ff9768b1892f8e89ac15873ae167818aafb1 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 21 Oct 2025 14:33:36 +0100 Subject: [PATCH 5/7] Update github_scripts/suite_data.py Co-authored-by: Sam Clarke-Green <74185251+t00sa@users.noreply.github.com> --- github_scripts/suite_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/github_scripts/suite_data.py b/github_scripts/suite_data.py index ab94e30a..b00cec5a 100644 --- a/github_scripts/suite_data.py +++ b/github_scripts/suite_data.py @@ -279,12 +279,12 @@ def find_unknown_dependency(self, dependency: str) -> str: flow-processed.cylc file """ + pattern = re.compile(rf"{dependency.upper()} SOURCE CLONE=(\S+)") log_file = self.suite_path / "log" / "scheduler" / "log" with open(log_file, "r") as f: for line in f: - line = line.strip() - if re.search(f"{dependency.upper()} SOURCE CLONE=", line): - return line.split("=")[1].rstrip("/") + if match := pattern.search(line): + return match.group(1).rstrip("/") raise RuntimeError(f"Unable to find source for dependency {dependency}") def read_dependencies(self) -> Dict[str, Dict]: From f0c6279ed2ec460c89e4e07826b2c3915b184148 Mon Sep 17 00:00:00 2001 From: James Bruten Date: Wed, 22 Oct 2025 13:46:59 +0100 Subject: [PATCH 6/7] add option for using https --- github_scripts/rose_stem_extract_source.py | 28 ++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 8c9277e4..57fac662 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -18,20 +18,44 @@ from typing import Dict +def 
set_https(dependencies: Dict) -> Dict: + """ + Change sources in a dependencies dictionary to use https instead of ssh + """ + + print("Modifying Dependencies") + for dependency, values in dependencies.items(): + if values["source"].startswith("git@github.com:"): + source = dependencies[dependency]["source"] + dependencies[dependency]["source"] = source.replace( + "git@github.com:", "https://github.com/" + ) + + return dependencies + + def main() -> None: clone_loc = Path(os.environ["SOURCE_DIRECTORY"]) dependencies: Dict = literal_eval(os.environ["DEPENDENCIES"]) + if os.environ.get("USE_TOKENS", "False") == "True": + dependencies = set_https(dependencies) + for dependency, values in dependencies.items(): - print(f"Extracting {dependency} at time {datetime.now()}") + print( + f"Extracting {dependency} at time {datetime.now()} " + f"using source {values['source']} and ref {values['ref']}" + ) loc = clone_loc / dependency if ".git" in values["source"]: - if os.environ["USE_MIRRORS"] == "True": + print(os.environ.get("USE_MIRRORS", "False")) + print(type(os.environ.get("USE_MIRRORS", "False"))) + if os.environ.get("USE_MIRRORS", "False") == "True": mirror_loc = Path(os.environ["GIT_MIRROR_LOC"]) / values["parent"] clone_repo_mirror( values["source"], values["ref"], values["parent"], mirror_loc, loc From f743d7005b177c302a458c80cc2bbafb5d2967f2 Mon Sep 17 00:00:00 2001 From: James Bruten Date: Wed, 22 Oct 2025 13:48:19 +0100 Subject: [PATCH 7/7] remove debug prints --- github_scripts/rose_stem_extract_source.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/github_scripts/rose_stem_extract_source.py b/github_scripts/rose_stem_extract_source.py index 57fac662..05fa081c 100755 --- a/github_scripts/rose_stem_extract_source.py +++ b/github_scripts/rose_stem_extract_source.py @@ -53,8 +53,6 @@ def main() -> None: loc = clone_loc / dependency if ".git" in values["source"]: - print(os.environ.get("USE_MIRRORS", "False")) - print(type(os.environ.get("USE_MIRRORS",
"False"))) if os.environ.get("USE_MIRRORS", "False") == "True": mirror_loc = Path(os.environ["GIT_MIRROR_LOC"]) / values["parent"] clone_repo_mirror(