From d0326b877a484bbcce6528c83ddac37e4be35e84 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:11:52 +0000 Subject: [PATCH 1/4] copy changes from apps --- github_scripts/get_git_sources.py | 151 ++++++++++++++++++------------ 1 file changed, 90 insertions(+), 61 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 323afbed..230d9cc4 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -1,11 +1,10 @@ -# ----------------------------------------------------------------------------- +# *****************************COPYRIGHT******************************* # (C) Crown copyright Met Office. All rights reserved. -# The file LICENCE, distributed with this code, contains details of the terms -# under which the code may be used. -# ----------------------------------------------------------------------------- - +# For further details please refer to the file COPYRIGHT.txt +# which you should have received as part of this distribution. +# *****************************COPYRIGHT******************************* """ -Clone sources for a rose-stem run for use with git bdiff module in scripts +Helper functions for cloning git sources in command line builds """ import re @@ -14,16 +13,32 @@ from pathlib import Path from shutil import rmtree import shlex -import logging -logger = logging.getLogger(__name__) + +def get_source( + source: str, + ref: str, + dest: Path, + repo: str, + use_mirrors: bool = False, + mirror_loc: Path = "", +) -> None: + + if ".git" in source: + if use_mirrors: + mirror_loc = Path(mirror_loc) / "MetOffice" / repo + print(f"Cloning/Updating {repo} from mirror {mirror_loc} at ref {ref}") + clone_repo_mirror(source, ref, repo, mirror_loc, dest) + else: + print(f"Cloning/Updating {repo} from {source} at ref {ref}") + clone_repo(source, ref, dest) + else: + print(f"Syncing {repo} at ref {ref}") + sync_repo(source, ref, dest) def run_command( - command: str, - check: bool = True, - capture: bool = True, - timeout: int = 600 + command: str, rval: bool = False, check: bool = True ) -> Optional[subprocess.CompletedProcess]: """ Run a subprocess command and return the result object @@ -32,54 +47,59 @@ def run_command( Outputs: - result object from subprocess.run """ - - args = shlex.split(command) - - try: - # Note: text=True and capture_output=True have high overhead - # for large buffers. Use capture=False for fire-and-forget tasks. - result = subprocess.run( - args, - capture_output=capture, - text=capture, - timeout=timeout, - shell=False, - check=False + command = shlex.split(command) + result = subprocess.run( + command, + capture_output=True, + text=True, + timeout=300, + shell=False, + check=False, + ) + if check and result.returncode: + print(result.stdout, end="\n\n\n") + raise RuntimeError( + f"[FAIL] Issue found running command {command}\n\n{result.stderr}" ) - if check and result.returncode != 0: - err_msg = (result.stderr or "").strip() - logger.error(f"[FAIL] Command failed: {command}\nError: {err_msg}") - raise subprocess.CalledProcessError( - result.returncode, args, output=result.stdout, stderr=result.stderr - ) + if rval: return result - except (subprocess.TimeoutExpired, FileNotFoundError) as e: - logger.error(f"[FAIL] Execution error for '{args[0]}': {e}") - raise - def clone_repo_mirror( - source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path + repo_source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path ) -> None: """ Clone a repo source using a local git mirror. Assume the mirror is set up as per the Met Office + - repo_source: ssh url of the source repository + - repo_ref: git ref for the source. An empty string will get the default branch + - parent: Owner of the github repository being cloned (required to construct the + mirror path) + - mirror_loc: path to the local git mirrors + - loc: path to clone the repository to """ - # Remove if this clone already exists + # If the repository exists and isn't a git repo, exit now as we don't want to + # overwrite it if loc.exists(): - rmtree(loc) + if not Path(loc / ".git").exists(): + raise RuntimeError( + f"The destination for the clone of {repo_source} already exists but " + "isn't a git directory. Exiting so as to not overwrite it." + ) - command = f"git clone {mirror_loc} {loc}" - run_command(command) + # Clone if the repo doesn't exist + else: + command = f"git clone {mirror_loc} {loc}" + run_command(command) - # If not provided a ref, return + # If not provided a ref, pull the latest repository and return if not repo_ref: + run_command(f"git -C {loc} pull") return - source = source.removeprefix("git@github.com:") - user = source.split("/")[0] + repo_source = repo_source.removeprefix("git@github.com:") + user = repo_source.split("/")[0] # Check that the user is different to the Upstream User if user in parent.split("/")[0]: user = None @@ -102,24 +122,33 @@ def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None: """ Clone the repo and checkout the provided ref Only if a remote source + - repo_source: ssh url of the source repository + - repo_ref: git ref for the source. An empty string will get the default branch + - loc: path to clone the repository to """ - # Remove if this clone already exists - if loc.exists(): - rmtree(loc) - - # Create a clean clone location - loc.mkdir(parents=True) - - commands = ( - f"git -C {loc} init", - f"git -C {loc} remote add origin {repo_source}", - f"git -C {loc} fetch origin {repo_ref}", - f"git -C {loc} checkout FETCH_HEAD", - f"git -C {loc} fetch origin main:main", - ) - for command in commands: - run_command(command) + if not loc.exists(): + # Create a clean clone location + loc.mkdir(parents=True) + + # This process is equivalent to doing a git clone + # It saves a small amount of space by not fetching all refs + commands = ( + f"git -C {loc} init", + f"git -C {loc} remote add origin {repo_source}", + f"git -C {loc} fetch origin {repo_ref}", + f"git -C {loc} checkout FETCH_HEAD", + f"git -C {loc} fetch origin main:main", + ) + for command in commands: + run_command(command) + else: + commands = ( + f"git -C {loc} fetch origin {repo_ref}", + f"git -C {loc} checkout FETCH_HEAD", + ) + for command in commands: + run_command(command) def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: @@ -165,8 +194,8 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: # Ignore errors - these are likely because the main branch already exists # Instead write them as warnings command = f"git -C {loc} fetch origin main:main" - result = run_command(command, check=False) - if result and result.returncode: + result = run_command(command, check=False, rval=True) + if result.returncode: print("Warning - fetching main from origin resulted in an error") print("This is likely due to the main branch already existing") print(f"Error message:\n\n{result.stderr}") From 70073e6b7790b6c18d78cb3485e003d6aacffdeb Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:16:52 +0000 Subject: [PATCH 2/4] revert accidental changes --- github_scripts/get_git_sources.py | 53 ++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 230d9cc4..b40368a7 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -1,8 +1,8 @@ -# *****************************COPYRIGHT******************************* +# ----------------------------------------------------------------------------- # (C) Crown copyright Met Office. All rights reserved. -# For further details please refer to the file COPYRIGHT.txt -# which you should have received as part of this distribution. -# *****************************COPYRIGHT******************************* +# The file LICENCE, distributed with this code, contains details of the terms +# under which the code may be used. +# ----------------------------------------------------------------------------- """ Helper functions for cloning git sources in command line builds """ @@ -13,6 +13,9 @@ from pathlib import Path from shutil import rmtree import shlex +import logging + +logger = logging.getLogger(__name__) def get_source( @@ -38,7 +41,10 @@ def get_source( def run_command( - command: str, rval: bool = False, check: bool = True + command: str, + check: bool = True, + capture: bool = True, + timeout: int = 600 ) -> Optional[subprocess.CompletedProcess]: """ Run a subprocess command and return the result object @@ -47,23 +53,32 @@ def run_command( Outputs: - result object from subprocess.run """ - command = shlex.split(command) - result = subprocess.run( - command, - capture_output=True, - text=True, - timeout=300, - shell=False, - check=False, - ) - if check and result.returncode: - print(result.stdout, end="\n\n\n") - raise RuntimeError( - f"[FAIL] Issue found running command {command}\n\n{result.stderr}" + + args = shlex.split(command) + + try: + # Note: text=True and capture_output=True have high overhead + # for large buffers. Use capture=False for fire-and-forget tasks. + result = subprocess.run( + args, + capture_output=capture, + text=capture, + timeout=timeout, + shell=False, + check=False ) - if rval: + if check and result.returncode != 0: + err_msg = (result.stderr or "").strip() + logger.error(f"[FAIL] Command failed: {command}\nError: {err_msg}") + raise subprocess.CalledProcessError( + result.returncode, args, output=result.stdout, stderr=result.stderr + ) return result + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + logger.error(f"[FAIL] Execution error for '{args[0]}': {e}") + raise + def clone_repo_mirror( repo_source: str, repo_ref: str, parent: str, mirror_loc: Path, loc: Path From 538520b4b7998b4c459e4ea63bc443f4ea80b604 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:46:34 +0000 Subject: [PATCH 3/4] fix --- github_scripts/get_git_sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index b40368a7..8bd0261b 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -209,7 +209,7 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: # Ignore errors - these are likely because the main branch already exists # Instead write them as warnings command = f"git -C {loc} fetch origin main:main" - result = run_command(command, check=False, rval=True) + result = run_command(command, check=False) if result.returncode: print("Warning - fetching main from origin resulted in an error") print("This is likely due to the main branch already existing") From 5a0c657d5b246c14f708da0d2f2de358662efe39 Mon Sep 17 00:00:00 2001 From: James Bruten <109733895+james-bruten-mo@users.noreply.github.com> Date: Tue, 27 Jan 2026 08:36:54 +0000 Subject: [PATCH 4/4] fix merge --- github_scripts/get_git_sources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_scripts/get_git_sources.py b/github_scripts/get_git_sources.py index 8bd0261b..56681973 100644 --- a/github_scripts/get_git_sources.py +++ b/github_scripts/get_git_sources.py @@ -24,7 +24,7 @@ def get_source( dest: Path, repo: str, use_mirrors: bool = False, - mirror_loc: Path = "", + mirror_loc: Path = Path(""), ) -> None: if ".git" in source: @@ -210,7 +210,7 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None: # Instead write them as warnings command = f"git -C {loc} fetch origin main:main" result = run_command(command, check=False) - if result.returncode: + if result and result.returncode: print("Warning - fetching main from origin resulted in an error") print("This is likely due to the main branch already existing") print(f"Error message:\n\n{result.stderr}")