def get_source(
    source: str,
    ref: str,
    dest: Path,
    repo: str,
    use_mirrors: bool = False,
    mirror_loc: Path = Path(""),
) -> None:
    """
    Fetch one source into dest, picking the method from the source string
    - source: location of the source. Any value containing ".git" is treated
              as a git repository to clone; anything else is passed to
              sync_repo
    - ref: git ref to check out. Passed through unchanged to the helper used
    - dest: path to place the source in
    - repo: name of the repository. Used in the progress messages and to build
            the mirror path
    - use_mirrors: if True, sources containing ".git" are cloned with
                   clone_repo_mirror rather than clone_repo
    - mirror_loc: root directory of the local mirrors. The mirror used is
                  <mirror_loc>/MetOffice/<repo>. Only read when use_mirrors
                  is True
    """

    # NOTE: this is a plain substring test, not a suffix test, so any source
    # with ".git" anywhere in it takes one of the clone routes below
    if ".git" not in source:
        print(f"Syncing {repo} at ref {ref}")
        sync_repo(source, ref, dest)
        return

    if not use_mirrors:
        print(f"Cloning/Updating {repo} from {source} at ref {ref}")
        clone_repo(source, ref, dest)
        return

    # The repo name is also what gets handed on as the "parent" argument
    repo_mirror = Path(mirror_loc) / "MetOffice" / repo
    print(f"Cloning/Updating {repo} from mirror {repo_mirror} at ref {ref}")
    clone_repo_mirror(source, ref, repo, repo_mirror, dest)
def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None:
    """
    Clone the repo and checkout the provided ref, or update an existing clone
    Only if a remote source
    - repo_source: ssh url of the source repository
    - repo_ref: git ref for the source. An empty string will get the default branch
    - loc: path to clone the repository to. If it already exists it must be a
           git working copy, in which case the ref is fetched and checked out
           in place rather than re-cloning

    Raises RuntimeError if loc exists but does not contain a .git entry
    """

    new_clone = not loc.exists()

    if new_clone:
        # Create a clean clone location
        loc.mkdir(parents=True)
    elif not (loc / ".git").exists():
        # "git -C <dir>" searches parent directories for a repository, so
        # running fetch/checkout here could modify an enclosing working copy
        # instead of failing. Stop before touching anything.
        raise RuntimeError(
            f"The destination for the clone of {repo_source} already exists but "
            "isn't a git directory. Exiting so as to not overwrite it."
        )

    commands = []
    if new_clone:
        # init + remote add + fetch is equivalent to doing a git clone
        # It saves a small amount of space by not fetching all refs
        commands += [
            f"git -C {loc} init",
            f"git -C {loc} remote add origin {repo_source}",
        ]

    # Shared by both paths: get the requested ref and check it out detached
    commands += [
        f"git -C {loc} fetch origin {repo_ref}",
        f"git -C {loc} checkout FETCH_HEAD",
    ]

    if new_clone:
        # A fresh clone also gets a local main branch; an existing clone
        # is left as it is
        commands.append(f"git -C {loc} fetch origin main:main")

    for command in commands:
        run_command(command)