Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d0326b8
copy changes from apps
james-bruten-mo Jan 26, 2026
70073e6
revert accidental changes
james-bruten-mo Jan 26, 2026
c191b53
Merge branch 'main' into copy_overwrite_changes
james-bruten-mo Jan 26, 2026
538520b
fix
james-bruten-mo Jan 26, 2026
37ee629
allow multiple sources
james-bruten-mo Jan 26, 2026
e324394
add merge_sources script
james-bruten-mo Jan 26, 2026
083fa1c
make path
james-bruten-mo Jan 26, 2026
a0645ca
bug
james-bruten-mo Jan 26, 2026
ef2c488
bug
james-bruten-mo Jan 26, 2026
5a0c657
fix merge
james-bruten-mo Jan 27, 2026
2c5eddc
Merge branch 'copy_overwrite_changes' into suite_merge_script
james-bruten-mo Jan 27, 2026
468c382
mirror fix
james-bruten-mo Jan 27, 2026
66e5dcf
setup merging of branches
james-bruten-mo Jan 27, 2026
da3860a
ruff
james-bruten-mo Jan 27, 2026
8a69af2
remove remote
james-bruten-mo Jan 27, 2026
732a196
working merge script
james-bruten-mo Jan 27, 2026
2ad2543
add another error
james-bruten-mo Jan 27, 2026
43c8f0e
modify argument order
james-bruten-mo Jan 28, 2026
7a9a6c7
Update github_scripts/get_git_sources.py
james-bruten-mo Jan 28, 2026
60997bd
Update github_scripts/merge_sources.py
james-bruten-mo Jan 28, 2026
9224a82
Update github_scripts/merge_sources.py
james-bruten-mo Jan 28, 2026
18f8610
Update github_scripts/rose_stem_extract_source.py
james-bruten-mo Jan 28, 2026
6bc07c6
update to use logger
james-bruten-mo Jan 28, 2026
7fb1810
loggin gchanges
james-bruten-mo Jan 28, 2026
cf12652
loggin gchanges
james-bruten-mo Jan 28, 2026
da8239d
logging changes
james-bruten-mo Jan 28, 2026
5463179
logging changes
james-bruten-mo Jan 28, 2026
5835d36
logging changes
james-bruten-mo Jan 28, 2026
6f5f623
logging changes
james-bruten-mo Jan 28, 2026
682968a
logging changes
james-bruten-mo Jan 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 208 additions & 42 deletions github_scripts/get_git_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,26 @@
# The file LICENCE, distributed with this code, contains details of the terms
# under which the code may be used.
# -----------------------------------------------------------------------------

"""
Clone sources for a rose-stem run for use with git bdiff module in scripts
Helper functions for cloning git sources in command line builds
"""

import re
import subprocess
from datetime import datetime
from typing import Optional
from pathlib import Path
from shutil import rmtree
import shlex
import sys
import logging

# Module-level logger, named after this module, used by the helpers below
logger = logging.getLogger(__name__)
# NOTE(review): this configures logging at import time (INFO level, written to
# stdout), so importing this module sets an output choice for the caller.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to put the basicConfig() call in the main() functions of the top-level scripts, allowing potential reuse of this module without forcing an output choice on the caller.



def run_command(
command: str,
check: bool = True,
capture: bool = True,
timeout: int = 600
command: str, check: bool = True, capture: bool = True, timeout: int = 600
) -> Optional[subprocess.CompletedProcess]:
"""
Run a subprocess command and return the result object
Expand All @@ -36,15 +35,13 @@ def run_command(
args = shlex.split(command)

try:
# Note: text=True and capture_output=True have high overhead
# for large buffers. Use capture=False for fire-and-forget tasks.
result = subprocess.run(
args,
capture_output=capture,
text=capture,
timeout=timeout,
shell=False,
check=False
check=False,
)
if check and result.returncode != 0:
err_msg = (result.stderr or "").strip()
Expand All @@ -59,29 +56,172 @@ def run_command(
raise


def get_source(
    source: str,
    ref: str,
    dest: Path,
    repo: str,
    use_mirrors: bool = False,
    mirror_loc: Path = Path(""),
) -> None:
    """
    Dispatch to the clone or sync helper appropriate for the given source
    - source: location of the source. Anything containing ".git" is cloned,
      anything else is passed to sync_repo
    - ref: git ref to check out
    - dest: path the source is placed in
    - repo: repository name, used for logging and to locate the mirror
    - use_mirrors: clone from a local git mirror rather than from source
    - mirror_loc: root of the local mirrors; the mirror used is
      <mirror_loc>/MetOffice/<repo>
    """

    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Sources without ".git" in them are synced rather than cloned
    if ".git" not in source:
        logger.info(f"[{stamp}] Syncing " f"{repo} at ref {ref}")
        sync_repo(source, ref, dest)
        return

    # Direct clone from the source
    if not use_mirrors:
        logger.info(f"[{stamp}] Cloning " f"{repo} from {source} at ref {ref}")
        clone_repo(source, ref, dest)
        return

    # Clone via the local mirror; the log line reports the mirror root as given
    logger.info(f"[{stamp}] Cloning " f"{repo} from {mirror_loc} at ref {ref}")
    repo_mirror = Path(mirror_loc) / "MetOffice" / repo
    clone_repo_mirror(source, ref, repo_mirror, dest)


def merge_source(
    source: str,
    ref: str,
    dest: Path,
    repo: str,
    use_mirrors: bool = False,
    mirror_loc: Path = Path(""),
) -> None:
    """
    Merge git source into a local git clone. Assumes dest is a git clone that this
    source can be merged into.
    - source: location of the source to merge in
    - ref: git ref of the source to merge
    - dest: path to the existing git clone that receives the merge
    - repo: repository name, used for logging and to locate the mirror
    - use_mirrors: fetch from <mirror_loc>/MetOffice/<repo> instead of source
    - mirror_loc: root of the local git mirrors

    Raises subprocess.CalledProcessError if the merge fails without leaving any
    unmerged files. Conflicts are passed to handle_merge_conflicts, which raises
    a RuntimeError if it cannot resolve them.
    """

    logger.info(
        f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] Merging "
        f"{source} at ref {ref} into {repo}"
    )

    # Work out where to fetch from and what to fetch before touching the clone
    if use_mirrors:
        remote_path = Path(mirror_loc) / "MetOffice" / repo
        fetch = determine_mirror_fetch(source, ref)
    else:
        remote_path = source
        fetch = ref

    run_command(f"git -C {dest} remote add local {remote_path}")
    try:
        run_command(f"git -C {dest} fetch local {fetch}")
        command = f"git -C {dest} merge --no-gpg-sign FETCH_HEAD"
        result = run_command(command, check=False)
        if result and result.returncode:
            if get_unmerged(dest):
                handle_merge_conflicts(source, ref, dest, repo)
            else:
                # The merge failed for a reason other than a conflict
                raise subprocess.CalledProcessError(
                    result.returncode, command, result.stdout, result.stderr
                )
    finally:
        # Always remove the added remote, even when the fetch or merge failed,
        # otherwise the next call fails at "remote add local"
        run_command(f"git -C {dest} remote remove local")


def handle_merge_conflicts(source: str, ref: str, loc: Path, dependency: str) -> None:
    """
    Attempt to mark merge conflicts as resolved if they are in rose-stem or
    dependencies.yaml
    If others remain then raise an error
    - source: source being merged, used in the error message
    - ref: git ref being merged, used in the error message
    - loc: path to the git clone containing the in-progress merge
    - dependency: name of the dependency being merged into, used in the error
      message

    Raises RuntimeError listing the conflicting files if any conflicts remain.
    """

    # For suites, merge conflicts in these files/directories are unimportant so accept
    # the current changes
    conflicted = get_unmerged(loc)
    for filepath in ("dependencies.yaml", "rose-stem"):
        # Only act on paths that really are in conflict: git checkout fails on a
        # pathspec matching nothing, and the warning would be misleading
        in_conflict = any(
            name == filepath or name.startswith(f"{filepath}/") for name in conflicted
        )
        if not in_conflict:
            continue
        logger.warning(f"Ignoring merge conflicts in {filepath}")
        run_command(f"git -C {loc} checkout --ours -- {filepath}")
        run_command(f"git -C {loc} add {filepath}")

    # Check if there are any remaining merge conflicts
    unmerged = get_unmerged(loc)
    if unmerged:
        files = "\n".join(unmerged)
        raise RuntimeError(
            "\nA merge conflict has been identified while merging the following branch "
            f"into the {dependency} source:\n\nsource: {source}\nref: {ref}\n\n"
            f"with conflicting files:\n{files}"
            "\n\nThese will need changing in the source branches to be useable together"
        )


def get_unmerged(loc: Path) -> list[str]:
    """
    Return list of unmerged files in a git clone
    - loc: path to the git clone to inspect

    Returns one entry per line of git output, or an empty list if there are no
    unmerged files or the command returned no result.
    """

    files = run_command(f"git -C {loc} --no-pager diff --name-only --diff-filter=U")
    if files is None or not files.stdout:
        return []
    # One path per line; splitting on any whitespace would break names with spaces
    return [line for line in files.stdout.splitlines() if line.strip()]


def clone_repo_mirror(
    repo_source: str,
    repo_ref: str,
    mirror_loc: Path,
    loc: Path,
) -> None:
    """
    Clone a repo source using a local git mirror.
    Assume the mirror is set up as per the Met Office
    - repo_source: ssh url of the source repository
    - repo_ref: git ref for the source. An empty string will get the default branch
    - mirror_loc: path to the local git mirrors
    - loc: path to clone the repository to

    Raises RuntimeError if loc already exists but is not a git clone.
    """

    if loc.exists():
        # If the repository exists and isn't a git repo, exit now as we don't want
        # to overwrite it
        if not (loc / ".git").exists():
            raise RuntimeError(
                f"The destination for the clone of {repo_source} already exists but "
                "isn't a git directory. Exiting so as to not overwrite it."
            )
    else:
        # Clone if the repo doesn't exist
        run_command(f"git clone {mirror_loc} {loc}")

    # If not provided a ref, pull the latest repository and return
    if not repo_ref:
        run_command(f"git -C {loc} pull")
        return

    # Fetch the requested ref from the mirror and check it out
    fetch = determine_mirror_fetch(repo_source, repo_ref)
    commands = (
        f"git -C {loc} fetch origin {fetch}",
        f"git -C {loc} checkout FETCH_HEAD",
    )
    for command in commands:
        run_command(command)


def determine_mirror_fetch(repo_source: str, repo_ref: str) -> str:
"""
Determine the fetch ref for the git mirrors
"""

repo_source = repo_source.removeprefix("[email protected]:")
user = repo_source.split("/")[0]
# Check that the user is different to the Upstream User
if user in parent.split("/")[0]:
if "MetOffice" in user:
user = None

# If the ref is a hash then we don't need the fork user as part of the fetch.
Expand All @@ -90,36 +230,41 @@ def clone_repo_mirror(
fetch = repo_ref
else:
fetch = f"{user}/{repo_ref}"
commands = (
f"git -C {loc} fetch origin {fetch}",
f"git -C {loc} checkout FETCH_HEAD",
)
for command in commands:
run_command(command)

return fetch


def clone_repo(repo_source: str, repo_ref: str, loc: Path) -> None:
    """
    Clone the repo and checkout the provided ref
    Only if a remote source
    - repo_source: ssh url of the source repository
    - repo_ref: git ref for the source. An empty string will get the default branch
    - loc: path to clone the repository to

    If loc already exists it is reused: the ref is fetched and checked out in the
    existing clone. Raises RuntimeError if loc exists but is not a git clone.
    """

    if loc.exists():
        # Don't run git in a directory that isn't a clone: "git -C" would either
        # fail or act on an enclosing repository
        if not (loc / ".git").exists():
            raise RuntimeError(
                f"The destination for the clone of {repo_source} already exists but "
                "isn't a git directory. Exiting so as to not overwrite it."
            )
        commands = (
            f"git -C {loc} fetch origin {repo_ref}",
            f"git -C {loc} checkout FETCH_HEAD",
        )
    else:
        # Create a clean clone location
        loc.mkdir(parents=True)

        # This process is equivalent to doing a git clone
        # It saves a small amount of space by not fetching all refs
        commands = (
            f"git -C {loc} init",
            f"git -C {loc} remote add origin {repo_source}",
            f"git -C {loc} fetch origin {repo_ref}",
            f"git -C {loc} checkout FETCH_HEAD",
            f"git -C {loc} fetch origin main:main",
        )

    for command in commands:
        run_command(command)


def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None:
Expand Down Expand Up @@ -167,10 +312,31 @@ def sync_repo(repo_source: str, repo_ref: str, loc: Path) -> None:
command = f"git -C {loc} fetch origin main:main"
result = run_command(command, check=False)
if result and result.returncode:
print("Warning - fetching main from origin resulted in an error")
print("This is likely due to the main branch already existing")
print(f"Error message:\n\n{result.stderr}")
logger.warning(
"Fetching main from origin resulted in an error."
"This is likely due to the main branch already existing"
f"\nError message:\n\n{result.stderr}"
)

if repo_ref:
command = f"git -C {loc} checkout {repo_ref}"
run_command(command)


def set_https(dependencies: dict) -> dict:
    """
    Change sources in a dependencies dictionary to use https instead of ssh
    - dependencies: mapping of dependency name to either a single options dict
      or a list of options dicts, each with a "source" entry

    The options dicts are modified in place and the same dictionary is returned.
    """

    ssh_prefix = "git@github.com:"
    https_prefix = "https://github.com/"

    logger.info("Modifying Dependencies to use https")
    for opts in dependencies.values():
        # A dependency may define a single source or a list of sources
        entries = opts if isinstance(opts, list) else [opts]
        for values in entries:
            source = values["source"]
            if source.startswith(ssh_prefix):
                # Update the entry itself so list-valued dependencies work too
                values["source"] = source.replace(ssh_prefix, https_prefix, 1)

    return dependencies
Loading