Skip to content
202 changes: 202 additions & 0 deletions src/strands/vended_plugins/skills/_url_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""Utilities for loading skills from remote Git repository URLs.

This module provides functions to detect URL-type skill sources, parse
optional version references, clone repositories with shallow depth, and
manage a local cache of cloned skill repositories.
"""

from __future__ import annotations

import hashlib
import logging
import re
import shutil
import subprocess
from pathlib import Path

logger = logging.getLogger(__name__)

# Default on-disk location for cloned skill repositories; callers may override
# it per call via the ``cache_dir`` argument of ``clone_skill_repo``.
_DEFAULT_CACHE_DIR = Path.home() / ".cache" / "strands" / "skills"

# Patterns that indicate a string is a URL rather than a local path
_URL_PREFIXES = ("https://", "http://", "git@", "ssh://")

# Regex to strip .git suffix from URLs before ref parsing
_GIT_SUFFIX = re.compile(r"\.git$")

# Matches GitHub /tree/<ref> or /tree/<ref>/<path> (also /blob/)
# e.g. /owner/repo/tree/main/skills/my-skill -> groups: (/owner/repo, main, skills/my-skill)
# The ref group stops at the first "/", so a branch name containing a slash
# cannot be expressed in this URL form.
_GITHUB_TREE_PATTERN = re.compile(r"^(/[^/]+/[^/]+)/(?:tree|blob)/([^/]+)(?:/(.+?))?/?$")


def is_url(source: str) -> bool:
    """Report whether a skill source should be treated as a remote URL.

    Args:
        source: The skill source string to check.

    Returns:
        True when ``source`` begins with one of the prefixes listed in
        ``_URL_PREFIXES``; False otherwise.
    """
    # str.startswith accepts a tuple and succeeds if any member is a prefix
    return source.startswith(_URL_PREFIXES)


def parse_url_ref(url: str) -> tuple[str, str | None, str | None]:
    """Parse a skill URL into a clone URL, optional Git ref, and optional subpath.

    Supports an ``@ref`` suffix for specifying a ref::

        https://github.com/org/skill-repo@v1.0.0 -> (https://github.com/org/skill-repo, v1.0.0, None)
        https://github.com/org/skill-repo        -> (https://github.com/org/skill-repo, None, None)
        git@github.com:org/skill-repo.git@v1.0.0 -> (git@github.com:org/skill-repo.git, v1.0.0, None)

    Also supports GitHub web URLs with ``/tree/<ref>/path`` (or ``/blob/``)::

        https://github.com/org/repo/tree/main/skills/my-skill
            -> (https://github.com/org/repo, main, skills/my-skill)

    In the ``/tree/`` form the ref is the single path segment after ``/tree/``,
    so a ref containing ``/`` cannot be expressed that way; use ``@ref`` instead.

    Args:
        url: The skill URL, optionally with an ``@ref`` suffix or ``/tree/`` path.

    Returns:
        Tuple of (clone_url, ref_or_none, subpath_or_none). An empty ref
        (a trailing ``@`` with nothing after it) is reported as ``None``.
    """
    if url.startswith(("https://", "http://", "ssh://")):
        # Only the path after the host is inspected, so an "@" in the
        # authority part (e.g. ssh://git@host/...) is never taken as a ref.
        scheme_end = url.index("//") + 2
        host_end = url.find("/", scheme_end)
        if host_end == -1:
            return url, None, None

        origin, path_part = url[:host_end], url[host_end:]

        # Handle GitHub /tree/<ref>/path and /blob/<ref>/path URLs
        tree_match = _GITHUB_TREE_PATTERN.match(path_part)
        if tree_match:
            owner_repo, ref, subpath = tree_match.groups()
            return origin + owner_repo, ref, subpath or None

        base_path, ref = _split_trailing_ref(path_part)
        return origin + base_path, ref, None

    if url.startswith("git@"):
        # SSH format: git@host:owner/repo.git@ref
        # The first @ is part of the SSH URL format, so only the text after
        # it is searched for a ref separator.
        user_end = url.index("@") + 1
        base_rest, ref = _split_trailing_ref(url[user_end:])
        return url[:user_end] + base_rest, ref, None

    return url, None, None


def _split_trailing_ref(text: str) -> tuple[str, str | None]:
    """Split ``text`` at its last ``@`` into ``(base, ref_or_none)``.

    A trailing ``.git`` is set aside before searching so that in
    ``repo@v1.git`` it is not captured as part of the ref; it is re-attached
    to the base afterwards. In ``repo.git@v1`` nothing is set aside and the
    ``.git`` stays on the base naturally.

    Args:
        text: The portion of a URL that may end in ``@ref``.

    Returns:
        ``(text, None)`` unchanged when there is no ``@``; otherwise the text
        before the last ``@`` and the ref after it (``None`` if it is empty).
    """
    stem = _GIT_SUFFIX.sub("", text)
    base, separator, ref = stem.rpartition("@")
    if not separator:
        return text, None
    if stem != text:
        base += ".git"
    # Normalise an empty ref to None so callers get the documented contract
    return base, ref or None


def cache_key(url: str, ref: str | None) -> str:
    """Derive a stable cache directory name for a (URL, ref) pair.

    Args:
        url: The clone URL.
        ref: The optional Git ref; a falsy ref contributes nothing to the key.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the key material,
        suitable for use as a directory name.
    """
    material = url
    if ref:
        material = f"{url}@{ref}"
    digest = hashlib.sha256(material.encode())
    return digest.hexdigest()[:16]


def clone_skill_repo(
    url: str,
    *,
    ref: str | None = None,
    subpath: str | None = None,
    cache_dir: Path | None = None,
) -> Path:
    """Clone a skill repository to a local cache directory.

    Uses ``git clone --depth 1`` for efficiency. If a ``ref`` is provided it
    is passed as ``--branch`` (works for branches and tags). Repositories are
    cached by a hash of (url, ref); when the cache directory for that hash
    already exists it is reused without running ``git``.

    If ``subpath`` is provided, the returned path points to that subdirectory
    within the cloned repository (useful for mono-repos containing skills in
    nested directories).

    Args:
        url: The Git clone URL.
        ref: Optional branch or tag to check out.
        subpath: Optional path within the repo to return (e.g. ``skills/my-skill``).
        cache_dir: Override the default cache directory
            (``~/.cache/strands/skills/``).

    Returns:
        Path to the cloned repository root, or to ``subpath`` within it.

    Raises:
        RuntimeError: If the clone fails or times out, if ``git`` is not
            installed, or if ``subpath`` does not name a directory inside the
            cloned repository.
    """
    cache_dir = cache_dir or _DEFAULT_CACHE_DIR
    cache_dir.mkdir(parents=True, exist_ok=True)

    target = cache_dir / cache_key(url, ref)

    if target.exists():
        logger.debug("url=<%s>, ref=<%s> | using cached skill at %s", url, ref, target)
    else:
        logger.info("url=<%s>, ref=<%s> | cloning skill repository", url, ref)

        cmd: list[str] = ["git", "clone", "--depth", "1"]
        if ref:
            cmd.extend(["--branch", ref])
        cmd.extend([url, str(target)])

        try:
            subprocess.run(  # noqa: S603
                cmd,
                check=True,
                capture_output=True,
                text=True,
                timeout=120,
            )
        except FileNotFoundError as e:
            raise RuntimeError("git is required to load skills from URLs but was not found on PATH") from e
        except subprocess.CalledProcessError as e:
            # Clean up any partial clone so it is not mistaken for a cache hit
            shutil.rmtree(target, ignore_errors=True)
            raise RuntimeError(
                f"url=<{url}>, ref=<{ref}> | failed to clone skill repository: {(e.stderr or '').strip()}"
            ) from e
        except subprocess.TimeoutExpired as e:
            # A timed-out clone leaves a partial directory behind; without
            # removing it, later calls would treat it as a valid cached clone.
            shutil.rmtree(target, ignore_errors=True)
            raise RuntimeError(
                f"url=<{url}>, ref=<{ref}> | timed out after {e.timeout} seconds while cloning skill repository"
            ) from e

    if not subpath:
        result = target
    else:
        result = target / subpath

        # Joining an absolute subpath or one containing ".." can point outside
        # the clone; refuse anything that does not resolve inside it.
        try:
            result.resolve().relative_to(target.resolve())
        except ValueError as e:
            raise RuntimeError(
                f"url=<{url}>, subpath=<{subpath}> | subdirectory is outside the cloned repository"
            ) from e

        if not result.is_dir():
            raise RuntimeError(f"url=<{url}>, subpath=<{subpath}> | subdirectory does not exist in cloned repository")

    logger.debug("url=<%s>, ref=<%s> | resolved to %s", url, ref, result)
    return result
20 changes: 19 additions & 1 deletion src/strands/vended_plugins/skills/agent_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(
state_key: str = _DEFAULT_STATE_KEY,
max_resource_files: int = _DEFAULT_MAX_RESOURCE_FILES,
strict: bool = False,
cache_dir: Path | None = None,
) -> None:
"""Initialize the AgentSkills plugin.

Expand All @@ -86,11 +87,16 @@ def __init__(
- A ``str`` or ``Path`` to a skill directory (containing SKILL.md)
- A ``str`` or ``Path`` to a parent directory (containing skill subdirectories)
- A ``Skill`` dataclass instance
- A remote Git URL (``https://``, ``git@``, or ``ssh://``)
with optional ``@ref`` suffix for branch/tag pinning
state_key: Key used to store plugin state in ``agent.state``.
max_resource_files: Maximum number of resource files to list in skill responses.
strict: If True, raise on skill validation issues. If False (default), warn and load anyway.
cache_dir: Directory for caching cloned skill repositories.
Defaults to ``~/.cache/strands/skills/``.
"""
self._strict = strict
self._cache_dir = cache_dir
self._skills: dict[str, Skill] = self._resolve_skills(_normalize_sources(skills))
self._state_key = state_key
self._max_resource_files = max_resource_files
Expand Down Expand Up @@ -284,21 +290,33 @@ def _resolve_skills(self, sources: list[SkillSource]) -> dict[str, Skill]:
"""Resolve a list of skill sources into Skill instances.

Each source can be a Skill instance, a path to a skill directory,
or a path to a parent directory containing multiple skills.
a path to a parent directory containing multiple skills, or a remote
Git URL.
Comment thread
mkmeral marked this conversation as resolved.
Outdated

Args:
sources: List of skill sources to resolve.

Returns:
Dict mapping skill names to Skill instances.
"""
from ._url_loader import is_url
Comment thread
mkmeral marked this conversation as resolved.
Outdated

resolved: dict[str, Skill] = {}

for source in sources:
if isinstance(source, Skill):
if source.name in resolved:
logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", source.name)
resolved[source.name] = source
elif isinstance(source, str) and is_url(source):
try:
url_skills = Skill.from_url(source, cache_dir=self._cache_dir, strict=self._strict)
for skill in url_skills:
if skill.name in resolved:
logger.warning("name=<%s> | duplicate skill name, overwriting previous skill", skill.name)
resolved[skill.name] = skill
except (RuntimeError, ValueError) as e:
logger.warning("url=<%s> | failed to load skill from URL: %s", source, e)
else:
path = Path(source).resolve()
if not path.exists():
Expand Down
54 changes: 54 additions & 0 deletions src/strands/vended_plugins/skills/skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,60 @@ def from_content(cls, content: str, *, strict: bool = False) -> Skill:

return _build_skill_from_frontmatter(frontmatter, body)

@classmethod
def from_url(
    cls,
    url: str,
    *,
    cache_dir: Path | None = None,
    strict: bool = False,
) -> list[Skill]:
    """Load skill(s) from a remote Git repository URL.

    Clones the repository (or uses a cached copy) and then loads skills
    from the resulting directory. If that directory contains a ``SKILL.md``
    (or ``skill.md``) file it is loaded as a single skill via ``from_file``;
    otherwise it is passed to ``from_directory`` to be scanned for skill
    subdirectories.

    Supports an optional ``@ref`` suffix for branch or tag pinning::

        skills = Skill.from_url("https://github.com/org/my-skill@v1.0.0")

    Also supports GitHub web URLs pointing to subdirectories::

        skills = Skill.from_url("https://github.com/org/repo/tree/main/skills/my-skill")

    Args:
        url: A Git-cloneable URL, optionally with an ``@ref`` suffix or
            a GitHub ``/tree/<ref>/path`` URL.
        cache_dir: Override the default cache directory
            (``~/.cache/strands/skills/``).
        strict: If True, raise on any validation issue. If False (default),
            warn and load anyway.

    Returns:
        List of Skill instances loaded from the repository.

    Raises:
        ValueError: If ``url`` is not recognised as a remote URL.
        RuntimeError: If the repository cannot be cloned or ``git`` is not
            available.
    """
    from ._url_loader import clone_skill_repo, is_url, parse_url_ref

    if not is_url(url):
        raise ValueError(f"url=<{url}> | not a valid remote URL")

    clean_url, ref, subpath = parse_url_ref(url)
    repo_path = clone_skill_repo(clean_url, ref=ref, subpath=subpath, cache_dir=cache_dir)

    # repo_path is the clone root, or the requested subdirectory within it;
    # a skill file directly inside it means the path is itself one skill.
    if any((repo_path / name).is_file() for name in ("SKILL.md", "skill.md")):
        return [cls.from_file(repo_path, strict=strict)]
    return cls.from_directory(repo_path, strict=strict)

@classmethod
def from_directory(cls, skills_dir: str | Path, *, strict: bool = False) -> list[Skill]:
"""Load all skills from a parent directory containing skill subdirectories.
Expand Down
Loading
Loading