Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/integration_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
juju-channel: 3.6/stable
provider: lxd
test-tox-env: integration-juju3.6
modules: '["test_multi_unit_same_machine", "test_charm_metrics_failure", "test_charm_metrics_success", "test_charm_fork_path_change", "test_charm_no_runner", "test_charm_upgrade", "test_reactive"]'
modules: '["test_multi_unit_same_machine", "test_charm_fork_path_change", "test_charm_no_runner", "test_charm_upgrade"]'
# INTEGRATION_TOKEN, INTEGRATION_TOKEN_ALT, OS_* are passed through INTEGRATION_TEST_SECRET_ENV_VALUE_<N>
# mapping. See CONTRIBUTING.md for more details.
extra-arguments: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_github_runner_manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
matrix:
test-module:
- test_debug_ssh
- test_metrics
- test_planner_runner
steps:
- name: Checkout code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
from github_runner_manager.errors import IssueMetricEventError
from github_runner_manager.manager.vm_manager import CodeInformation

METRICS_LOG_PATH = Path(os.getenv("METRICS_LOG_PATH", "/var/log/github-runner-metrics.log"))

_DEFAULT_METRICS_LOG_PATH = "/var/log/github-runner-metrics.log"

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -156,8 +155,18 @@ def issue_event(event: Event) -> None:
Raises:
IssueMetricEventError: If the event cannot be logged.
"""
metrics_log_path = get_metrics_log_path()
try:
with METRICS_LOG_PATH.open(mode="a", encoding="utf-8") as metrics_file:
with metrics_log_path.open(mode="a", encoding="utf-8") as metrics_file:
metrics_file.write(f"{event.json(exclude_none=True)}\n")
except OSError as exc:
raise IssueMetricEventError(f"Cannot write to {METRICS_LOG_PATH}") from exc
raise IssueMetricEventError(f"Cannot write to {metrics_log_path}") from exc


def get_metrics_log_path() -> Path:
    """Resolve the metrics log location from the environment.

    The ``METRICS_LOG_PATH`` variable is consulted on every call rather than
    once at import time, so tests and reconfiguration can redirect the log.

    Returns:
        The metrics log file path.
    """
    configured = os.getenv("METRICS_LOG_PATH", _DEFAULT_METRICS_LOG_PATH)
    return Path(configured)
93 changes: 7 additions & 86 deletions github-runner-manager/tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"""Fixtures for github-runner-manager integration tests."""

import logging
import subprocess
import time
from pathlib import Path
from typing import Generator
Expand All @@ -15,88 +14,13 @@
from github.Auth import Token
from github.Branch import Branch
from github.Repository import Repository
from openstack.compute.v2.server import Server as OpenstackServer

from .factories import GitHubConfig, OpenStackConfig, ProxyConfig, TestConfig
from .planner_stub import PlannerStub, PlannerStubConfig

logger = logging.getLogger(__name__)


def wait_for_runner(
    openstack_connection: openstack.connection.Connection,
    test_config: TestConfig,
    timeout: int = 300,
    interval: int = 5,
) -> tuple[OpenstackServer, str] | tuple[None, None]:
    """Poll OpenStack until a runner VM with the test prefix exists and has an IP.

    Args:
        openstack_connection: OpenStack connection object.
        test_config: Test configuration with VM prefix.
        timeout: Maximum time to wait in seconds.
        interval: Time between checks in seconds.

    Returns:
        Tuple of (runner, ip) if found, or (None, None) if not found within timeout.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        matching = [
            server
            for server in openstack_connection.list_servers()
            if server.name.startswith(test_config.vm_prefix)
        ]
        if matching:
            candidate = matching[0]
            logger.info("Found runner: %s", candidate.name)

            # Take the first address of the first network that reports one.
            address = None
            for entries in candidate.addresses.values():
                if entries and entries[0]["addr"]:
                    address = entries[0]["addr"]
                    break

            if address:
                return candidate, address

        time.sleep(interval)

    return None, None


def wait_for_no_runners(
    openstack_connection: openstack.connection.Connection,
    test_config: TestConfig,
    timeout: int = 900,
    interval: int = 15,
) -> bool:
    """Wait until no VMs with the test prefix exist on OpenStack.

    Args:
        openstack_connection: OpenStack connection object.
        test_config: Test configuration with VM prefix.
        timeout: Maximum time to wait in seconds.
        interval: Time between checks in seconds.

    Returns:
        True when no matching VMs exist; False if timeout is reached first.
    """
    deadline = time.time() + timeout
    prefix = test_config.vm_prefix
    while time.time() < deadline:
        leftover = any(
            server.name.startswith(prefix)
            for server in openstack_connection.list_servers()
        )
        if not leftover:
            return True
        time.sleep(interval)
    return False


@pytest.fixture(scope="module")
def test_config(pytestconfig: pytest.Config) -> TestConfig:
"""Create a unique test configuration for parallel test execution.
Expand Down Expand Up @@ -355,16 +279,9 @@ def github_branch(
"""
test_branch = f"test-{test_config.test_id}"

sha_result = subprocess.run(
["/usr/bin/git", "rev-parse", "HEAD"],
capture_output=True,
text=True,
check=True,
)
current_commit_sha = sha_result.stdout.strip()

default_branch = github_repository.get_branch(github_repository.default_branch)
branch_ref = github_repository.create_git_ref(
ref=f"refs/heads/{test_branch}", sha=current_commit_sha
ref=f"refs/heads/{test_branch}", sha=default_branch.commit.sha
)

# Wait for branch to be available, GitHub is eventually consistent
Expand All @@ -376,7 +293,11 @@ def github_branch(
while time.time() - start_time < timeout:
try:
branch = github_repository.get_branch(test_branch)
logger.info("Created test branch: %s at SHA: %s", test_branch, current_commit_sha)
logger.info(
"Created test branch: %s at SHA: %s",
test_branch,
default_branch.commit.sha,
)
break
except Exception as e:
elapsed = time.time() - start_time
Expand Down
8 changes: 6 additions & 2 deletions github-runner-manager/tests/integration/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def create_default_config(
test_config: TestConfig | None = None,
planner_url: str | None = None,
planner_token: str | None = None,
reconcile_interval: int = 60,
base_virtual_machines: int = 1,
) -> dict[str, Any]:
"""Create a default test configuration dictionary.

Expand All @@ -154,6 +156,8 @@ def create_default_config(
Defaults to new unique values.
planner_url: Planner service URL. Omitted from config when not provided.
planner_token: Planner service token. Omitted from config when not provided.
reconcile_interval: Minutes between delete-loop reconciliation ticks.
base_virtual_machines: Floor for non-reactive desired runners.

Returns:
Configuration dictionary for the application.
Expand Down Expand Up @@ -235,7 +239,7 @@ def create_default_config(
"labels": ["noble", "x64"],
},
"flavor": {"name": openstack_config.flavor or "small", "labels": ["small"]},
"base_virtual_machines": 1,
"base_virtual_machines": base_virtual_machines,
"max_total_virtual_machines": 0,
}
]
Expand All @@ -256,5 +260,5 @@ def create_default_config(
},
**({"planner_url": planner_url} if planner_url else {}),
**({"planner_token": planner_token} if planner_token else {}),
"reconcile_interval": 60,
"reconcile_interval": reconcile_interval,
}
123 changes: 123 additions & 0 deletions github-runner-manager/tests/integration/metrics_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright 2026 Canonical Ltd.
# See LICENSE file for licensing details.

"""Helpers for app-level integration metrics assertions."""

import json
import time
from pathlib import Path
from typing import Any

from github.Repository import Repository

from github_runner_manager.manager.vm_manager import PostJobStatus
from github_runner_manager.types_.github import JobConclusion

TEST_WORKFLOW_NAMES = [
"Workflow Dispatch Tests",
"Workflow Dispatch Crash Tests",
"Workflow Dispatch Failure Tests 2a34f8b1-41e4-4bcb-9bbf-7a74e6c482f7",
]


def _assert_non_negative_number(metric: dict[str, Any], key: str) -> None:
"""Assert event key exists and contains a non-negative numeric value."""
assert key in metric, f"Missing metric field: {key}"
value = metric[key]
assert isinstance(value, (int, float)), f"Metric field {key} is not numeric: {value!r}"
assert value >= 0, f"Metric field {key} is negative: {value!r}"


def clear_metrics_log(metrics_log_path: Path) -> None:
    """Delete the metrics log so a test starts from an empty state.

    A missing file is not an error, making the call idempotent.
    """
    try:
        metrics_log_path.unlink()
    except FileNotFoundError:
        pass


def get_metrics_events(metrics_log_path: Path) -> list[dict[str, Any]]:
    """Parse the metrics log into a list of event dictionaries.

    Returns an empty list when the log has not been created yet; blank
    lines are skipped, every other line must be a JSON object.
    """
    if not metrics_log_path.exists():
        return []
    events: list[dict[str, Any]] = []
    for raw_line in metrics_log_path.read_text(encoding="utf-8").splitlines():
        if raw_line.strip():
            events.append(json.loads(raw_line))
    return events


def wait_for_events(
    metrics_log_path: Path,
    expected_events: set[str],
    timeout: int = 10 * 60,
    interval: int = 10,
) -> list[dict[str, Any]]:
    """Poll the metrics log until every expected event name has appeared.

    Args:
        metrics_log_path: Path of the metrics log file to read.
        expected_events: Event names that must all be present.
        timeout: Maximum time to wait in seconds.
        interval: Time between polls in seconds.

    Returns:
        All events read from the log once the expected names are present.

    Raises:
        TimeoutError: If the expected events do not appear within timeout.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        events = get_metrics_events(metrics_log_path)
        seen = {event.get("event") for event in events}
        if expected_events.issubset(seen):
            return events
        time.sleep(interval)
    raise TimeoutError(f"Timed out waiting for metrics events: {sorted(expected_events)}")


def assert_events_after_reconciliation(
    events: list[dict[str, Any]],
    flavor: str,
    github_repository: Repository,
    post_job_status: PostJobStatus,
) -> None:
    """Assert runner-start/stop/reconciliation metrics for a completed test flow.

    Args:
        events: Metrics events read from the log.
        flavor: Expected runner flavor for every event.
        github_repository: Repository the workflow ran against.
        post_job_status: Expected post-job status on runner_stop events.
    """
    seen = {event.get("event") for event in events}
    required = {"runner_start", "runner_stop", "reconciliation"}
    assert required <= seen, "Not all metrics events were logged"

    for metric in events:
        name = metric.get("event")
        if name == "runner_start":
            assert metric.get("flavor") == flavor
            assert metric.get("workflow") in TEST_WORKFLOW_NAMES
            assert metric.get("repo") == github_repository.full_name
            assert metric.get("github_event") == "workflow_dispatch"
            _assert_non_negative_number(metric, "idle")
            _assert_non_negative_number(metric, "queue_duration")
        elif name == "runner_stop":
            assert metric.get("flavor") == flavor
            assert metric.get("workflow") in TEST_WORKFLOW_NAMES
            assert metric.get("repo") == github_repository.full_name
            assert metric.get("github_event") == "workflow_dispatch"
            assert metric.get("status") == post_job_status
            if post_job_status == PostJobStatus.ABNORMAL:
                # Abnormal exits must carry a non-zero code; the job may have
                # been cancelled or have no recorded conclusion.
                assert metric.get("status_info", {}).get("code", 0) != 0
                assert metric.get("job_conclusion") in [None, JobConclusion.CANCELLED]
            else:
                assert "status_info" not in metric
                assert metric.get("job_conclusion") == JobConclusion.SUCCESS
            _assert_non_negative_number(metric, "job_duration")
        elif name == "reconciliation":
            assert metric.get("flavor") == flavor
            _assert_non_negative_number(metric, "duration")
            assert metric.get("crashed_runners") == 0
            _assert_non_negative_number(metric, "idle_runners")
            _assert_non_negative_number(metric, "active_runners")
            _assert_non_negative_number(metric, "expected_runners")


def wait_for_runner_to_be_marked_offline(
    github_repository: Repository,
    runner_name: str,
    timeout: int = 30 * 60,
    interval: int = 60,
) -> None:
    """Wait for a runner to become offline or disappear from GitHub.

    Args:
        github_repository: Repository whose self-hosted runners are polled.
        runner_name: Name of the runner expected to go offline.
        timeout: Maximum time to wait in seconds.
        interval: Time between polls in seconds.

    Raises:
        TimeoutError: If the runner is still online when timeout elapses.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        still_online = any(
            runner.name == runner_name and runner.status == "online"
            for runner in github_repository.get_self_hosted_runners()
        )
        if not still_online:
            return
        time.sleep(interval)
    raise TimeoutError(f"Timeout while waiting for runner {runner_name} to be marked offline")
Loading
Loading