diff --git a/.github/workflows/_build-tutorials-base.yml b/.github/workflows/_build-tutorials-base.yml index e7a9fa0220..147c3973e4 100644 --- a/.github/workflows/_build-tutorials-base.yml +++ b/.github/workflows/_build-tutorials-base.yml @@ -110,6 +110,14 @@ jobs: docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh" + - name: Upload build log + if: always() + uses: actions/upload-artifact@v4 + with: + name: build-log-${{ matrix.shard }} + path: _build/build.log + if-no-files-found: ignore + - name: Teardown Linux uses: pytorch/test-infra/.github/actions/teardown-linux@main if: always() @@ -119,6 +127,9 @@ jobs: needs: worker runs-on: [self-hosted, linux.2xlarge] if: ${{ inputs.UPLOAD == 1 }} + permissions: + issues: write + contents: write environment: ${{ github.ref == 'refs/heads/main' && 'pytorchbot-env' || '' }} steps: - name: Setup SSH (Click me for login details) @@ -148,6 +159,18 @@ jobs: with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} + - name: Download worker build logs + uses: actions/download-artifact@v4 + with: + pattern: build-log-* + path: _build/worker-logs + + - name: Merge build logs + shell: bash + run: | + mkdir -p _build + cat _build/worker-logs/build-log-*/build.log > _build/build.log 2>/dev/null || true + - name: Build shell: bash env: @@ -158,6 +181,7 @@ jobs: JOB_TYPE: manager COMMIT_SOURCE: ${{ github.ref }} GITHUB_PYTORCHBOT_TOKEN: ${{ secrets.PYTORCHBOT_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} USE_NIGHTLY: ${{ inputs.USE_NIGHTLY }} run: | set -ex @@ -172,6 +196,7 @@ jobs: -e JOB_TYPE \ -e COMMIT_SOURCE \ -e GITHUB_PYTORCHBOT_TOKEN \ + -e GITHUB_TOKEN \ -e USE_NIGHTLY \ --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ --tty \ @@ -184,6 +209,26 @@ jobs: docker exec -u ci-user -t "${container_name}" sh -c ".jenkins/build.sh" + - name: Generate API deprecation report + if: always() + shell: bash + run: | + python3 -m tools.deprecation_checker.api_report \ + --build-log 
_build/build.log \ + -o _build/api_report.md \ + --create-issue + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true + + - name: Upload API deprecation report + if: always() + uses: actions/upload-artifact@v4 + with: + name: api-deprecation-report + path: _build/api_report.md + if-no-files-found: ignore + - name: Upload docs preview uses: seemethere/upload-artifact-s3@v5 if: ${{ github.event_name == 'pull_request' }} diff --git a/.github/workflows/build-tutorials-nightly.yml b/.github/workflows/build-tutorials-nightly.yml index 8d6c839ebe..3baae2bc67 100644 --- a/.github/workflows/build-tutorials-nightly.yml +++ b/.github/workflows/build-tutorials-nightly.yml @@ -23,6 +23,10 @@ on: # - main workflow_dispatch: +permissions: + contents: write + issues: write + concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} cancel-in-progress: true diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index 58372d557e..d6e3a04958 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -6,6 +6,10 @@ on: branches: - main +permissions: + contents: write + issues: write + concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} cancel-in-progress: true diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 5a8449201e..a3863009dc 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -69,7 +69,10 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then export FILES_TO_RUN # Step 3: Run `make docs` to generate HTML files and static files for these tutorialis - make docs + export PYTHONWARNINGS="all::DeprecationWarning,all::FutureWarning" + mkdir -p _build + make docs 2>&1 | tee _build/build.log + test ${PIPESTATUS[0]} -eq 0 # Step 3.1: 
Run the post-processing script: python .jenkins/post_process_notebooks.py @@ -125,10 +128,15 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then bash $DIR/remove_invisible_code_block_batch.sh docs python .jenkins/validate_tutorials_built.py + # Step 5.1: Generate API deprecation report from build warnings + python -m tools.deprecation_checker.api_report --build-log _build/build.log -o _build/api_report.md || true + # Step 6: Copy generated files to S3, tag with commit ID if [ "${UPLOAD:-0}" -eq 1 ]; then 7z a worker_${WORKER_ID}.7z docs awsv2 s3 cp worker_${WORKER_ID}.7z s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/worker_${WORKER_ID}.7z + # Upload build log for the manager to merge across shards + awsv2 s3 cp _build/build.log s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/build_log_${WORKER_ID}.txt || true fi elif [[ "${JOB_TYPE}" == "manager" ]]; then # Step 1: Generate no-plot HTML pages for all tutorials @@ -156,6 +164,9 @@ elif [[ "${JOB_TYPE}" == "manager" ]]; then # Step 5.1: Run post-processing script on .ipynb files: python .jenkins/post_process_notebooks.py + # Step 5.2: API deprecation report is generated by the GitHub workflow after + # merging worker build logs from artifacts. See _build-tutorials-base.yml. + # Step 6: Copy generated HTML files and static files to S3 7z a manager.7z docs awsv2 s3 cp manager.7z s3://${BUCKET_NAME}/${BUILD_PREFIX}/${COMMIT_ID}/manager.7z diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/deprecation_checker/__init__.py b/tools/deprecation_checker/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/deprecation_checker/api_report.py b/tools/deprecation_checker/api_report.py new file mode 100644 index 0000000000..37c641e436 --- /dev/null +++ b/tools/deprecation_checker/api_report.py @@ -0,0 +1,310 @@ +"""Generate Markdown deprecation reports and optionally file GitHub Issues. 
+ +Usage:: + + NOTE: This tool is designed to run in CI where tutorials are fully executed + by Sphinx Gallery on GPU workers. Running ``make html-noplot`` locally skips + tutorial execution, so no runtime warnings are emitted. Use these commands + to re-parse a build.log downloaded from a CI run, or to test with a + synthetic log. + + # Local report to stdout + python -m tools.deprecation_checker.api_report --build-log _build/build.log + + # Local report written to a file + python -m tools.deprecation_checker.api_report --build-log _build/build.log -o _build/api_report.md + + # Create / update a GitHub Issue (requires GITHUB_TOKEN) + python -m tools.deprecation_checker.api_report --build-log _build/build.log --create-issue +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +import urllib.request +import urllib.error +from collections import defaultdict +from pathlib import Path +from typing import List + +from .build_warning_parser import BuildWarning, classify_dependency, is_tutorial_source, parse_log + +# --------------------------------------------------------------------------- # +# Constants +# --------------------------------------------------------------------------- # + +REPO_OWNER = "pytorch" +REPO_NAME = "tutorials" +ISSUE_LABEL = "docs-agent-deprecations" +ISSUE_TITLE = "[CI] Deprecated API usage in tutorials" +ISSUE_CC = "svekars" + +# --------------------------------------------------------------------------- # +# Markdown report generation +# --------------------------------------------------------------------------- # + + +def _summary_table(warnings: List[BuildWarning]) -> str: + """Build a Markdown table summarising warning counts per file.""" + counts: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int)) + for w in warnings: + counts[w.file][w.category] += 1 + + lines = [ + "| Tutorial file | DeprecationWarning | FutureWarning | Total |", + "|---|---:|---:|---:|", + ] + for file in 
sorted(counts): + dep = counts[file].get("DeprecationWarning", 0) + fut = counts[file].get("FutureWarning", 0) + lines.append(f"| `{file}` | {dep} | {fut} | {dep + fut} |") + + total_dep = sum(c.get("DeprecationWarning", 0) for c in counts.values()) + total_fut = sum(c.get("FutureWarning", 0) for c in counts.values()) + lines.append(f"| **Total** | **{total_dep}** | **{total_fut}** | **{total_dep + total_fut}** |") + return "\n".join(lines) + + +def _findings_section(warnings: List[BuildWarning]) -> str: + """Detailed findings grouped by file, sorted by line number.""" + by_file: dict[str, list[BuildWarning]] = defaultdict(list) + for w in warnings: + by_file[w.file].append(w) + + sections: list[str] = [] + for file in sorted(by_file): + items = sorted(by_file[file], key=lambda w: w.lineno) + parts = [f"### `{file}`\n"] + for w in items: + parts.append( + f"- **Line {w.lineno}** ({w.category}): {w.message}" + ) + sections.append("\n".join(parts)) + + return "\n\n".join(sections) + + +def generate_report(warnings: List[BuildWarning]) -> str: + """Return a full Markdown report string.""" + if not warnings: + return ( + "# API Deprecation Report\n\n" + "No `DeprecationWarning` or `FutureWarning` detected in this build. 
:tada:" + ) + + tutorial_warnings = [w for w in warnings if is_tutorial_source(w.file)] + other_warnings = [w for w in warnings if not is_tutorial_source(w.file)] + + parts = [ + "# API Deprecation Report", + "", + f"**{len(warnings)}** unique deprecation/future warnings found in this build.", + "", + ] + + if tutorial_warnings: + parts += [ + "## Summary (tutorial sources)", + "", + _summary_table(tutorial_warnings), + "", + "## Findings", + "", + _findings_section(tutorial_warnings), + "", + ] + + # Classify dependency warnings + pytorch_warnings = [w for w in other_warnings if classify_dependency(w.file) == "pytorch"] + pytorch_lib_warnings = [w for w in other_warnings if classify_dependency(w.file) == "pytorch_libs"] + third_party_warnings = [w for w in other_warnings if classify_dependency(w.file) == "third_party"] + + if pytorch_warnings: + parts += [ + "## PyTorch warnings", + "", + _findings_section(pytorch_warnings), + "", + ] + + if pytorch_lib_warnings: + parts += [ + "## PyTorch libraries warnings", + "", + _findings_section(pytorch_lib_warnings), + "", + ] + + if third_party_warnings: + parts += [ + "## Third-party dependency warnings", + "", + _findings_section(third_party_warnings), + "", + ] + + return "\n".join(parts) + + +# --------------------------------------------------------------------------- # +# GitHub Issue creation / update +# --------------------------------------------------------------------------- # + + +def _gh_api( + method: str, + endpoint: str, + token: str, + body: dict | None = None, +) -> dict: + """Minimal GitHub REST API helper using only stdlib.""" + url = f"https://api.github.com{endpoint}" + data = json.dumps(body).encode() if body else None + req = urllib.request.Request( + url, + data=data, + method=method, + headers={ + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + with urllib.request.urlopen(req) as resp: + return 
json.loads(resp.read()) + + +def _ensure_label(token: str) -> None: + """Create the issue label if it doesn't exist yet.""" + try: + _gh_api( + "POST", + f"/repos/{REPO_OWNER}/{REPO_NAME}/labels", + token, + {"name": ISSUE_LABEL, "color": "d93f0b", "description": "Auto-generated deprecation report from CI"}, + ) + except urllib.error.HTTPError as exc: + if exc.code == 422: + pass # label already exists + else: + raise + + +def _find_existing_issue(token: str) -> int | None: + """Return the issue number of the existing open deprecation issue, or None.""" + results = _gh_api( + "GET", + f"/repos/{REPO_OWNER}/{REPO_NAME}/issues?labels={ISSUE_LABEL}&state=open&per_page=1", + token, + ) + if results: + return results[0]["number"] + return None + + +def create_or_update_issue(report_body: str, token: str) -> str: + """Create or update the deprecation GitHub Issue. Returns the issue URL.""" + _ensure_label(token) + existing = _find_existing_issue(token) + + body = f"cc: @{ISSUE_CC}\n\n{report_body}" + + if existing: + result = _gh_api( + "PATCH", + f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{existing}", + token, + {"body": body}, + ) + return result["html_url"] + else: + result = _gh_api( + "POST", + f"/repos/{REPO_OWNER}/{REPO_NAME}/issues", + token, + { + "title": ISSUE_TITLE, + "body": body, + "labels": [ISSUE_LABEL], + }, + ) + return result["html_url"] + + +def close_issue_if_open(token: str) -> str | None: + """Close the deprecation issue if one is open. Returns the URL or None.""" + existing = _find_existing_issue(token) + if not existing: + return None + result = _gh_api( + "PATCH", + f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{existing}", + token, + { + "state": "closed", + "body": f"cc: @{ISSUE_CC}\n\n" + "All `DeprecationWarning` and `FutureWarning` issues have been resolved. 
" + "This issue will reopen automatically if new deprecations are detected.", + }, + ) + return result["html_url"] + + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # + + +def main(argv: list[str] | None = None) -> None: + parser = argparse.ArgumentParser( + description="Generate an API deprecation report from a Sphinx build log.", + ) + parser.add_argument( + "--build-log", + required=True, + help="Path to the build log file (e.g. _build/build.log).", + ) + parser.add_argument( + "-o", + "--output", + default=None, + help="Write the Markdown report to this file instead of stdout.", + ) + parser.add_argument( + "--create-issue", + action="store_true", + help="Create or update a GitHub Issue with the report (requires GITHUB_TOKEN).", + ) + args = parser.parse_args(argv) + + warnings = parse_log(args.build_log) + report = generate_report(warnings) + + if args.output: + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + Path(args.output).write_text(report) + print(f"Report written to {args.output}") + else: + print(report) + + if args.create_issue: + token = os.environ.get("GITHUB_TOKEN", "") + if not token: + print("WARNING: GITHUB_TOKEN not set — skipping issue creation.", file=sys.stderr) + return + if not warnings: + url = close_issue_if_open(token) + if url: + print(f"All warnings resolved — closed issue: {url}") + else: + print("No warnings found and no open issue to close.") + return + url = create_or_update_issue(report, token) + print(f"GitHub Issue: {url}") + + +if __name__ == "__main__": + main() diff --git a/tools/deprecation_checker/build_warning_parser.py b/tools/deprecation_checker/build_warning_parser.py new file mode 100644 index 0000000000..13b7c21f18 --- /dev/null +++ b/tools/deprecation_checker/build_warning_parser.py @@ -0,0 +1,149 @@ +"""Parse Python build logs for DeprecationWarning and FutureWarning messages. 
+ +Reads a Sphinx Gallery build log and extracts structured warning information +so downstream tools can report on deprecated API usage in tutorials. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from pathlib import Path +from typing import List + +# Strip ANSI escape sequences and carriage-return progress lines +_ANSI_RE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]") + +# Matches standard Python warning output: +# /path/to/file.py:42: DeprecationWarning: some message +# <string>:2: DeprecationWarning: invalid escape sequence '\s' +# The message may span continuation lines (indented), but we grab the first line. +_WARNING_RE = re.compile( + r"(?P<path>/?[^\s:]+\.py|<[^>]+>):(?P<lineno>\d+):\s+" + r"(?P<category>DeprecationWarning|FutureWarning):\s+" + r"(?P<message>.+)$" +) + +# Docker workdir used by the CI build container +_DOCKER_PREFIX = "/var/lib/workspace/" + +# Tutorial source directories (relative to repo root) +_SOURCE_DIRS = ( + "beginner_source/", + "intermediate_source/", + "advanced_source/", + "recipes_source/", + "prototype_source/", +) + + +@dataclass +class BuildWarning: + """A single deprecation/future warning extracted from a build log.""" + + file: str + lineno: int + category: str # "DeprecationWarning" or "FutureWarning" + message: str + + +def _normalize_path(raw_path: str) -> str: + """Map an absolute Docker path back to a repo-relative tutorial source path. + + If the path doesn't belong to a known source directory the raw path is + returned as-is (it may come from a dependency — still useful to log). + """ + path = raw_path + if path.startswith(_DOCKER_PREFIX): + path = path[len(_DOCKER_PREFIX) :] + + # Strip leading "./" if present + if path.startswith("./"): + path = path[2:] + + return path + + +def parse_log(log_path: str | Path) -> List[BuildWarning]: + """Parse *log_path* and return deduplicated :class:`BuildWarning` objects. 
+ + Deduplication key: ``(file, message)`` — only the first occurrence (by + line number) is kept so the report highlights unique issues rather than + repeating the same warning 50 times. + """ + log_path = Path(log_path) + try: + text = log_path.read_text(errors="replace") + except FileNotFoundError: + return [] + + seen: dict[tuple[str, str], BuildWarning] = {} + warnings: list[BuildWarning] = [] + + for line in text.splitlines(): + # Strip ANSI escapes and split on \r to handle progress-line overwriting + line = _ANSI_RE.sub("", line) + if "\r" in line: + line = line.rsplit("\r", 1)[-1] + m = _WARNING_RE.search(line) + if m is None: + continue + + rel_path = _normalize_path(m.group("path")) + message = m.group("message").strip() + key = (rel_path, message) + + if key in seen: + continue + + warning = BuildWarning( + file=rel_path, + lineno=int(m.group("lineno")), + category=m.group("category"), + message=message, + ) + seen[key] = warning + warnings.append(warning) + + return warnings + + +def is_tutorial_source(path: str) -> bool: + """Return True if *path* belongs to a known tutorial source directory.""" + return any(path.startswith(d) for d in _SOURCE_DIRS) + + +# Package prefixes that belong to PyTorch core +_PYTORCH_CORE_PACKAGES = ( + "/torch/", + "torch/", +) + +# Package prefixes for PyTorch ecosystem libraries +_PYTORCH_LIB_PACKAGES = ( + "/torchvision/", + "/torchaudio/", + "/torchtext/", + "/torchrl/", + "/tensordict/", + "/torchdata/", + "/torchtune/", + "/torchtitan/", + "/functorch/", + "/torch_xla/", + "/executorch/", +) + + +def classify_dependency(path: str) -> str: + """Classify a non-tutorial warning path into a dependency category. + + Returns one of: ``"pytorch"``, ``"pytorch_libs"``, ``"third_party"``. + """ + for prefix in _PYTORCH_CORE_PACKAGES: + if prefix in path: + return "pytorch" + for prefix in _PYTORCH_LIB_PACKAGES: + if prefix in path: + return "pytorch_libs" + return "third_party"