diff --git a/.github/workflows/matrix.yml b/.github/workflows/matrix.yml
index 5f2d330..f0ab5a6 100644
--- a/.github/workflows/matrix.yml
+++ b/.github/workflows/matrix.yml
@@ -82,6 +82,16 @@ jobs:
           registry: ghcr.io
           username: "${{github.actor}}"
           password: "${{secrets.GITHUB_TOKEN}}"
+      - name: restore ccache
+        uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.2
+        with:
+          path: ~/.cache/kernel-ccache
+          # restore-keys is important here - it lets us restore the most recent cache key,
+          # *ignoring* the specific run ID, as a fuzzy match. So we can use previous build's
+          # caches for this flavor/arch even if the runid is not the same
+          key: "ccache-${{ matrix.builds.flavor }}-${{ join(matrix.builds.architectures, '-') }}-${{ github.run_id }}"
+          restore-keys: |
+            ccache-${{ matrix.builds.flavor }}-${{ join(matrix.builds.architectures, '-') }}-
       - name: generate docker script
         run: "./hack/build/generate-docker-script.sh"
       - name: upload docker script
@@ -92,3 +102,11 @@ jobs:
           compression-level: 0
       - name: run docker script
         run: sh -x docker.sh
+      - name: save ccache
+        uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.2
+        with:
+          path: ~/.cache/kernel-ccache
+          # The run_id here is just for write-key uniqueness, as GH doesn't allow overwriting
+          # existing cache keys - the `restore` action will fuzzy-match and ignore the run_id
+          # for subsequent runs.
+          key: "ccache-${{ matrix.builds.flavor }}-${{ join(matrix.builds.architectures, '-') }}-${{ github.run_id }}"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 60653d5..ed17fa2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,6 +7,9 @@ on:
 permissions:
   contents: read
   packages: read
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
 jobs:
   test:
     uses: ./.github/workflows/matrix.yml
diff --git a/Dockerfile b/Dockerfile
index 42b76da..0c89718 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,60 +8,54 @@ ARG FIRMWARE_SIG_URL=
 ADD ${FIRMWARE_URL} /firmware.tar.xz
 ADD ${FIRMWARE_SIG_URL} /firmware.tar.sign
 
+FROM --platform=$BUILDPLATFORM scratch AS nvidia-modules
+ARG NV_MODULES_TARBALL_URL=
+ADD ${NV_MODULES_TARBALL_URL} /nvidia-modules.tar.gz
+
 FROM --platform=$BUILDPLATFORM debian:bookworm@sha256:0a5bf4ecacfc050bad0131c8e1401063fd1e8343a418723f6dbd3cd13a7b9e33 AS buildenv
 RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y \
     build-essential squashfs-tools python3-yaml \
     patch diffutils sed mawk findutils zstd \
     python3 python3-packaging curl rsync cpio gpg grep \
-    flex bison pahole libssl-dev libelf-dev bc kmod && \
+    flex bison pahole libssl-dev libelf-dev bc kmod ccache && \
     rm -rf /var/lib/apt/lists/*
 ARG BUILDPLATFORM
 RUN if [ "${BUILDPLATFORM}" = "linux/amd64" ]; then \
     apt-get update && apt-get install -y linux-headers-amd64 g++-aarch64-linux-gnu gcc-aarch64-linux-gnu && rm -rf /var/lib/apt/lists/*; fi
 RUN if [ "${BUILDPLATFORM}" = "linux/arm64" ] || [ "${BUILDPLATFORM}" = "linux/aarch64" ]; then \
     apt-get update && apt-get install -y linux-headers-arm64 g++-x86-64-linux-gnu gcc-x86-64-linux-gnu && rm -rf /var/lib/apt/lists/*; fi
+ENV PATH="/usr/lib/ccache:${PATH}"
 RUN useradd -ms /bin/sh build
 COPY --chown=build:build . /build
 USER build
 WORKDIR /build
 RUN chmod +x hack/build/docker-build-internal.sh
 
-FROM buildenv AS build
-ARG KERNEL_VERSION=
-ARG KERNEL_FLAVOR=zone
-ARG BUILDPLATFORM
-ARG TARGETPLATFORM
+FROM buildenv AS build-staged
 COPY --from=kernelsrc --chown=build:build /src.tar.xz /build/override-kernel-src.tar.xz
+
+FROM build-staged AS build-staged-amdgpu
 COPY --from=firmware --chown=build:build /firmware.tar.xz /build/override-firmware.tar.xz
 COPY --from=firmware --chown=build:build /firmware.tar.sign /build/override-firmware.tar.sign
-RUN if [ "${KERNEL_FLAVOR}" = "zone-amdgpu" ]; then \
-    FIRMWARE_SIG_URL="/build/override-firmware.tar.sign" \
-    FIRMWARE_URL="/build/override-firmware.tar.xz" \
-    KERNEL_SRC_URL="/build/override-kernel-src.tar.xz" \
-    ./hack/build/docker-build-internal.sh; \
-    else \
-    KERNEL_SRC_URL="/build/override-kernel-src.tar.xz" \
-    ./hack/build/docker-build-internal.sh; \
-    fi
 
-FROM alpine:3.23@sha256:25109184c71bdad752c8312a8623239686a9a2071e8825f20acb8f2198c3f659 AS sdkbuild
+FROM build-staged AS build-staged-nvidiagpu
+COPY --from=nvidia-modules --chown=build:build /nvidia-modules.tar.gz /build/override-nvidia-modules.tar.gz
+
+FROM scratch AS kernel-ccachebuild
+COPY --from=ccachebuild kernel /kernel/image
+COPY --from=ccachebuild config.gz /kernel/config.gz
+COPY --from=ccachebuild addons.squashfs /kernel/addons.squashfs
+COPY --from=ccachebuild metadata /kernel/metadata
+
+FROM alpine:3.23@sha256:25109184c71bdad752c8312a8623239686a9a2071e8825f20acb8f2198c3f659 AS sdkbuild-ccachebuild
 ARG KERNEL_VERSION=
 ARG KERNEL_FLAVOR=zone
-COPY --from=build /build/target/sdk.tar.gz /sdk.tar.gz
+COPY --from=ccachebuild sdk.tar.gz /sdk.tar.gz
 RUN mkdir -p /usr/src/kernel-sdk-${KERNEL_VERSION}-${KERNEL_FLAVOR} && \
     tar -zx -C /usr/src/kernel-sdk-${KERNEL_VERSION}-${KERNEL_FLAVOR} -f /sdk.tar.gz && \
     mkdir -p /lib/modules/${KERNEL_VERSION} && \
     ln -sf /usr/src/kernel-sdk-${KERNEL_VERSION}-${KERNEL_FLAVOR} /lib/modules/${KERNEL_VERSION}/build && \
     rm -rf /sdk.tar.gz
 
-FROM scratch AS sdk
-COPY --from=sdkbuild /usr/src /usr/src
-
-FROM scratch AS kernelcopy
-COPY --from=build /build/target/kernel /kernel/image
-COPY --from=build /build/target/config.gz /kernel/config.gz
-COPY --from=build /build/target/addons.squashfs /kernel/addons.squashfs
-COPY --from=build /build/target/metadata /kernel/metadata
-
-FROM scratch AS kernel
-COPY --from=kernelcopy /kernel /kernel
+FROM scratch AS sdk-ccachebuild
+COPY --from=sdkbuild-ccachebuild /usr/src /usr/src
diff --git a/hack/build/generate-docker-script.py b/hack/build/generate-docker-script.py
index 0fd0f94..9ea5c01 100644
--- a/hack/build/generate-docker-script.py
+++ b/hack/build/generate-docker-script.py
@@ -9,6 +9,15 @@ from matrix import CONFIG
 from util import format_image_name, maybe, smart_script_split, parse_text_bool
 
 
+# Targets that are handled via docker run + host CCACHE packaging stages.
+CCACHE_TARGET_MAP = {
+    "kernel": "kernel-ccachebuild",
+    "sdk": "sdk-ccachebuild",
+}
+
+# Targets skipped during the packaging phase (handled separately or not needed).
+SKIP_PACKAGING_TARGETS = {"kernelsrc", "buildenv"}
+
 
 def is_publish_enabled() -> bool:
     root_publish = os.getenv("KERNEL_PUBLISH", "false")
@@ -18,6 +27,7 @@ def is_publish_enabled() -> bool:
 def quoted(text: str) -> str:
     return '"%s"' % text
 
+
 def dockerify_version(version_string: str) -> str:
     # "+" is valid for both python versions and semver,
     # but docker rejects it for tags, so sanitize
@@ -27,18 +37,158 @@ def dockerify_version(version_string: str) -> str:
 def docker_platforms(architectures: list[str]) -> list[str]:
     platforms = []
     for arch in architectures:
-        platform = ""
-        if arch == "aarch64":
-            platform = "linux/aarch64"
-        elif arch == "x86_64":
-            platform = "linux/amd64"
-        if len(platform) == 0:
-            print("unknown platform %s" % arch, file=sys.stderr)
-            sys.exit(1)
+        platform = arch_to_platform(arch)
         platforms.append(platform)
     return platforms
 
 
+def arch_to_platform(arch: str) -> str:
+    """Map a kernel architecture name to its docker platform string.
+
+    Prints an error to stderr and exits with status 1 for anything other
+    than "aarch64" or "x86_64".
+    """
+    if arch == "aarch64":
+        return "linux/aarch64"
+    elif arch == "x86_64":
+        return "linux/amd64"
+    print("unknown arch %s" % arch, file=sys.stderr)
+    sys.exit(1)
+
+
+def docker_build_staged(
+    version: str,
+    flavor: str,
+    architectures: list[str],
+    src_url: str,
+    firmware_url: str,
+    firmware_sig_url: str,
+) -> list[str]:
+    """Generate the buildx command that builds the build-staged image.
+
+    The image is buildenv with the kernel source baked in; zone-amdgpu also
+    gets the firmware archive and signature, and zone-nvidiagpu gets the
+    NVIDIA open-gpu-kernel-modules tarball. The image id is written to an
+    iidfile whose name docker_compile() rebuilds to run the image.
+
+    Returns an empty line followed by the output of smart_script_split().
+    Exits with status 1 when a zone-nvidiagpu version has no
+    "+nvidia-<driver>" / "-nvidia-<driver>" suffix.
+    """
+    has_firmware = flavor == "zone-amdgpu"
+    has_nvidia = flavor == "zone-nvidiagpu"
+    if has_nvidia:
+        # The driver version follows the "nvidia-" marker in the kernel
+        # version. Report a missing marker instead of raising IndexError.
+        if "+nvidia-" in version:
+            nv_version = version.split("+nvidia-")[1]
+        elif "-nvidia-" in version:
+            nv_version = version.split("-nvidia-")[1]
+        else:
+            print("no nvidia driver version in %s" % version, file=sys.stderr)
+            sys.exit(1)
+        nv_modules_url = "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/%s.tar.gz" % nv_version
+    version = dockerify_version(version)
+    if has_nvidia:
+        target = "build-staged-nvidiagpu"
+    elif has_firmware:
+        target = "build-staged-amdgpu"
+    else:
+        target = "build-staged"
+    iidfile = "image-id-%s-%s-%s" % (version, flavor, target)
+    # NOTE(review): --load with more than one --platform needs an image store
+    # that can hold multi-platform images - confirm for the "edera" builder.
+    command = [
+        "docker", "buildx", "build",
+        "--builder", "edera",
+        "--load",
+        "-f", quoted("Dockerfile"),
+        "--target", quoted(target),
+        "--iidfile", quoted(iidfile),
+    ]
+    for platform in docker_platforms(architectures):
+        command += ["--platform", quoted(platform)]
+    command += ["--build-arg", quoted("KERNEL_SRC_URL=%s" % src_url)]
+    if has_firmware:
+        command += [
+            "--build-arg", quoted("FIRMWARE_URL=%s" % firmware_url),
+            "--build-arg", quoted("FIRMWARE_SIG_URL=%s" % firmware_sig_url),
+        ]
+    if has_nvidia:
+        command += ["--build-arg", quoted("NV_MODULES_TARBALL_URL=%s" % nv_modules_url)]
+    command += ["."]
+    return [""] + smart_script_split(command, "stage=stage flavor=%s version=%s" % (flavor, version))
+
+
+def docker_compile(
+    version: str,
+    flavor: str,
+    architectures: list[str],
+    firmware_url: str,
+    firmware_sig_url: str,
+) -> list[str]:
+    """Generate docker run commands to compile the kernel with ccache.
+
+    One `docker run` is emitted per architecture against the image recorded
+    by docker_build_staged(), with the host ccache directory and ./target
+    bind-mounted into the container. firmware_url and firmware_sig_url are
+    not read here: the container gets the in-image override paths instead.
+    """
+    lines = []
+    has_firmware = flavor == "zone-amdgpu"
+    has_nvidia = flavor == "zone-nvidiagpu"
+    version = dockerify_version(version)
+    if has_nvidia:
+        stage_target = "build-staged-nvidiagpu"
+    elif has_firmware:
+        stage_target = "build-staged-amdgpu"
+    else:
+        stage_target = "build-staged"
+    # Must match the iidfile name written by docker_build_staged().
+    staged_iidfile = "image-id-%s-%s-%s" % (version, flavor, stage_target)
+
+    # Opened up to all users, presumably so the image's `build` user can
+    # write to both bind mounts. NOTE(review): every architecture shares
+    # ./target - confirm multi-arch builds do not overwrite each other.
+    lines += ["", "rm -rf target && mkdir -p target && chmod a+rwX target"]
+    lines += ['mkdir -p "${HOME}/.cache/kernel-ccache" && chmod -R a+rwX "${HOME}/.cache/kernel-ccache"']
+
+    for arch in architectures:
+        platform = arch_to_platform(arch)
+        compile_command = [
+            "docker",
+            "run",
+            "--rm",
+            "--platform", quoted(platform),
+            "-e", quoted("KERNEL_VERSION=%s" % version),
+            "-e", quoted("KERNEL_FLAVOR=%s" % flavor),
+            "-e", quoted("KERNEL_SRC_URL=/build/override-kernel-src.tar.xz"),
+            "-e", quoted("CCACHE_DIR=/home/build/.cache/ccache"),
+            "-e", quoted("CCACHE_COMPRESS=1"),
+            "-v", quoted("${HOME}/.cache/kernel-ccache:/home/build/.cache/ccache"),
+            "-v", quoted("${PWD}/target:/build/target"),
+        ]
+        if has_firmware:
+            compile_command += [
+                "-e", quoted("FIRMWARE_URL=/build/override-firmware.tar.xz"),
+                "-e", quoted("FIRMWARE_SIG_URL=/build/override-firmware.tar.sign"),
+            ]
+        if has_nvidia:
+            compile_command += [
+                "-e", quoted("NVIDIA_MODULES_PATH=/build/override-nvidia-modules.tar.gz"),
+            ]
+        compile_command += [
+            '"$(cat %s)"' % staged_iidfile,
+            "./hack/build/docker-build-internal.sh",
+        ]
+        lines += [""]
+        lines += smart_script_split(
+            compile_command,
+            "stage=compile flavor=%s version=%s arch=%s" % (flavor, version, arch),
+        )
+    return lines
+
+
 def docker_build(
     target: str,
     name: str,
@@ -47,16 +197,15 @@ def docker_build(
     version_info: Version,
     tags: list[str],
     architectures: list[str],
-    src_url: str,
     publish: bool,
     pass_build_args: bool,
     mark_format: Optional[str],
-    firmware_url: str,
-    firmware_sig_url: str,
 ) -> list[str]:
     lines = []
     version = dockerify_version(version)
 
+    actual_target = CCACHE_TARGET_MAP.get(target, target)
+
     root = format_image_name(
         image_name_format=CONFIG["imageNameFormat"],
         flavor=flavor,
@@ -75,14 +224,17 @@ def docker_build(
         "-f",
         quoted("Dockerfile"),
         "--target",
-        quoted(target),
+        quoted(actual_target),
         "--iidfile",
-        quoted("image-id-%s-%s-%s" % (version, flavor, target)),
+        quoted("image-id-%s-%s-%s" % (version, flavor, actual_target)),
     ]
 
     for build_platform in docker_platforms(architectures):
         image_build_command += ["--platform", quoted(build_platform)]
 
+    if actual_target != target:
+        image_build_command += ["--build-context", quoted("ccachebuild=target")]
+
     if mark_format is not None:
         image_build_command += [
             "--annotation",
@@ -96,13 +248,7 @@ def docker_build(
             "--build-arg",
             quoted("KERNEL_VERSION=%s" % version),
             "--build-arg",
-            quoted("KERNEL_SRC_URL=%s" % src_url),
-            "--build-arg",
             quoted("KERNEL_FLAVOR=%s" % flavor),
-            "--build-arg",
-            quoted("FIRMWARE_URL=%s" % firmware_url),
-            "--build-arg",
-            quoted("FIRMWARE_SIG_URL=%s" % firmware_sig_url),
         ]
 
     if mark_format is not None:
@@ -149,7 +295,7 @@ def docker_build(
                 "sign",
                 "--yes",
                 quoted(
-                    '%s@$(cat "image-id-%s-%s-%s")' % (tag, version, flavor, target)
+                    '%s@$(cat "image-id-%s-%s-%s")' % (tag, version, flavor, actual_target)
                 ),
             ]
             lines += [""]
@@ -180,9 +326,32 @@ def generate_builds(
     lines = []
     kernel_version_info = parse(kernel_version)
     image_configs = CONFIG["images"]
+
+    # Phase 1: Build the build-staged image (buildenv + kernel source + firmware baked in).
+    lines += docker_build_staged(
+        version=kernel_version,
+        flavor=kernel_flavor,
+        architectures=kernel_architectures,
+        src_url=kernel_src_url,
+        firmware_url=firmware_url,
+        firmware_sig_url=firmware_sig_url,
+    )
+
+    # Phase 2: Compile kernel via docker run with ccache bind-mounted from host.
+    lines += docker_compile(
+        version=kernel_version,
+        flavor=kernel_flavor,
+        architectures=kernel_architectures,
+        firmware_url=firmware_url,
+        firmware_sig_url=firmware_sig_url,
+    )
+
+    # Phase 3: Package kernel and SDK images from the ccache-built artifacts.
     for image_config in image_configs:
+        target = image_config["target"]
+        if target in SKIP_PACKAGING_TARGETS:
+            continue
         image_name = image_config["name"]
-        image_target = image_config["target"]
         image_version = maybe(image_config, "version", kernel_version)
         image_tags = maybe(image_config, "tags", kernel_tags)
         image_format = maybe(image_config, "format")
@@ -190,8 +359,8 @@ def generate_builds(
         if not is_publish_enabled():
             should_publish = False
         should_pass_build_args = maybe(image_config, "passBuildArgs", True)
-        image_lines = docker_build(
-            target=image_target,
+        lines += docker_build(
+            target=target,
             name=image_name,
             version=image_version,
             version_info=kernel_version_info,
@@ -200,12 +369,8 @@ def generate_builds(
             pass_build_args=should_pass_build_args,
             mark_format=image_format,
             flavor=kernel_flavor,
-            src_url=kernel_src_url,
             architectures=kernel_architectures,
-            firmware_url=firmware_url,
-            firmware_sig_url=firmware_sig_url,
         )
-        lines += image_lines
     return lines
 
 
diff --git a/hack/build/nvidiagpu-common.sh b/hack/build/nvidiagpu-common.sh
index 558d57a..7a9c735 100644
--- a/hack/build/nvidiagpu-common.sh
+++ b/hack/build/nvidiagpu-common.sh
@@ -22,25 +22,36 @@ fi
 
 echo "Fetching nvidia module release: $NV_VERSION"
 
-RELEASE_JSON=$(curl -s --retry 5 --retry-delay 2 --retry-max-time 30 --retry-all-errors "https://api.github.com/repos/${NV_KMOD_REPO_OWNER}/${NV_KMOD_REPO_NAME}/releases/tags/${NV_VERSION}")
-TARBALL_URL=$(echo "$RELEASE_JSON" | grep -o '"tarball_url": *"[^"]*"' | sed 's/"tarball_url": *"\(.*\)"/\1/')
-if [ -z "$TARBALL_URL" ]; then
-    echo "Failed to fetch release information for version $NV_VERSION"
-    exit 1
+NV_WORKDIR="$(mktemp -d)/nvidia-modules/${NV_VERSION}"
+mkdir -p "$NV_WORKDIR"
+
+# Use the tarball at NVIDIA_MODULES_PATH when it exists; otherwise download it.
+if [ -n "${NVIDIA_MODULES_PATH:-}" ] && [ -f "${NVIDIA_MODULES_PATH:-}" ]; then
+    ARCHIVE="${NVIDIA_MODULES_PATH}"
+else
+    RELEASE_JSON=$(curl -s --retry 5 --retry-delay 2 --retry-max-time 30 --retry-all-errors "https://api.github.com/repos/${NV_KMOD_REPO_OWNER}/${NV_KMOD_REPO_NAME}/releases/tags/${NV_VERSION}")
+    TARBALL_URL=$(echo "$RELEASE_JSON" | grep -o '"tarball_url": *"[^"]*"' | sed 's/"tarball_url": *"\(.*\)"/\1/')
+    if [ -z "$TARBALL_URL" ]; then
+        echo "Failed to fetch release information for version $NV_VERSION"
+        exit 1
+    fi
+    ARCHIVE="$NV_WORKDIR/driver-src.tar.gz"
+    curl -L -o "$ARCHIVE" "$TARBALL_URL"
 fi
 
 echo "Building NVIDIA driver version: $NV_VERSION"
 
-NV_WORKDIR="$(mktemp -d)/nvidia-modules/${NV_VERSION}"
-ARCHIVE="$NV_WORKDIR/driver-src.tar.gz"
-
-mkdir -p "$NV_WORKDIR"
-
-curl -L -o "$ARCHIVE" "$TARBALL_URL"
 tar -xzf "$ARCHIVE" -C "$NV_WORKDIR"
 
 OLDPWD=$(pwd)
-cd "$NV_WORKDIR"/"$NV_KMOD_REPO_OWNER"-*
+# Enter the first top-level directory of the extracted archive; fail loudly
+# if there is none rather than patching whatever the current directory is.
+NV_SRCDIR="$(find "$NV_WORKDIR" -mindepth 1 -maxdepth 1 -type d | head -1)"
+if [ -z "$NV_SRCDIR" ]; then
+    echo "No source directory found after extracting $ARCHIVE"
+    exit 1
+fi
+cd "$NV_SRCDIR"
 
 # Apply nvidia hackpatches if we have them
 for patch in "${OLDPWD}"/patches-nvidia/*.patch; do patch -p1 < "$patch"; done
diff --git a/hack/build/patchlist.py b/hack/build/patchlist.py
index e9e9b9d..f617be7 100644
--- a/hack/build/patchlist.py
+++ b/hack/build/patchlist.py
@@ -9,7 +9,12 @@
     print("Usage: patchlist ")
     exit(1)
 
-target_version = parse(sys.argv[1])
+# Retry with the text before the first "-" when the full version string
+# does not parse (presumably versions carrying a flavor suffix).
+try:
+    target_version = parse(sys.argv[1])
+except Exception:
+    target_version = parse(sys.argv[1].split('-')[0])
 kernel_flavor = sys.argv[2]
 
 series = "%s.%s" % (target_version.major, target_version.minor)