diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d610d70..0930a95 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,9 +1,15 @@
 name: Compile and release
 
 on:
+  # Releases are cut on merges into main and on dev_* tags (auto-versioned
+  # below, like rehosting/penguin). NOTE: we deliberately do NOT trigger on
+  # 'v*' tags — the release step now *creates* v* tags via version-increment,
+  # so a 'v*' push trigger would make every release re-trigger itself.
   push:
+    branches:
+      - main
     tags:
-      - 'v*'
+      - 'dev_*'
 
   pull_request:
     branches:
@@ -11,13 +17,19 @@ on:
 
   workflow_dispatch:
 
+# Serialize per-ref so two main merges can't race the auto-version step and claim the same vX.Y.Z tag
+# (dev_* tags are separate refs, hence separate groups). PRs cancel superseded runs; in-progress release
+# runs are never cancelled, but GitHub replaces an older *pending* run when a newer one is queued.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   prebuild:
     runs-on: rehosting-arc
     outputs:
       targets: ${{ steps.find_targets.outputs.targets }}
       versions: ${{ steps.find_targets.outputs.versions }}
-      sources_dir: ${{ steps.setup_sources.outputs.sources_dir }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -32,60 +44,11 @@ jobs:
           echo
           echo "Full submodule SHAs:" && git submodule foreach 'echo $name: $(git rev-parse HEAD)'
 
-      - name: Ensure local bare clone of base Linux repo
-        run: |
-          set -eux
-          BASE_REPO_DIR="/home/runner/_shared/linux"
-          BASE_REPO_URL="https://github.com/rehosting/linux"
-
-
-          # Clone bare base repo if missing
-          if [ ! -d "$BASE_REPO_DIR" ]; then
-            echo "Cloning bare base repo to $BASE_REPO_DIR"
-            git clone --bare "$BASE_REPO_URL" "$BASE_REPO_DIR"
-            cd $BASE_REPO_DIR && git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"
-          fi
-          # Always fetch latest from upstream
-          cd "$BASE_REPO_DIR" && git fetch origin --prune --tags --force
-
-      - name: Ensure linux cache exists
-        run: |
-          BASE_CACHE_DIR="/home/runner/_shared/linux_builder/cache"
-
-          if [ ! -d "$BASE_CACHE_DIR" ]; then
-            mkdir -p "$BASE_CACHE_DIR"
-          fi
-      - name: Install rsync
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y rsync
-      - name: Setup shared Linux kernel sources
-        id: setup_sources
-        run: |
-          set -eux
-
-          SOURCES_DIR="/home/runner/_shared/linux_sources/"
-          echo "Using stable source directory: $SOURCES_DIR"
-          echo "sources_dir=$SOURCES_DIR" >> $GITHUB_OUTPUT
-
-          # Ensure the stable directory exists and copy the entire repo into it.
-          # The --delete flag keeps the destination in sync with the source.
-          mkdir -p "$SOURCES_DIR"
-          rsync -a --delete . "$SOURCES_DIR/"
-
-          # Change into the stable directory to perform all subsequent git operations
-          cd "$SOURCES_DIR"
-
-          BASE_REPO_DIR="/home/runner/_shared/linux"
-          sed -i "s|url = https://github.com/rehosting/linux.git|url = file://$BASE_REPO_DIR|g" .gitmodules
-
-          # Sync and update submodules from within the stable repository
-          git submodule sync
-          GIT_ALLOW_PROTOCOL=file:https git submodule update --init --depth 1 --jobs 2
-
-          # Use rsync to move the linux directory into the stable location
-          # This is more robust than mv and helps preserve attributes.
-          rsync -a --delete linux/ "$SOURCES_DIR/linux/"
+      # NOTE: kernel-source preparation used to live here and wrote to the shared
+      # hostPath on prebuild's node, which forced every build job onto that same
+      # node. It now happens per-node inside each build job (see "Ensure kernel
+      # sources on this node" below), so prebuild only needs to discover the
+      # build matrix -- nothing node-specific.
 
       - name: Find valid targets and versions sets
         id: find_targets
@@ -133,6 +96,67 @@ jobs:
           echo "target=$TARGET" >> $GITHUB_OUTPUT
           echo "Building target: $TARGET"
 
+      - name: Ensure kernel sources on this node
+        id: sources
+        run: |
+          set -eux
+          SHARED="/home/runner/_shared"
+          BASE_REPO_DIR="$SHARED/linux"
+          BASE_REPO_URL="https://github.com/rehosting/linux"
+          SRC_PARENT="$SHARED/linux_sources"
+          mkdir -p "$SRC_PARENT"
+
+          # Cache key = the pinned linux/ submodule SHAs. The kernel source
+          # only changes when a submodule is bumped, so a node reuses its tree
+          # across runs and only re-populates on a real SHA change.
+          KEY=$(git submodule status | awk '{gsub(/^[-+U ]+/,"",$1); print $1}' | sort | sha1sum | cut -c1-12)
+          SRC_ROOT="$SRC_PARENT/$KEY"
+          echo "kernel_src=$SRC_ROOT/linux" >> "$GITHUB_OUTPUT"
+
+          # Per-node arbitration: whichever build job lands on a node first
+          # populates that node's copy; the others block on the lock, then see
+          # .ready and skip. flock on the shared fs is the right primitive --
+          # GH 'concurrency' is cross-node and can't serialize same-node jobs.
+          # This also avoids the cp/rsync races the single shared dir hit.
+          exec 9>"$SRC_PARENT/.populate.lock"
+          flock 9
+
+          if [ ! -e "$SRC_ROOT/.ready" ]; then
+            echo "Populating kernel sources for key $KEY on $(hostname)"
+            # Node-local bare clone so submodule update pulls over fast local
+            # file:// instead of hitting GitHub once per submodule per job.
+            if [ ! -d "$BASE_REPO_DIR" ]; then
+              git clone --bare "$BASE_REPO_URL" "$BASE_REPO_DIR"
+              git -C "$BASE_REPO_DIR" config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"
+            fi
+            git -C "$BASE_REPO_DIR" fetch origin --prune --tags --force
+
+            rm -rf "$SRC_ROOT.tmp"
+            mkdir -p "$SRC_ROOT.tmp"
+            # Copy the (submodule-less) superproject checkout, then init the
+            # kernel submodules into it from the local bare clone.
+            cp -a "$GITHUB_WORKSPACE/." "$SRC_ROOT.tmp/"
+            ( cd "$SRC_ROOT.tmp"
+              sed -i "s|url = https://github.com/rehosting/linux.git|url = file://$BASE_REPO_DIR|g" .gitmodules
+              git submodule sync
+              GIT_ALLOW_PROTOCOL=file:https git submodule update --init --depth 1 --jobs 2 )
+            # Publish atomically so a partial tree is never seen as ready.
+            rm -rf "$SRC_ROOT"
+            mv "$SRC_ROOT.tmp" "$SRC_ROOT"
+            touch "$SRC_ROOT/.ready"
+          else
+            echo "Reusing cached kernel sources for key $KEY on $(hostname)"
+          fi
+          # Record last-use so the GC below doesn't reap an actively-reused tree.
+          touch "$SRC_ROOT"
+
+          # Best-effort GC: drop keyed trees (and stale .tmp dirs) untouched for
+          # 14 days so the node-local cache can't grow unbounded across bumps.
+          find "$SRC_PARENT" -mindepth 1 -maxdepth 1 -type d ! -path "$SRC_ROOT" -mtime +14 \
+            -exec rm -rf {} + 2>/dev/null || true
+
+          flock -u 9
+
       - name: Trust Harbor's self-signed certificate
         run: |
           echo "Fetching certificate from ${{ secrets.REHOSTING_ARC_REGISTRY }}"
@@ -149,8 +173,14 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
         with:
+          # Do NOT pin image=moby/buildkit:master here. A recent master
+          # regressed on the self-hosted runners (kernel 5.4):
+          #   runc run failed: ... can't mask dir "/proc/acpi": mount ...
+          #   MS_RDONLY ... invalid argument
+          # which fails the first RUN that needs container init. Letting buildx
+          # use its default pinned-stable buildkit avoids it. network=host and
+          # the registry config are kept. (Mirrors rehosting/penguin c35bedc5.)
           driver-opts: |
-            image=moby/buildkit:master
             network=host
           buildkitd-config-inline: |
             [registry."${{ secrets.REHOSTING_ARC_REGISTRY }}"]
@@ -177,9 +207,10 @@ jobs:
           set -eux
           TARGET="${{ matrix.target_version }}"
           VERSIONS_JSON='${{ needs.prebuild.outputs.versions }}'
-          # BASE_CACHE_DIR="/home/runner/_shared/linux_builder/cache"
-          # Use the output from the prebuild job
-          SOURCES_DIR="${{ needs.prebuild.outputs.sources_dir }}/linux"
+          # Per-node kernel sources prepared by the "Ensure kernel sources on
+          # this node" step above (node-agnostic: no dependency on prebuild's
+          # node).
+          SOURCES_DIR="${{ steps.sources.outputs.kernel_src }}"
 
           if [ -z "$VERSIONS_JSON" ] || [ "$VERSIONS_JSON" = "[]" ]; then
             VERSIONS=""
@@ -190,14 +221,26 @@ jobs:
           # Mount the stable source directory instead of the run-specific one
           ./build.sh --targets "$TARGET" ${VERSIONS:+--versions "$VERSIONS"} --extra-docker-opts "-v $SOURCES_DIR:/app/linux"
 
-          # Use a run-specific output directory to avoid clashes
-          BUILD_OUTPUT="/home/runner/_shared/runs/$GITHUB_RUN_ID/build-output"
-          mkdir -p $BUILD_OUTPUT
-          mv kernels-latest.tar.gz $BUILD_OUTPUT/kernels-latest-${TARGET}.tar.gz
-          mv kernel-devel-all.tar.gz $BUILD_OUTPUT/kernel-devel-all-${TARGET}.tar.gz
+          # Stage per-target outputs in the workspace; they are handed to the
+          # aggregate job via workflow artifacts (below) instead of a shared
+          # hostPath, so build and aggregate need not run on the same node.
+          mkdir -p build-output
+          mv kernels-latest.tar.gz build-output/kernels-latest-${TARGET}.tar.gz
+          mv kernel-devel-all.tar.gz build-output/kernel-devel-all-${TARGET}.tar.gz
+
+      - name: Upload per-target kernel artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: kernels-${{ matrix.target_version }}
+          path: build-output/
+          retention-days: 1
 
   aggregate:
-    if: startsWith(github.ref, 'refs/tags/v') || github.event_name == 'workflow_dispatch'
+    # Runs on releases (push to main / dev_* tag) AND on manual dispatch so the
+    # full download+combine round-trip can be exercised without cutting a
+    # release (the publish step below is gated to push events only). Never runs
+    # for pull_request.
+    if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
     needs: build
     runs-on: rehosting-arc
     env:
@@ -218,12 +261,19 @@ jobs:
           username: ${{ secrets.REHOSTING_ARC_REGISTRY_USER }}
           password: ${{ secrets.REHOSTING_ARC_REGISTRY_PASSWORD }}
 
+      - name: Download all per-target kernel artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: kernels-*
+          path: build-output
+          merge-multiple: true
+
       - name: Combine all kernels into a single archive
         run: |
           set -eux
-          RUNS_PARENT="/home/runner/_shared/runs"
-          RUNS_DIR="$RUNS_PARENT/$GITHUB_RUN_ID"
-          BUILD_OUTPUT="$RUNS_DIR/build-output"
+          # Artifacts downloaded by the step above land here (workspace-local,
+          # node-agnostic) instead of the old /home/runner/_shared/runs hostPath.
+          BUILD_OUTPUT="$GITHUB_WORKSPACE/build-output"
 
           echo "[DEBUG] Listing available per-target kernel archives:"
           find "$BUILD_OUTPUT" -maxdepth 1 -name "kernels-latest-*.tar.gz" -print || true
@@ -259,9 +309,9 @@ jobs:
       - name: Aggregate all kernel-devel artifacts
         run: |
           set -eux
-          RUNS_PARENT="/home/runner/_shared/runs"
-          RUNS_DIR="$RUNS_PARENT/$GITHUB_RUN_ID"
-          BUILD_OUTPUT="$RUNS_DIR/build-output"
+          # Artifacts downloaded by the step above land here (workspace-local,
+          # node-agnostic) instead of the old /home/runner/_shared/runs hostPath.
+          BUILD_OUTPUT="$GITHUB_WORKSPACE/build-output"
 
           mkdir -p kernel-devel-all
           for archive in "$BUILD_OUTPUT"/kernel-devel-all-*.tar.gz; do
@@ -271,19 +321,34 @@ jobs:
           done
           tar -czvf kernel-devel-all.tar.gz -C kernel-devel-all .
 
+      # Auto-version like rehosting/penguin: query the GitHub API for the latest
+      # release and increment. On main this yields a clean vX.Y.Z; on a non-main
+      # ref (a dev_* tag) version-increment appends a -pre suffix.
+      - name: Get next version
+        id: version
+        uses: reecetech/version-increment@2023.10.1
+        with:
+          use_api: true
+
       - name: Create and publish release
+        # Only publish on real release events (main merge / dev_* tag). A manual
+        # workflow_dispatch still runs everything above to validate the pipeline
+        # but does not create a release.
+        if: github.event_name == 'push'
         uses: softprops/action-gh-release@v1
         with:
           files: |
             kernels-latest.tar.gz
             kernel-devel-all.tar.gz
           token: ${{ secrets.GITHUB_TOKEN }}
-          tag_name: ${{ github.ref_name }}
-
-      - name: Cleanup per-run kernel clones
-        if: always()
-        run: |
-          RUNS_PARENT="/home/runner/_shared/runs"
-          RUNS_DIR="$RUNS_PARENT/$GITHUB_RUN_ID"
-          echo "Cleaning up kernel clones in $RUNS_DIR"
-          rm -rf "$RUNS_DIR"
+          tag_name: ${{ steps.version.outputs.v-version }}
+          # The tag above does not exist yet, so pin it to the commit this run
+          # built; without this it is cut from the default branch HEAD.
+          target_commitish: ${{ github.sha }}
+          name: Release ${{ steps.version.outputs.v-version }}
+          generate_release_notes: true
+          # dev_* tags publish as prereleases; main merges as full releases.
+          prerelease: ${{ startsWith(github.ref, 'refs/tags/dev_') }}
+      # (Removed the per-run /home/runner/_shared/runs cleanup: outputs now flow
+      # through workflow artifacts, which expire on their own retention, and the
+      # workspace build-output dir is ephemeral.)
diff --git a/_in_container_build.sh b/_in_container_build.sh
index 5ef9697..d7cef5f 100755
--- a/_in_container_build.sh
+++ b/_in_container_build.sh
@@ -280,6 +280,40 @@ for TARGET in $TARGETS; do
         cp "$KERNEL_SRC/Kconfig" "$OUTDIR/" || true
         # Ensure fixdep is present for out-of-tree module builds
         cp -r "$KBUILD_DIR/scripts/" "$OUTDIR/scripts/" || true
+
+        # --- Slim the staged devel tree -------------------------------------
+        # An out-of-tree module build (make -C $KDIR M=$PWD modules) only needs
+        # the modules_prepare result: Makefile/.config/Module.symvers, headers
+        # (include/, arch/<arch>/include), arch Makefiles, and scripts/ host
+        # tools. It does not read boot images, prebuilt build objects, or most
+        # of tools/, so drop them -- this cuts the per-target devel archive
+        # ~75-85% (e.g. x86_64 ~605MB -> ~100MB uncompressed).
+        #
+        # NOTE: the `rm -rf "$OUTDIR/arch/${short_arch}/boot"` above is a no-op
+        # for x86_64 (real arch dir is arch/x86, but short_arch is "x86_64"), so
+        # the full arch/x86/boot (~120MB of bzImage/vmlinux) used to ship. The
+        # arch/*/boot glob here removes it (and arch/*/realmode) for every arch.
+        rm -rf "$OUTDIR"/arch/*/boot "$OUTDIR"/arch/*/realmode || true
+        # Keep tools/objtool (kbuild may run it on module objects when
+        # CONFIG_OBJTOOL=y); drop the rest of tools/ (perf, testing, bpf = bulk).
+        if [ -d "$OUTDIR/tools" ]; then
+            find "$OUTDIR/tools" -mindepth 1 -maxdepth 1 ! -name objtool -exec rm -rf {} + || true
+        fi
+        # Drop build leftovers: *.cmd everywhere (scripts/ and tools/ included),
+        # and *.o except arch/powerpc/lib/crtsavres.o (igloo_driver links it for
+        # ppc targets) and anything under scripts/ or tools/ (host tools).
+        find "$OUTDIR" -name '*.cmd' -delete || true
+        find "$OUTDIR" -name '*.o' \
+            ! -path '*/arch/powerpc/lib/crtsavres.o' \
+            ! -path '*/scripts/*' \
+            ! -path '*/tools/*' -delete || true
+        # Drop kernel .c source too: an `M=` external-module build compiles the
+        # module's own sources against the staged headers and Module.symvers,
+        # never the in-tree .c. Keep scripts/ and tools/ sources in case a host
+        # tool needs a rebuild.
+        find "$OUTDIR" -name '*.c' \
+            ! -path '*/scripts/*' \
+            ! -path '*/tools/*' -delete || true
     ) &
 
     # Store the PID of the background process