From 61fbd567064dd12e60ac4f21cf2aa9b033d3dd4a Mon Sep 17 00:00:00 2001
From: Hector Flores <hector.flores@htek.dev>
Date: Wed, 10 Jun 2026 11:42:41 -0500
Subject: [PATCH] feat: add 5 new error entries (runner-environment x3,
 caching-artifacts x2)

---
 ...ecture-386-wrong-cache-key-x64-windows.yml | 105 ++++++++++++++++
 ...act-v5-deploy-pages-v5-format-mismatch.yml | 119 ++++++++++++++++++
 ...e-download-403-access-denied-exception.yml |  99 +++++++++++++++
 ...og-to-stdout-multiline-per-line-prefix.yml | 105 ++++++++++++++++
 ...ker-401-vss-unauthorized-non-retryable.yml | 106 ++++++++++++++++
 5 files changed, 534 insertions(+)
 create mode 100644 errors/caching-artifacts/setup-go-architecture-386-wrong-cache-key-x64-windows.yml
 create mode 100644 errors/caching-artifacts/upload-pages-artifact-v5-deploy-pages-v5-format-mismatch.yml
 create mode 100644 errors/runner-environment/action-archive-download-403-access-denied-exception.yml
 create mode 100644 errors/runner-environment/actions-runner-print-log-to-stdout-multiline-per-line-prefix.yml
 create mode 100644 errors/runner-environment/broker-401-vss-unauthorized-non-retryable.yml

diff --git a/errors/caching-artifacts/setup-go-architecture-386-wrong-cache-key-x64-windows.yml b/errors/caching-artifacts/setup-go-architecture-386-wrong-cache-key-x64-windows.yml
new file mode 100644
index 0000000..db39ff7
--- /dev/null
+++ b/errors/caching-artifacts/setup-go-architecture-386-wrong-cache-key-x64-windows.yml
@@ -0,0 +1,105 @@
+id: caching-artifacts-128
+title: "setup-go architecture: '386' Uses Wrong Cache Key (x64 Suffix) — Cache Miss and Incorrect Isolation on Windows"
+category: caching-artifacts
+severity: silent-failure
+tags:
+  - setup-go
+  - go
+  - cache
+  - architecture
+  - '386'  # quoted: a bare 386 is parsed as an integer, not a string tag
+  - x86
+  - windows
+  - cache-key-bug
+patterns:
+  - regex: 'setup-go-Windows-x64-go-.*(?:key|cache).*386|386.*setup-go.*Windows.*x64'
+    flags: 'i'
+  - regex: 'Cache not found.*setup-go-Windows-x64.*386|wrong.*architecture.*cache.*go'
+    flags: 'i'
+error_messages:
+  - "Cache not found for key: setup-go-Windows-x64-go-1.22.12-{hash}"
+  - "setup-go-Windows-x64-go-1.22.12-{hash}"
+root_cause: |
+  `actions/setup-go` builds its cache key using the runner OS, architecture, Go version,
+  and a hash of the `go.sum` file:
+
+  ```
+  setup-go-{os}-{arch}-go-{version}-{hash}
+  ```
+
+  When `architecture: '386'` is specified in the action's `with:` block, the intended
+  key is `setup-go-Windows-386-go-{version}-{hash}`. However, due to a bug in how
+  the architecture input is resolved for the cache key (it reads the system architecture
+  rather than the requested architecture), the actual key produced is:
+
+  ```
+  setup-go-Windows-x64-go-{version}-{hash}
+  ```
+
+  The `x64` suffix comes from the host runner architecture. A 386-targeted Go build
+  running on an x64 Windows runner generates a key that **collides** with a native x64
+  build key. This causes:
+
+  1. **Cache pollution**: if x64 and 386 builds share the same cache entry, the first
+     build to save wins, potentially restoring the wrong architecture's module cache.
+  2. **Silent cache miss**: even if the 386 cache entry was saved correctly in a prior
+     run, subsequent runs with `architecture: '386'` may restore the x64 cache (or vice
+     versa), causing unexpected build failures or incorrect binaries.
+
+  This is documented in actions/setup-go issue #749 (open as of June 2026).
+fix: |
+  **Workaround:** Disable the built-in cache (`cache: false`) and add a manual
+  `actions/cache` step whose `key:` includes the requested architecture, so that
+  386 and x64 builds never share a cache entry.
+
+  Alternatively, run the x64 and 386 builds as separate matrix jobs, each with its
+  own architecture-specific cache key (both variants are shown in `fix_code`).
+fix_code:
+  - language: yaml
+    label: "Workaround — manual cache step with architecture in key"
+    code: |
+      - name: Setup Go (386 architecture)
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.22'
+          architecture: '386'
+          cache: false            # disable built-in cache to use manual key below
+
+      - name: Cache Go modules (386-specific key)
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~\AppData\Local\go-build
+            ~\go\pkg\mod
+          # Include architecture explicitly — setup-go's built-in key uses the host arch (x64) for 386:
+          key: setup-go-Windows-386-go-1.22-${{ hashFiles('**/go.sum') }}
+          restore-keys: |
+            setup-go-Windows-386-go-1.22-
+  - language: yaml
+    label: "Workaround — use separate jobs for x64 and 386 with distinct cache keys"
+    code: |
+      strategy:
+        matrix:
+          arch: ['x64', '386']
+      steps:
+        - uses: actions/setup-go@v5
+          with:
+            go-version: '1.22'
+            architecture: ${{ matrix.arch }}
+            cache: false
+
+        - uses: actions/cache@v4
+          with:
+            path: '~\AppData\Local\go-build'
+            key: setup-go-Windows-${{ matrix.arch }}-go-1.22-${{ hashFiles('**/go.sum') }}
+prevention:
+  - "When using setup-go with architecture: '386' on Windows, disable the built-in cache and provide an explicit cache step that includes '386' in the key"
+  - "Verify cache isolation: after a 386 build, check that the saved cache key includes '386' not 'x64'"
+  - "Track actions/setup-go#749 for an official fix — once merged, the built-in cache will use the requested architecture in the key"
+docs:
+  - url: "https://github.com/actions/setup-go/issues/749"
+    label: "actions/setup-go#749 — Cache key incorrectly uses x64 instead of 386 when architecture is set to 386"
+  - url: "https://docs.github.com/en/actions/use-cases-and-examples/building-and-testing/building-and-testing-go"
+    label: "GitHub Docs — Building and testing Go"
+  - url: "https://github.com/actions/setup-go#caching-dependency-files-and-build-outputs"
+    label: "actions/setup-go — Caching dependency files and build outputs"
diff --git a/errors/caching-artifacts/upload-pages-artifact-v5-deploy-pages-v5-format-mismatch.yml b/errors/caching-artifacts/upload-pages-artifact-v5-deploy-pages-v5-format-mismatch.yml
new file mode 100644
index 0000000..6d6ed3d
--- /dev/null
+++ b/errors/caching-artifacts/upload-pages-artifact-v5-deploy-pages-v5-format-mismatch.yml
@@ -0,0 +1,119 @@
+id: caching-artifacts-127
+title: "upload-pages-artifact@v5 + deploy-pages@v5 Incompatible — Immutable Artifact Format Not Consumed by deploy-pages"
+category: caching-artifacts
+severity: error
+tags:
+  - pages
+  - deploy-pages
+  - upload-pages-artifact
+  - v5
+  - upload-artifact-v7
+  - immutable-artifacts
+  - breaking-change
+patterns:
+  - regex: 'deploy-pages.*failed.*artifact|artifact.*not.*found.*deploy-pages'
+    flags: 'i'
+  - regex: 'actions/deploy-pages@v5.*artifact|upload-pages-artifact@v5.*deploy-pages@v5'
+    flags: 'i'
+  - regex: 'No artifact.*github-pages|pages.*deployment.*failed.*artifact'
+    flags: 'i'
+error_messages:
+  - "Error: No artifact named 'github-pages' was found for the workflow run"
+  - "Error: Failed to deploy to GitHub Pages: Artifact not found"
+  - "deploy-pages: No artifacts found for deployment"
+root_cause: |
+  `actions/upload-pages-artifact@v5` updated its internal dependency to use
+  `actions/upload-artifact@v7` (the new immutable-artifacts API). This is
+  documented in the v5 release notes: *"Update upload-artifact action to
+  version 7"*.
+
+  `actions/upload-artifact@v7` creates artifacts using the new immutable
+  artifacts storage API (the "Results" backend). These artifacts are NOT
+  accessible via the old artifact API that `actions/deploy-pages@v5` uses
+  when downloading the `github-pages` artifact for deployment.
+
+  `actions/deploy-pages@v5` was released with only a Node.js 24 bump and
+  minor maintenance changes — it was **not updated** to consume artifacts from
+  the new immutable artifacts API. As a result, upgrading both pages actions to
+  v5 simultaneously leaves `deploy-pages` unable to locate the artifact uploaded
+  by `upload-pages-artifact`.
+
+  The symptom: the workflow succeeds through the upload step but fails at
+  deployment with "No artifact found" or "Artifact not found", because
+  `deploy-pages` is looking in the old artifact storage location.
+fix: |
+  **Option A (recommended):** Downgrade `upload-pages-artifact` back to v4 while
+  keeping `deploy-pages@v5`:
+
+  ```yaml
+  - uses: actions/upload-pages-artifact@v4   # v4 uses upload-artifact@v4 (old API)
+    with:
+      path: ./dist
+  - uses: actions/deploy-pages@v5
+  ```
+
+  This pairs the old-API artifact producer with the current deployer until
+  `deploy-pages` is updated to support the new format.
+
+  **Option B:** Keep both at v4:
+
+  ```yaml
+  - uses: actions/upload-pages-artifact@v4
+    with:
+      path: ./dist
+  - uses: actions/deploy-pages@v4
+  ```
+
+  **Option C:** Watch `actions/deploy-pages` releases for a version that
+  aligns with `upload-artifact@v7` / immutable artifacts. Once released,
+  upgrade both to matching major versions simultaneously.
+fix_code:
+  - language: yaml
+    label: "Downgrade upload-pages-artifact to v4 (pairs with deploy-pages@v5)"
+    code: |
+      jobs:
+        deploy:
+          runs-on: ubuntu-latest
+          permissions:
+            pages: write
+            id-token: write
+          steps:
+            - name: Upload pages artifact
+              uses: actions/upload-pages-artifact@v4    # ← pin to v4
+              with:
+                path: ./dist
+
+            - name: Deploy to GitHub Pages
+              id: deployment
+              uses: actions/deploy-pages@v5             # v5 is fine here
+  - language: yaml
+    label: "Pin both pages actions to v4 (fully stable combination)"
+    code: |
+      jobs:
+        deploy:
+          runs-on: ubuntu-latest
+          permissions:
+            pages: write
+            id-token: write
+          steps:
+            - name: Upload pages artifact
+              uses: actions/upload-pages-artifact@v4
+              with:
+                path: ./dist
+
+            - name: Deploy to GitHub Pages
+              id: deployment
+              uses: actions/deploy-pages@v4
+prevention:
+  - "When upgrading GitHub Pages actions, check the release notes of both upload-pages-artifact AND deploy-pages for API compatibility before upgrading both simultaneously"
+  - "Test a Pages deployment in a branch workflow before merging the version bump"
+  - "Pin both actions to the same major version family — v3/v4 pair or v5/v5+ once deploy-pages ships the matching update"
+docs:
+  - url: "https://github.com/actions/upload-pages-artifact/releases"
+    label: "actions/upload-pages-artifact releases"
+  - url: "https://github.com/actions/deploy-pages/releases"
+    label: "actions/deploy-pages releases"
+  - url: "https://github.com/Azure/awesome-azd/pull/883/files"
+    label: "Real-world fix: revert upload-pages-artifact + deploy-pages from v5 to v4 (Azure/awesome-azd)"
+  - url: "https://docs.github.com/en/pages/getting-started-with-github-pages/using-custom-workflows-with-github-pages"
+    label: "GitHub Docs — Using custom workflows with GitHub Pages"
diff --git a/errors/runner-environment/action-archive-download-403-access-denied-exception.yml b/errors/runner-environment/action-archive-download-403-access-denied-exception.yml
new file mode 100644
index 0000000..d63612b
--- /dev/null
+++ b/errors/runner-environment/action-archive-download-403-access-denied-exception.yml
@@ -0,0 +1,99 @@
+id: runner-environment-412
+title: "Action Archive Download Returns 403 — Runner v2.335.0+ Reports AccessDeniedException Immediately (No Retry)"
+category: runner-environment
+severity: error
+tags:
+  - action-download
+  - '403'  # quoted: a bare 403 is parsed as an integer, not a string tag
+  - access-denied
+  - organization-policy
+  - private-action
+  - v2.335.0
+  - uses-step
+patterns:
+  - regex: 'Access denied to .*(codeload\.github\.com|github\.com).*\([a-zA-Z0-9_-]+\)'
+    flags: 'i'
+  - regex: "An action could not be found at the URI '.*' \\(.*\\)"
+    flags: 'i'
+  - regex: 'action.*download.*403|403.*action.*archive'
+    flags: 'i'
+error_messages:
+  - "Access denied to 'https://codeload.github.com/owner/action/tar.gz/SHA' (request-id-abc123)"
+  - "An action could not be found at the URI 'https://codeload.github.com/owner/action/tar.gz/SHA'"
+root_cause: |
+  When a `uses: owner/repo@version` step runs, the runner downloads the action archive
+  from `codeload.github.com`. If the download returns HTTP 403 (Forbidden), the runner
+  throws an `AccessDeniedException` immediately.
+
+  Before runner **v2.335.0** (released June 8, 2026), a 403 response would trigger the
+  standard retry loop — 3 retry attempts with backoff — before eventually failing with
+  a generic download error. This masked the real cause and added up to 3 minutes of delay.
+
+  Starting with **runner v2.335.0** (PR #4391 — "Not retry and report action download 403"),
+  403 is treated as a non-retryable client-side error: the runner throws an
+  `AccessDeniedException` immediately and the job fails fast with a specific message
+  including the download URL and request ID.
+
+  Common causes of the 403:
+  - The action's source repository is private and the runner's GITHUB_TOKEN does not
+    have access to it (e.g. using a private action in a different org without proper
+    cross-repo access).
+  - The organization's Actions policy is set to an allowed-list and the action is not
+    on the list (`Settings → Actions → General → Actions permissions`).
+  - The action reference (`@SHA`, `@tag`) was deleted or made inaccessible.
+  - The `GITHUB_TOKEN` does not have sufficient scope to download from the target repo.
+fix: |
+  **Check the exact blocked URL in the error message.** The `(request-id)` in the
+  `AccessDeniedException` message is a GitHub-side identifier for support escalation.
+
+  **If blocked by organization policy:**
+  Add the action to the organization's allowed-list under
+  `Settings → Actions → General → Actions permissions → Allow specific actions`.
+  Wildcard patterns like `actions/*` or `owner/*` are supported.
+
+  **If the action is private:**
+  Private actions must be in the same repository (or the runner must have cross-repo
+  access granted via GitHub Apps). Consider:
+  - Converting the action to a public repository.
+  - Using a local composite action in `.github/actions/` instead.
+  - Checking out the private action repo with an installation token that has access to it (instead of the default `secrets.GITHUB_TOKEN`) and referencing it by local path.
+
+  **If using a GitHub App installation token:**
+  Ensure the token includes the `actions:read` or `contents:read` permission on the
+  action's source repository.
+fix_code:
+  - language: yaml
+    label: "Allow specific action in organization policy"
+    code: |
+      # Organization Settings → Actions → General → Actions permissions
+      # Allowed actions (wildcard patterns):
+      #   actions/*,octocat/example-action@v2
+      #
+      # Or in YAML for self-hosted GitHub Enterprise Server:
+      # github_actions:
+      #   permissions:
+      #     enabled: true
+      #     allowed_actions: selected
+      #     selected_actions:
+      #       - owner/action@v1
+  - language: yaml
+    label: "Convert to local composite action to avoid cross-repo 403"
+    code: |
+      # Move action code to .github/actions/my-action/action.yml
+      # Then reference locally (no cross-repo 403 risk):
+      steps:
+        - uses: ./.github/actions/my-action
+          with:
+            input: value
+prevention:
+  - "Review organization Actions policies (allowed-actions list) before using actions from private repos"
+  - "Test action access with a direct `curl` or `gh api` call to the archive URL using the same token"
+  - "In org policy, use the allow-list selector early — Actions blocked by policy now fail immediately (no retry delay) on runner v2.335.0+"
+  - "For private actions, prefer keeping them in the same repository as the workflow"
+docs:
+  - url: "https://github.com/actions/runner/pull/4391"
+    label: "runner PR #4391 — Not retry and report action download 403 (shipped in v2.335.0)"
+  - url: "https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/enabling-features-for-your-repository/managing-github-actions-settings-for-a-repository"
+    label: "GitHub Docs — Managing GitHub Actions settings (allowed actions)"
+  - url: "https://docs.github.com/en/actions/sharing-automations/creating-actions/about-custom-actions#using-release-management-for-actions"
+    label: "GitHub Docs — Using release management for actions"
diff --git a/errors/runner-environment/actions-runner-print-log-to-stdout-multiline-per-line-prefix.yml b/errors/runner-environment/actions-runner-print-log-to-stdout-multiline-per-line-prefix.yml
new file mode 100644
index 0000000..2085832
--- /dev/null
+++ b/errors/runner-environment/actions-runner-print-log-to-stdout-multiline-per-line-prefix.yml
@@ -0,0 +1,105 @@
+id: runner-environment-413
+title: "ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT Adds Timestamp Prefix to Every Line of Multiline Log Messages — Breaks Log Aggregator Parsing"
+category: runner-environment
+severity: warning
+tags:
+  - self-hosted
+  - stdout-logging
+  - multiline-logs
+  - log-aggregator
+  - kubernetes
+  - fluentbit
+  - v2.335.0
+  - PRINT_LOG_TO_STDOUT
+patterns:
+  - regex: '\[RUNNER \d{4}-\d{2}-\d{2}.*INFO.*\]\s*[\{\[\}\]]'  # prefix followed by a JSON open/close bracket
+    flags: 'i'
+  - regex: '\[RUNNER.*INFO.*BrokerMessageListener\].*"AgentId":'
+    flags: 'i'
+error_messages:
+  - "[RUNNER 2026-05-06 13:23:59Z INFO BrokerMessageListener] {"
+  - "[RUNNER 2026-05-06 13:23:59Z INFO BrokerMessageListener]   \"AgentId\": 2,"
+  - "[RUNNER 2026-05-06 13:23:59Z INFO BrokerMessageListener] }"
+root_cause: |
+  When `ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT=1` is set, the runner redirects its internal
+  diagnostic log to stdout. This is commonly used in containerized self-hosted runner
+  deployments (Kubernetes, Docker) where a log-forwarding sidecar (Fluentbit, Vector,
+  OpenTelemetry Collector) is expected to read stdout and forward to a log aggregator.
+
+  The issue: `StdoutTraceListener.TraceEvent()` splits each message on newline and calls
+  `WriteHeader()` for **every line**. A multiline log entry (such as a JSON settings dump
+  or a stack trace) becomes:
+
+  ```
+  [RUNNER 2026-05-17 13:23:59Z INFO Component] {
+  [RUNNER 2026-05-17 13:23:59Z INFO Component]   "field": "value",
+  [RUNNER 2026-05-17 13:23:59Z INFO Component] }
+  ```
+
+  Log aggregators that use timestamp-based multiline parsing (Fluentbit's `multiline`,
+  Elastic Filebeat's `multiline`, OpenTelemetry Collector's `recombine`) detect a
+  *new log entry* every line because every line starts with a timestamp. This makes
+  the JSON/stack-trace appear as many separate single-line log entries rather than
+  one grouped multiline entry — breaking downstream log indexing, alerting, and
+  search that expects structured objects.
+
+  This behavior was present since the feature was added (runner PR #2291, ~2022) and
+  was fixed in runner **v2.335.0** (released June 8, 2026) via PR #4424.
+fix: |
+  **Runner v2.335.0+:** Set `ACTIONS_RUNNER_DISABLE_STDOUT_MULTILINE_LOG_PREFIXING=1`
+  in the runner's environment. This causes multiline messages to receive a prefix only
+  on the **first line**, matching the behavior of the `_diag/` file logs.
+
+  The file-based `_diag/` logs have always formatted multiline messages correctly:
+  only the first line gets a `[timestamp INFO Component]` header, and continuation
+  lines are indented without a header. Setting this env var brings stdout logs
+  in line with `_diag/` format.
+
+  **Older runners (pre-v2.335.0):** No built-in fix is available. Workarounds:
+  - Configure your log aggregator's multiline mode to treat a prefixed line whose
+    message is indented or a lone closing bracket as a continuation of the entry above.
+  - Upgrade the runner to v2.335.0 or later.
+fix_code:
+  - language: yaml
+    label: "Set env var in Kubernetes runner pod spec to disable per-line prefix"
+    code: |
+      # In your Kubernetes runner pod spec or Docker run command:
+      env:
+        - name: ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT
+          value: "1"
+        - name: ACTIONS_RUNNER_DISABLE_STDOUT_MULTILINE_LOG_PREFIXING
+          value: "1"  # Available from runner v2.335.0+ (June 8, 2026)
+  - language: yaml
+    label: "ARC (Actions Runner Controller) runner scale set env configuration"
+    code: |
+      # In your ARC HelmRelease values.yaml:
+      template:
+        spec:
+          containers:
+            - name: runner
+              env:
+                - name: ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT
+                  value: "1"
+                - name: ACTIONS_RUNNER_DISABLE_STDOUT_MULTILINE_LOG_PREFIXING
+                  value: "1"
+  - language: ini
+    label: "Fluentbit multiline parser workaround for pre-v2.335.0 runners"
+    code: |
+      # Every line carries the [RUNNER ...] prefix, so group on what FOLLOWS the prefix:
+      [MULTILINE_PARSER]
+          Name          runner_multiline
+          Type          regex
+          Flush_Timeout 1000
+          Rule          "start_state" "/^\[RUNNER \d{4}-\d{2}-\d{2}/" "cont"
+          Rule          "cont"       "/^\[RUNNER [^\]]*\] (\s|[\}\]])/" "cont"
+prevention:
+  - "When deploying runners to Kubernetes, use runner v2.335.0+ and set ACTIONS_RUNNER_DISABLE_STDOUT_MULTILINE_LOG_PREFIXING=1 alongside ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT=1"
+  - "Test log aggregator multiline detection after any runner version upgrade"
+  - "Check _diag/ file format as a reference — stdout format should match file format when the new env var is set"
+docs:
+  - url: "https://github.com/actions/runner/pull/4424"
+    label: "runner PR #4424 — Add ACTIONS_RUNNER_DISABLE_STDOUT_MULTILINE_LOG_PREFIXING (v2.335.0)"
+  - url: "https://github.com/actions/runner/issues/4423"
+    label: "runner #4423 — Multiline runner stdout logs are extremely difficult to parse due to repeated line headers"
+  - url: "https://docs.fluentbit.io/manual/data-pipeline/parsers/multiline-parsing"
+    label: "Fluentbit multiline parsing documentation"
diff --git a/errors/runner-environment/broker-401-vss-unauthorized-non-retryable.yml b/errors/runner-environment/broker-401-vss-unauthorized-non-retryable.yml
new file mode 100644
index 0000000..472e0f9
--- /dev/null
+++ b/errors/runner-environment/broker-401-vss-unauthorized-non-retryable.yml
@@ -0,0 +1,106 @@
+id: runner-environment-414
+title: "Self-Hosted Runner Immediately Fails on Broker 401 — VssUnauthorizedException Is Non-Retryable in Runner v2.335.0+"
+category: runner-environment
+severity: error
+tags:
+  - self-hosted
+  - broker
+  - authentication
+  - expired-token
+  - registration
+  - v2.335.0
+  - VssUnauthorizedException
+patterns:
+  - regex: 'VssUnauthorizedException|TF400813.*Resource not available.*anonymous'
+    flags: 'i'
+  - regex: 'BrokerServer.*401|401.*broker.*authentication|Unauthorized.*broker'
+    flags: 'i'
+  - regex: 'runner.*authentication.*failed.*broker|broker.*auth.*expired|registration.*token.*invalid'
+    flags: 'i'
+error_messages:
+  - "VssUnauthorizedException: TF400813: Resource not available for anonymous access. Client authentication required."
+  - "Broker authentication failed: 401 Unauthorized"
+  - "Failed to create message session: Unauthorized (401)"
+root_cause: |
+  Self-hosted runners authenticate to the GitHub broker service
+  (`broker.actions.githubusercontent.com`) using a registration token or credential
+  stored at configuration time. If this token expires, is revoked, or the runner is
+  removed from GitHub (while still running locally), subsequent broker requests return
+  HTTP 401.
+
+  Before runner **v2.335.0** (released June 8, 2026), the broker polling code treated
+  401 like a transient error and retried it multiple times (using the `ShouldRetryException`
+  predicate). Each retry added delay, and runners appeared "stuck" or "hanging" for
+  minutes before eventually stopping — obscuring the root cause.
+
+  Starting with **runner v2.335.0** (PR #4445 — "BrokerServer should not retry on 401"),
+  `VssUnauthorizedException` (the exception type wrapping 401) is treated as
+  **non-retryable**. The runner immediately stops its broker polling loop and exits with
+  an authentication error rather than retrying. This means:
+
+  - Failures are visible immediately (seconds, not minutes)
+  - The error message is clearly authentication-related
+  - On-call alerts trigger faster when a runner's credentials expire
+  - Runners that were "stuck" now self-terminate cleanly
+
+  **Common causes of 401 from the broker:**
+  - The runner's registration/PAT has expired (typical with short-lived tokens in
+    ephemeral runner provisioning scripts)
+  - The runner was deleted from the GitHub UI while it was still running
+  - The organization/repo moved or was renamed, invalidating the runner's URL
+  - Ephemeral runner tokens have a 60-minute TTL — runners that take >60 min to
+    receive their first job will 401 on broker connection
+fix: |
+  **Re-register the runner** using a freshly issued registration token:
+
+  ```
+  ./config.sh remove --token <removal-token>
+  ./config.sh --url <repo-or-org-url> --token <new-registration-token>
+  ./run.sh
+  ```
+
+  For **ephemeral runners** (JIT configuration): check that the ephemeral token is not
+  older than 60 minutes at the time the runner first attempts to connect to the broker.
+  If your provisioning pipeline is slow, generate the token closer to runner startup.
+
+  For **ARC (Actions Runner Controller)** deployments: check the `EphemeralRunner` pod
+  logs for `VssUnauthorizedException`. This indicates the controller's token lifecycle
+  handling should refresh or reissue tokens before they expire.
+fix_code:
+  - language: bash
+    label: "Re-register a self-hosted runner with a fresh token"
+    code: |
+      # Remove old registration (remove tokens are issued by the remove-token endpoint):
+      ./config.sh remove --token "$(gh api -X POST /repos/{owner}/{repo}/actions/runners/remove-token --jq '.token')"
+
+      # Register fresh:
+      REG_TOKEN=$(gh api -X POST /repos/{owner}/{repo}/actions/runners/registration-token --jq '.token')
+      ./config.sh --url https://github.com/{owner}/{repo} \
+                  --token "$REG_TOKEN" \
+                  --name "my-runner" \
+                  --unattended
+  - language: bash
+    label: "Ephemeral runner — generate token just before runner startup"
+    code: |
+      # Generate the JIT config in your provisioning script immediately before
+      # starting the runner (not 30-60 minutes earlier in the pipeline).
+      # -F sends runner_group_id as an integer; labels[] fields build a JSON array.
+      JIT_CONFIG=$(gh api -X POST \
+        /repos/{owner}/{repo}/actions/runners/generate-jitconfig \
+        -f name="ephemeral-$(date +%s)" \
+        -F runner_group_id=1 \
+        -f 'labels[]=self-hosted' -f 'labels[]=linux' \
+        --jq '.encoded_jit_config')
+      ./run.sh --jitconfig "$JIT_CONFIG"
+prevention:
+  - "Monitor runner logs for VssUnauthorizedException — on runner v2.335.0+ this appears immediately on 401 (previously delayed by retries)"
+  - "For ephemeral runners, generate JIT tokens immediately before runner startup to avoid the 60-minute TTL expiry"
+  - "Set up alerts on runner process exit code 1 — v2.335.0+ exits promptly on auth failure instead of hanging"
+  - "For ARC deployments, verify the EphemeralRunnerSet controller version supports token refresh; upgrade to ARC v0.14.1+ for latest lifecycle fixes"
+docs:
+  - url: "https://github.com/actions/runner/pull/4445"
+    label: "runner PR #4445 — BrokerServer should not retry on 401 (shipped in v2.335.0)"
+  - url: "https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners"
+    label: "GitHub Docs — About self-hosted runners"
+  - url: "https://docs.github.com/en/rest/actions/self-hosted-runners?apiVersion=2022-11-28#create-a-registration-token-for-a-repository"
+    label: "GitHub REST API — Create a registration token for a repository"