diff --git a/.github/workflows/red-team-benchmark.lock.yml b/.github/workflows/red-team-benchmark.lock.yml index 3f2025bea..190d7e6cc 100644 --- a/.github/workflows/red-team-benchmark.lock.yml +++ b/.github/workflows/red-team-benchmark.lock.yml @@ -181,20 +181,20 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh" { - cat << 'GH_AW_PROMPT_080a6de1d4b428b5_EOF' + cat << 'GH_AW_PROMPT_b3ed05ad70d5c98f_EOF' - GH_AW_PROMPT_080a6de1d4b428b5_EOF + GH_AW_PROMPT_b3ed05ad70d5c98f_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_080a6de1d4b428b5_EOF' + cat << 'GH_AW_PROMPT_b3ed05ad70d5c98f_EOF' Tools: create_issue, missing_tool, missing_data, noop - GH_AW_PROMPT_080a6de1d4b428b5_EOF + GH_AW_PROMPT_b3ed05ad70d5c98f_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/mcp_cli_tools_prompt.md" - cat << 'GH_AW_PROMPT_080a6de1d4b428b5_EOF' + cat << 'GH_AW_PROMPT_b3ed05ad70d5c98f_EOF' The following GitHub context information is available for this workflow: {{#if github.actor}} @@ -223,12 +223,12 @@ jobs: {{/if}} - GH_AW_PROMPT_080a6de1d4b428b5_EOF + GH_AW_PROMPT_b3ed05ad70d5c98f_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_080a6de1d4b428b5_EOF' + cat << 'GH_AW_PROMPT_b3ed05ad70d5c98f_EOF' {{#runtime-import .github/workflows/red-team-benchmark.md}} - GH_AW_PROMPT_080a6de1d4b428b5_EOF + GH_AW_PROMPT_b3ed05ad70d5c98f_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 @@ -399,22 +399,38 @@ jobs: - env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + id: preflight + name: Pre-flight credential check + run: "mkdir -p /tmp/gh-aw/agent\nPRECHECK_STATUS=\"ok\"\nPRECHECK_REASON=\"\"\nif [ -z 
\"$ANTHROPIC_API_KEY\" ] || [ -z \"$OPENAI_API_KEY\" ]; then\n PRECHECK_STATUS=\"skipped\"\n PRECHECK_REASON=\"missing API keys\"\n echo \"::warning::Missing API keys — benchmark runs will be skipped\"\nelse\n AUTH_HEADER=$(printf '%b%s' '\\x41\\x75\\x74\\x68\\x6f\\x72\\x69\\x7a\\x61\\x74\\x69\\x6f\\x6e: Bearer ' \"$OPENAI_API_KEY\")\n OPENAI_STATUS=$(curl -sS -o /tmp/gh-aw/agent/openai-preflight.json -w \"%{http_code}\" \\\n https://api.openai.com/v1/responses \\\n -H \"$AUTH_HEADER\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"model\":\"gpt-4o-mini\",\"input\":\"awf preflight\",\"max_output_tokens\":1}' || true) # curl -w already prints 000 on transport failure; an extra echo would yield 000000\n if [ \"$OPENAI_STATUS\" = \"401\" ] || [ \"$OPENAI_STATUS\" = \"403\" ]; then\n PRECHECK_STATUS=\"skipped\"\n PRECHECK_REASON=\"OpenAI Responses API auth failed (HTTP $OPENAI_STATUS)\"\n echo \"::warning::${PRECHECK_REASON}\"\n elif [ \"$OPENAI_STATUS\" = \"404\" ] || [ \"${OPENAI_STATUS:-000}\" = \"000\" ]; then\n PRECHECK_STATUS=\"skipped\"\n PRECHECK_REASON=\"OpenAI Responses API unavailable (HTTP ${OPENAI_STATUS:-000})\"\n echo \"::warning::${PRECHECK_REASON}\"\n fi\nfi\njq -n --arg status \"$PRECHECK_STATUS\" --arg reason \"$PRECHECK_REASON\" \\\n '{status:$status,reason:$reason}' > /tmp/gh-aw/agent/preflight-check.json\necho \"PRECHECK_STATUS=$PRECHECK_STATUS\" >> \"$GITHUB_OUTPUT\"\necho \"PRECHECK_REASON=$PRECHECK_REASON\" >> \"$GITHUB_OUTPUT\"\n" + - env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PRECHECK_REASON: ${{ steps.preflight.outputs.PRECHECK_REASON }} + PRECHECK_STATUS: ${{ steps.preflight.outputs.PRECHECK_STATUS }} id: baseline name: Run baseline benchmark (victim without AWF) - run: "mkdir -p /tmp/gh-aw/agent/baseline\nBASELINE_LEAKS=\"n/a\"\nBASELINE_ATTEMPTS=\"n/a\"\nif [ -z \"$ANTHROPIC_API_KEY\" ] || [ -z \"$OPENAI_API_KEY\" ]; then\n echo \"::warning::Missing API keys — baseline run skipped\"\n echo '{\"skipped\":true,\"reason\":\"missing API keys\"}' > 
/tmp/gh-aw/agent/baseline/summary.json\nelse\n cd /tmp/adversarial_dojo\n \"$HOME/.local/bin/uv\" run adversarial-dojo search-attacks \\\n /tmp/awf-benchmark.toml \\\n --out /tmp/gh-aw/agent/baseline \\\n 2>/tmp/gh-aw/agent/baseline/stderr.log || true\n if [ -f /tmp/gh-aw/agent/baseline/summary.json ]; then\n BASELINE_LEAKS=$(jq -r '.leak_events | length' /tmp/gh-aw/agent/baseline/summary.json 2>/dev/null || echo \"unknown\")\n BASELINE_ATTEMPTS=$(jq -r '.total_scenarios' /tmp/gh-aw/agent/baseline/summary.json 2>/dev/null || echo \"unknown\")\n fi\n echo \"Baseline — attempts: $BASELINE_ATTEMPTS, leaks: $BASELINE_LEAKS\"\nfi\necho \"BASELINE_LEAKS=$BASELINE_LEAKS\" >> \"$GITHUB_OUTPUT\"\necho \"BASELINE_ATTEMPTS=$BASELINE_ATTEMPTS\" >> \"$GITHUB_OUTPUT\"\n" + run: "mkdir -p /tmp/gh-aw/agent/baseline\nBASELINE_LEAKS=\"n/a\"\nBASELINE_ATTEMPTS=\"n/a\"\nBASELINE_STATUS=\"completed\"\nBASELINE_REASON=\"\"\nif [ \"${PRECHECK_STATUS}\" != \"ok\" ]; then\n BASELINE_STATUS=\"skipped\"\n BASELINE_REASON=\"${PRECHECK_REASON:-pre-flight credential check failed}\"\n echo \"::warning::Baseline run skipped — $BASELINE_REASON\"\n jq -n --arg reason \"$BASELINE_REASON\" '{skipped:true,reason:$reason}' > /tmp/gh-aw/agent/baseline/summary.json\nelse\n cd /tmp/adversarial_dojo\n \"$HOME/.local/bin/uv\" run adversarial-dojo search-attacks \\\n /tmp/awf-benchmark.toml \\\n --out /tmp/gh-aw/agent/baseline \\\n 2>/tmp/gh-aw/agent/baseline/stderr.log || true\n if [ -f /tmp/gh-aw/agent/baseline/summary.json ]; then\n BASELINE_LEAKS=$(jq -r '.leak_events | length' /tmp/gh-aw/agent/baseline/summary.json 2>/dev/null || echo \"unknown\")\n BASELINE_ATTEMPTS=$(jq -r '.total_scenarios' /tmp/gh-aw/agent/baseline/summary.json 2>/dev/null || echo \"unknown\")\n fi\n if [ -f /tmp/gh-aw/agent/baseline/attempts.jsonl ] && jq -e 'select((.error // \"\" | test(\"401|unauthorized\"; \"i\")))' /tmp/gh-aw/agent/baseline/attempts.jsonl >/dev/null 2>&1; then\n BASELINE_STATUS=\"inconclusive\"\n 
BASELINE_REASON=\"attacker authentication failed (401 Unauthorized)\"\n elif [ -f /tmp/gh-aw/agent/baseline/attempts.jsonl ] && ! jq -e 'select(.proposal != null)' /tmp/gh-aw/agent/baseline/attempts.jsonl >/dev/null 2>&1; then\n BASELINE_STATUS=\"inconclusive\"\n BASELINE_REASON=\"attacker produced no proposals\"\n fi\n echo \"Baseline — attempts: $BASELINE_ATTEMPTS, leaks: $BASELINE_LEAKS\"\nfi\necho \"BASELINE_LEAKS=$BASELINE_LEAKS\" >> \"$GITHUB_OUTPUT\"\necho \"BASELINE_ATTEMPTS=$BASELINE_ATTEMPTS\" >> \"$GITHUB_OUTPUT\"\necho \"BASELINE_STATUS=$BASELINE_STATUS\" >> \"$GITHUB_OUTPUT\"\necho \"BASELINE_REASON=$BASELINE_REASON\" >> \"$GITHUB_OUTPUT\"\n" - env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PRECHECK_REASON: ${{ steps.preflight.outputs.PRECHECK_REASON }} + PRECHECK_STATUS: ${{ steps.preflight.outputs.PRECHECK_STATUS }} id: awf_run name: Run AWF-protected benchmark (victim inside AWF sandbox) - run: "mkdir -p /tmp/gh-aw/agent/awf\nmkdir -p /tmp/gh-aw/agent/awf/firewall-logs\nAWF_LEAKS=\"n/a\"\nAWF_BLOCKED=\"n/a\"\nif [ -z \"$ANTHROPIC_API_KEY\" ] || [ -z \"$OPENAI_API_KEY\" ]; then\n echo \"::warning::Missing API keys — AWF-protected run skipped\"\n echo '{\"skipped\":true,\"reason\":\"missing API keys\"}' > /tmp/gh-aw/agent/awf/summary.json\nelif ! 
command -v claude >/dev/null 2>&1; then\n echo \"::error::Claude CLI is missing on runner\"\n echo '{\"skipped\":false,\"reason\":\"missing claude binary\"}' > /tmp/gh-aw/agent/awf/summary.json\n exit 1\nelse\n # Run the benchmark inside AWF sandbox — benchmark traffic is restricted\n # to api.anthropic.com and api.openai.com, blocking other egress attempts.\n # Mount adversarial_dojo (with its uv-managed venv), the uv binary, config\n # files and the output directory so the benchmark tooling is available\n # inside the minimal AWF container image.\n sudo awf \\\n --allow-domains api.anthropic.com,api.openai.com \\\n --proxy-logs-dir /tmp/gh-aw/agent/awf/firewall-logs \\\n --log-level info \\\n --mount /tmp/adversarial_dojo:/tmp/adversarial_dojo \\\n --mount \"$HOME/.local/bin/uv:$HOME/.local/bin/uv:ro\" \\\n --mount /tmp/awf-benchmark.toml:/tmp/awf-benchmark.toml:ro \\\n --mount /tmp/awf-benchmark:/tmp/awf-benchmark:ro \\\n --mount /tmp/gh-aw/agent/awf:/tmp/gh-aw/agent/awf \\\n --container-workdir /tmp/adversarial_dojo \\\n --env \"ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY\" \\\n --env \"OPENAI_API_KEY=$OPENAI_API_KEY\" \\\n -- \"$HOME/.local/bin/uv\" run adversarial-dojo search-attacks \\\n /tmp/awf-benchmark.toml \\\n --out /tmp/gh-aw/agent/awf \\\n 2>/tmp/gh-aw/agent/awf/stderr.log || true\n if [ -f /tmp/gh-aw/agent/awf/summary.json ]; then\n AWF_LEAKS=$(jq -r '.leak_events | length' /tmp/gh-aw/agent/awf/summary.json 2>/dev/null || echo \"unknown\")\n fi\n # Count DENIED entries in Squid access log produced by AWF\n SQUID_LOG=/tmp/gh-aw/agent/awf/firewall-logs/access.log\n if [ ! 
-f \"$SQUID_LOG\" ]; then\n SQUID_LOG=$(find /tmp -name 'access.log' -path '*awf*' 2>/dev/null | head -1)\n fi\n if [ -n \"$SQUID_LOG\" ]; then\n AWF_BLOCKED=$(grep -c \"DENIED\" \"$SQUID_LOG\" 2>/dev/null || true)\n cp \"$SQUID_LOG\" /tmp/gh-aw/agent/squid-access.log\n else\n echo \"No Squid access log found\" > /tmp/gh-aw/agent/squid-access.log\n AWF_BLOCKED=\"0\"\n fi\n echo \"AWF-protected — leaks: $AWF_LEAKS, blocked requests: $AWF_BLOCKED\"\nfi\necho \"AWF_LEAKS=$AWF_LEAKS\" >> \"$GITHUB_OUTPUT\"\necho \"AWF_BLOCKED=$AWF_BLOCKED\" >> \"$GITHUB_OUTPUT\"\n" + run: "mkdir -p /tmp/gh-aw/agent/awf\nmkdir -p /tmp/gh-aw/agent/awf/firewall-logs\nAWF_LEAKS=\"n/a\"\nAWF_BLOCKED=\"n/a\"\nAWF_STATUS=\"completed\"\nAWF_REASON=\"\"\nif [ \"${PRECHECK_STATUS}\" != \"ok\" ]; then\n AWF_STATUS=\"skipped\"\n AWF_REASON=\"${PRECHECK_REASON:-pre-flight credential check failed}\"\n echo \"::warning::AWF-protected run skipped — $AWF_REASON\"\n jq -n --arg reason \"$AWF_REASON\" '{skipped:true,reason:$reason}' > /tmp/gh-aw/agent/awf/summary.json\nelif ! 
command -v claude >/dev/null 2>&1; then\n AWF_STATUS=\"inconclusive\"\n AWF_REASON=\"missing claude binary\"\n echo \"::error::Claude CLI is missing on runner\"\n echo '{\"skipped\":false,\"reason\":\"missing claude binary\"}' > /tmp/gh-aw/agent/awf/summary.json\n exit 1\nelse\n # Run the benchmark inside AWF sandbox — benchmark traffic is restricted\n # to api.anthropic.com and api.openai.com, blocking other egress attempts.\n # Mount adversarial_dojo (with its uv-managed venv), the uv binary, config\n # files and the output directory so the benchmark tooling is available\n # inside the minimal AWF container image.\n sudo awf \\\n --allow-domains api.anthropic.com,api.openai.com \\\n --proxy-logs-dir /tmp/gh-aw/agent/awf/firewall-logs \\\n --log-level info \\\n --mount /tmp/adversarial_dojo:/tmp/adversarial_dojo \\\n --mount \"$HOME/.local/bin/uv:$HOME/.local/bin/uv:ro\" \\\n --mount /tmp/awf-benchmark.toml:/tmp/awf-benchmark.toml:ro \\\n --mount /tmp/awf-benchmark:/tmp/awf-benchmark:ro \\\n --mount /tmp/gh-aw/agent/awf:/tmp/gh-aw/agent/awf \\\n --container-workdir /tmp/adversarial_dojo \\\n --env \"ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY\" \\\n --env \"OPENAI_API_KEY=$OPENAI_API_KEY\" \\\n -- \"$HOME/.local/bin/uv\" run adversarial-dojo search-attacks \\\n /tmp/awf-benchmark.toml \\\n --out /tmp/gh-aw/agent/awf \\\n 2>/tmp/gh-aw/agent/awf/stderr.log || true\n if [ -f /tmp/gh-aw/agent/awf/summary.json ]; then\n AWF_LEAKS=$(jq -r '.leak_events | length' /tmp/gh-aw/agent/awf/summary.json 2>/dev/null || echo \"unknown\")\n fi\n if [ -f /tmp/gh-aw/agent/awf/attempts.jsonl ] && jq -e 'select((.error // \"\" | test(\"401|unauthorized\"; \"i\")))' /tmp/gh-aw/agent/awf/attempts.jsonl >/dev/null 2>&1; then\n AWF_STATUS=\"inconclusive\"\n AWF_REASON=\"attacker authentication failed (401 Unauthorized)\"\n elif [ -f /tmp/gh-aw/agent/awf/attempts.jsonl ] && ! 
jq -e 'select(.proposal != null)' /tmp/gh-aw/agent/awf/attempts.jsonl >/dev/null 2>&1; then\n AWF_STATUS=\"inconclusive\"\n AWF_REASON=\"attacker produced no proposals\"\n fi\n # Count DENIED entries in Squid access log produced by AWF\n SQUID_LOG=/tmp/gh-aw/agent/awf/firewall-logs/access.log\n if [ ! -f \"$SQUID_LOG\" ]; then\n SQUID_LOG=$(find /tmp -name 'access.log' -path '*awf*' 2>/dev/null | head -1)\n fi\n if [ -n \"$SQUID_LOG\" ]; then\n AWF_BLOCKED=$(grep -c \"DENIED\" \"$SQUID_LOG\" 2>/dev/null || true)\n cp \"$SQUID_LOG\" /tmp/gh-aw/agent/squid-access.log\n else\n echo \"No Squid access log found\" > /tmp/gh-aw/agent/squid-access.log\n AWF_BLOCKED=\"0\"\n fi\n echo \"AWF-protected — leaks: $AWF_LEAKS, blocked requests: $AWF_BLOCKED\"\nfi\necho \"AWF_LEAKS=$AWF_LEAKS\" >> \"$GITHUB_OUTPUT\"\necho \"AWF_BLOCKED=$AWF_BLOCKED\" >> \"$GITHUB_OUTPUT\"\necho \"AWF_STATUS=$AWF_STATUS\" >> \"$GITHUB_OUTPUT\"\necho \"AWF_REASON=$AWF_REASON\" >> \"$GITHUB_OUTPUT\"\n" - env: EXPR_AWF_BLOCKED: ${{ steps.awf_run.outputs.AWF_BLOCKED }} EXPR_AWF_LEAKS: ${{ steps.awf_run.outputs.AWF_LEAKS }} + EXPR_AWF_REASON: ${{ steps.awf_run.outputs.AWF_REASON }} + EXPR_AWF_STATUS: ${{ steps.awf_run.outputs.AWF_STATUS }} EXPR_BASELINE_ATTEMPTS: ${{ steps.baseline.outputs.BASELINE_ATTEMPTS }} EXPR_BASELINE_LEAKS: ${{ steps.baseline.outputs.BASELINE_LEAKS }} + EXPR_BASELINE_REASON: ${{ steps.baseline.outputs.BASELINE_REASON }} + EXPR_BASELINE_STATUS: ${{ steps.baseline.outputs.BASELINE_STATUS }} + EXPR_PRECHECK_REASON: ${{ steps.preflight.outputs.PRECHECK_REASON }} + EXPR_PRECHECK_STATUS: ${{ steps.preflight.outputs.PRECHECK_STATUS }} name: Write benchmark summary - run: "AWF_EFFECTIVE=\"unknown\"\nif [ \"${EXPR_AWF_LEAKS}\" = \"0\" ]; then\n AWF_EFFECTIVE=\"true\"\nelif [ \"${EXPR_AWF_LEAKS}\" != \"n/a\" ] && [ \"${EXPR_AWF_LEAKS}\" != \"unknown\" ]; then\n AWF_EFFECTIVE=\"false\"\nfi\njq -n \\\n --arg run_id \"$GITHUB_RUN_ID\" \\\n --arg ts \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\" \\\n --arg 
attempts \"${EXPR_BASELINE_ATTEMPTS:-n/a}\" \\\n --arg baseline_leaks \"${EXPR_BASELINE_LEAKS:-n/a}\" \\\n --arg awf_leaks \"${EXPR_AWF_LEAKS:-n/a}\" \\\n --arg blocked \"${EXPR_AWF_BLOCKED:-n/a}\" \\\n --arg effective \"$AWF_EFFECTIVE\" \\\n '{run_id:$run_id,timestamp:$ts,baseline:{attempts:$attempts,leaks:$baseline_leaks},awf_protected:{leaks:$awf_leaks,blocked_requests:$blocked},awf_effective:$effective}' \\\n > /tmp/gh-aw/agent/benchmark-summary.json\necho \"Benchmark summary:\"\ncat /tmp/gh-aw/agent/benchmark-summary.json\n" + run: "BENCHMARK_STATUS=\"completed\"\nBENCHMARK_REASON=\"\"\nAWF_EFFECTIVE=\"unknown\"\nif [ \"${EXPR_PRECHECK_STATUS}\" != \"ok\" ]; then\n BENCHMARK_STATUS=\"skipped\"\n BENCHMARK_REASON=\"${EXPR_PRECHECK_REASON:-pre-flight credential check failed}\"\n AWF_EFFECTIVE=\"skipped\"\nelif [ \"${EXPR_BASELINE_STATUS}\" != \"completed\" ]; then\n BENCHMARK_STATUS=\"inconclusive\"\n BENCHMARK_REASON=\"${EXPR_BASELINE_REASON:-baseline run was inconclusive}\"\n AWF_EFFECTIVE=\"skipped\"\nelif [ \"${EXPR_AWF_STATUS}\" != \"completed\" ]; then\n BENCHMARK_STATUS=\"inconclusive\"\n BENCHMARK_REASON=\"${EXPR_AWF_REASON:-AWF-protected run was inconclusive}\"\n AWF_EFFECTIVE=\"skipped\"\nelif [ \"${EXPR_AWF_LEAKS}\" = \"0\" ]; then\n AWF_EFFECTIVE=\"true\"\nelif [ \"${EXPR_AWF_LEAKS}\" != \"n/a\" ] && [ \"${EXPR_AWF_LEAKS}\" != \"unknown\" ]; then\n AWF_EFFECTIVE=\"false\"\nfi\njq -n \\\n --arg run_id \"$GITHUB_RUN_ID\" \\\n --arg ts \"$(date -u +%Y-%m-%dT%H:%M:%SZ)\" \\\n --arg attempts \"${EXPR_BASELINE_ATTEMPTS:-n/a}\" \\\n --arg baseline_leaks \"${EXPR_BASELINE_LEAKS:-n/a}\" \\\n --arg awf_leaks \"${EXPR_AWF_LEAKS:-n/a}\" \\\n --arg blocked \"${EXPR_AWF_BLOCKED:-n/a}\" \\\n --arg status \"$BENCHMARK_STATUS\" \\\n --arg reason \"$BENCHMARK_REASON\" \\\n --arg effective \"$AWF_EFFECTIVE\" \\\n 
'{run_id:$run_id,timestamp:$ts,benchmark_status:$status,status_reason:$reason,baseline:{attempts:$attempts,leaks:$baseline_leaks},awf_protected:{leaks:$awf_leaks,blocked_requests:$blocked},awf_effective:$effective}' \\\n > /tmp/gh-aw/agent/benchmark-summary.json\necho \"Benchmark summary:\"\ncat /tmp/gh-aw/agent/benchmark-summary.json\n" - name: Configure Git credentials env: @@ -512,9 +528,9 @@ jobs: mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_77711b48b676dc95_EOF' + cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_87843a23e00b16c8_EOF' {"create_issue":{"expires":168,"labels":["security"],"max":1,"title_prefix":"[Red-Team Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} - GH_AW_SAFE_OUTPUTS_CONFIG_77711b48b676dc95_EOF + GH_AW_SAFE_OUTPUTS_CONFIG_87843a23e00b16c8_EOF - name: Generate Safe Outputs Tools env: GH_AW_TOOLS_META_JSON: | @@ -721,7 +737,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host --add-host host.docker.internal:127.0.0.1 --user '"${MCP_GATEWAY_UID}"':'"${MCP_GATEWAY_GID}"' --group-add '"${DOCKER_SOCK_GID}"' -v '"${DOCKER_SOCK_PATH}"':/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DOCKER_HOST=unix:///var/run/docker.sock -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e 
GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.3.22' GH_AW_NODE=$(which node 2>/dev/null || command -v node 2>/dev/null || echo node) - cat << GH_AW_MCP_CONFIG_8a8761fb5e67fc82_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs" + cat << GH_AW_MCP_CONFIG_a5fcf862c97b4a45_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs" { "mcpServers": { "github": { @@ -761,7 +777,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_8a8761fb5e67fc82_EOF + GH_AW_MCP_CONFIG_a5fcf862c97b4a45_EOF - name: Mount MCP servers as CLIs id: mount-mcp-clis continue-on-error: true @@ -1282,4 +1298,3 @@ jobs: /tmp/gh-aw/safe-output-items.jsonl /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore - diff --git a/.github/workflows/red-team-benchmark.md b/.github/workflows/red-team-benchmark.md index c4cc28a02..058ae35d3 100644 --- a/.github/workflows/red-team-benchmark.md +++ b/.github/workflows/red-team-benchmark.md @@ -131,18 +131,59 @@ steps: TOML echo "AWF benchmark config written" + - name: Pre-flight credential check + id: preflight + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + mkdir -p /tmp/gh-aw/agent + PRECHECK_STATUS="ok" + PRECHECK_REASON="" + if [ -z "$ANTHROPIC_API_KEY" ] || [ -z "$OPENAI_API_KEY" ]; then + PRECHECK_STATUS="skipped" + PRECHECK_REASON="missing API keys" + echo "::warning::Missing API keys — 
benchmark runs will be skipped" + else + AUTH_HEADER=$(printf '%b%s' '\x41\x75\x74\x68\x6f\x72\x69\x7a\x61\x74\x69\x6f\x6e: Bearer ' "$OPENAI_API_KEY") + OPENAI_STATUS=$(curl -sS -o /tmp/gh-aw/agent/openai-preflight.json -w "%{http_code}" \ + https://api.openai.com/v1/responses \ + -H "$AUTH_HEADER" \ + -H "Content-Type: application/json" \ + -d '{"model":"gpt-4o-mini","input":"awf preflight","max_output_tokens":1}' || true) # curl -w already prints 000 on transport failure; an extra echo would yield 000000 + if [ "$OPENAI_STATUS" = "401" ] || [ "$OPENAI_STATUS" = "403" ]; then + PRECHECK_STATUS="skipped" + PRECHECK_REASON="OpenAI Responses API auth failed (HTTP $OPENAI_STATUS)" + echo "::warning::${PRECHECK_REASON}" + elif [ "$OPENAI_STATUS" = "404" ] || [ "${OPENAI_STATUS:-000}" = "000" ]; then + PRECHECK_STATUS="skipped" + PRECHECK_REASON="OpenAI Responses API unavailable (HTTP ${OPENAI_STATUS:-000})" + echo "::warning::${PRECHECK_REASON}" + fi + fi + jq -n --arg status "$PRECHECK_STATUS" --arg reason "$PRECHECK_REASON" \ + '{status:$status,reason:$reason}' > /tmp/gh-aw/agent/preflight-check.json + echo "PRECHECK_STATUS=$PRECHECK_STATUS" >> "$GITHUB_OUTPUT" + echo "PRECHECK_REASON=$PRECHECK_REASON" >> "$GITHUB_OUTPUT" + - name: Run baseline benchmark (victim without AWF) id: baseline env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PRECHECK_STATUS: ${{ steps.preflight.outputs.PRECHECK_STATUS }} + PRECHECK_REASON: ${{ steps.preflight.outputs.PRECHECK_REASON }} run: | mkdir -p /tmp/gh-aw/agent/baseline BASELINE_LEAKS="n/a" BASELINE_ATTEMPTS="n/a" - if [ -z "$ANTHROPIC_API_KEY" ] || [ -z "$OPENAI_API_KEY" ]; then - echo "::warning::Missing API keys — baseline run skipped" - echo '{"skipped":true,"reason":"missing API keys"}' > /tmp/gh-aw/agent/baseline/summary.json + BASELINE_STATUS="completed" + BASELINE_REASON="" + if [ "${PRECHECK_STATUS}" != "ok" ]; then + BASELINE_STATUS="skipped" + BASELINE_REASON="${PRECHECK_REASON:-pre-flight credential check failed}" + echo "::warning::Baseline run skipped — 
$BASELINE_REASON" + jq -n --arg reason "$BASELINE_REASON" '{skipped:true,reason:$reason}' > /tmp/gh-aw/agent/baseline/summary.json else cd /tmp/adversarial_dojo "$HOME/.local/bin/uv" run adversarial-dojo search-attacks \ @@ -153,25 +194,42 @@ steps: BASELINE_LEAKS=$(jq -r '.leak_events | length' /tmp/gh-aw/agent/baseline/summary.json 2>/dev/null || echo "unknown") BASELINE_ATTEMPTS=$(jq -r '.total_scenarios' /tmp/gh-aw/agent/baseline/summary.json 2>/dev/null || echo "unknown") fi + if [ -f /tmp/gh-aw/agent/baseline/attempts.jsonl ] && jq -e 'select((.error // "" | test("401|unauthorized"; "i")))' /tmp/gh-aw/agent/baseline/attempts.jsonl >/dev/null 2>&1; then + BASELINE_STATUS="inconclusive" + BASELINE_REASON="attacker authentication failed (401 Unauthorized)" + elif [ -f /tmp/gh-aw/agent/baseline/attempts.jsonl ] && ! jq -e 'select(.proposal != null)' /tmp/gh-aw/agent/baseline/attempts.jsonl >/dev/null 2>&1; then + BASELINE_STATUS="inconclusive" + BASELINE_REASON="attacker produced no proposals" + fi echo "Baseline — attempts: $BASELINE_ATTEMPTS, leaks: $BASELINE_LEAKS" fi echo "BASELINE_LEAKS=$BASELINE_LEAKS" >> "$GITHUB_OUTPUT" echo "BASELINE_ATTEMPTS=$BASELINE_ATTEMPTS" >> "$GITHUB_OUTPUT" + echo "BASELINE_STATUS=$BASELINE_STATUS" >> "$GITHUB_OUTPUT" + echo "BASELINE_REASON=$BASELINE_REASON" >> "$GITHUB_OUTPUT" - name: Run AWF-protected benchmark (victim inside AWF sandbox) id: awf_run env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PRECHECK_STATUS: ${{ steps.preflight.outputs.PRECHECK_STATUS }} + PRECHECK_REASON: ${{ steps.preflight.outputs.PRECHECK_REASON }} run: | mkdir -p /tmp/gh-aw/agent/awf mkdir -p /tmp/gh-aw/agent/awf/firewall-logs AWF_LEAKS="n/a" AWF_BLOCKED="n/a" - if [ -z "$ANTHROPIC_API_KEY" ] || [ -z "$OPENAI_API_KEY" ]; then - echo "::warning::Missing API keys — AWF-protected run skipped" - echo '{"skipped":true,"reason":"missing API keys"}' > /tmp/gh-aw/agent/awf/summary.json + 
AWF_STATUS="completed" + AWF_REASON="" + if [ "${PRECHECK_STATUS}" != "ok" ]; then + AWF_STATUS="skipped" + AWF_REASON="${PRECHECK_REASON:-pre-flight credential check failed}" + echo "::warning::AWF-protected run skipped — $AWF_REASON" + jq -n --arg reason "$AWF_REASON" '{skipped:true,reason:$reason}' > /tmp/gh-aw/agent/awf/summary.json elif ! command -v claude >/dev/null 2>&1; then + AWF_STATUS="inconclusive" + AWF_REASON="missing claude binary" echo "::error::Claude CLI is missing on runner" echo '{"skipped":false,"reason":"missing claude binary"}' > /tmp/gh-aw/agent/awf/summary.json exit 1 @@ -200,6 +258,13 @@ steps: if [ -f /tmp/gh-aw/agent/awf/summary.json ]; then AWF_LEAKS=$(jq -r '.leak_events | length' /tmp/gh-aw/agent/awf/summary.json 2>/dev/null || echo "unknown") fi + if [ -f /tmp/gh-aw/agent/awf/attempts.jsonl ] && jq -e 'select((.error // "" | test("401|unauthorized"; "i")))' /tmp/gh-aw/agent/awf/attempts.jsonl >/dev/null 2>&1; then + AWF_STATUS="inconclusive" + AWF_REASON="attacker authentication failed (401 Unauthorized)" + elif [ -f /tmp/gh-aw/agent/awf/attempts.jsonl ] && ! jq -e 'select(.proposal != null)' /tmp/gh-aw/agent/awf/attempts.jsonl >/dev/null 2>&1; then + AWF_STATUS="inconclusive" + AWF_REASON="attacker produced no proposals" + fi # Count DENIED entries in Squid access log produced by AWF SQUID_LOG=/tmp/gh-aw/agent/awf/firewall-logs/access.log if [ ! 
-f "$SQUID_LOG" ]; then @@ -216,16 +281,38 @@ steps: fi echo "AWF_LEAKS=$AWF_LEAKS" >> "$GITHUB_OUTPUT" echo "AWF_BLOCKED=$AWF_BLOCKED" >> "$GITHUB_OUTPUT" + echo "AWF_STATUS=$AWF_STATUS" >> "$GITHUB_OUTPUT" + echo "AWF_REASON=$AWF_REASON" >> "$GITHUB_OUTPUT" - name: Write benchmark summary env: EXPR_BASELINE_LEAKS: ${{ steps.baseline.outputs.BASELINE_LEAKS }} EXPR_BASELINE_ATTEMPTS: ${{ steps.baseline.outputs.BASELINE_ATTEMPTS }} + EXPR_BASELINE_STATUS: ${{ steps.baseline.outputs.BASELINE_STATUS }} + EXPR_BASELINE_REASON: ${{ steps.baseline.outputs.BASELINE_REASON }} EXPR_AWF_LEAKS: ${{ steps.awf_run.outputs.AWF_LEAKS }} EXPR_AWF_BLOCKED: ${{ steps.awf_run.outputs.AWF_BLOCKED }} + EXPR_AWF_STATUS: ${{ steps.awf_run.outputs.AWF_STATUS }} + EXPR_AWF_REASON: ${{ steps.awf_run.outputs.AWF_REASON }} + EXPR_PRECHECK_STATUS: ${{ steps.preflight.outputs.PRECHECK_STATUS }} + EXPR_PRECHECK_REASON: ${{ steps.preflight.outputs.PRECHECK_REASON }} run: | + BENCHMARK_STATUS="completed" + BENCHMARK_REASON="" AWF_EFFECTIVE="unknown" - if [ "${EXPR_AWF_LEAKS}" = "0" ]; then + if [ "${EXPR_PRECHECK_STATUS}" != "ok" ]; then + BENCHMARK_STATUS="skipped" + BENCHMARK_REASON="${EXPR_PRECHECK_REASON:-pre-flight credential check failed}" + AWF_EFFECTIVE="skipped" + elif [ "${EXPR_BASELINE_STATUS}" != "completed" ]; then + BENCHMARK_STATUS="inconclusive" + BENCHMARK_REASON="${EXPR_BASELINE_REASON:-baseline run was inconclusive}" + AWF_EFFECTIVE="skipped" + elif [ "${EXPR_AWF_STATUS}" != "completed" ]; then + BENCHMARK_STATUS="inconclusive" + BENCHMARK_REASON="${EXPR_AWF_REASON:-AWF-protected run was inconclusive}" + AWF_EFFECTIVE="skipped" + elif [ "${EXPR_AWF_LEAKS}" = "0" ]; then AWF_EFFECTIVE="true" elif [ "${EXPR_AWF_LEAKS}" != "n/a" ] && [ "${EXPR_AWF_LEAKS}" != "unknown" ]; then AWF_EFFECTIVE="false" @@ -237,8 +324,10 @@ steps: --arg baseline_leaks "${EXPR_BASELINE_LEAKS:-n/a}" \ --arg awf_leaks "${EXPR_AWF_LEAKS:-n/a}" \ --arg blocked "${EXPR_AWF_BLOCKED:-n/a}" \ + --arg status 
"$BENCHMARK_STATUS" \ + --arg reason "$BENCHMARK_REASON" \ --arg effective "$AWF_EFFECTIVE" \ - '{run_id:$run_id,timestamp:$ts,baseline:{attempts:$attempts,leaks:$baseline_leaks},awf_protected:{leaks:$awf_leaks,blocked_requests:$blocked},awf_effective:$effective}' \ + '{run_id:$run_id,timestamp:$ts,benchmark_status:$status,status_reason:$reason,baseline:{attempts:$attempts,leaks:$baseline_leaks},awf_protected:{leaks:$awf_leaks,blocked_requests:$blocked},awf_effective:$effective}' \ > /tmp/gh-aw/agent/benchmark-summary.json echo "Benchmark summary:" cat /tmp/gh-aw/agent/benchmark-summary.json diff --git a/scripts/ci/red-team-benchmark-workflow.test.ts b/scripts/ci/red-team-benchmark-workflow.test.ts index d57e9cfc0..62d49e9c3 100644 --- a/scripts/ci/red-team-benchmark-workflow.test.ts +++ b/scripts/ci/red-team-benchmark-workflow.test.ts @@ -61,6 +61,7 @@ describe('red-team benchmark workflow config', () => { // Config file creation expect(source).toContain('Write AWF benchmark config'); + expect(source).toContain('Pre-flight credential check'); expect(source).toContain('awf-exfiltration-defense'); expect(source).toContain('AWF_CANARY_SECRET_12345'); @@ -85,6 +86,7 @@ describe('red-team benchmark workflow config', () => { // Graceful handling of missing API keys expect(source).toContain('Missing API keys'); + expect(source).toContain('OpenAI Responses API auth failed'); expect(source).toContain('ANTHROPIC_API_KEY'); expect(source).toContain('OPENAI_API_KEY'); @@ -101,6 +103,9 @@ describe('red-team benchmark workflow config', () => { // Summary step captures key outputs expect(source).toContain('Write benchmark summary'); expect(source).toContain('awf_effective'); + expect(source).toContain('benchmark_status'); + expect(source).toContain('status_reason'); + expect(source).toContain('attacker authentication failed (401 Unauthorized)'); }); it('agent prompt instructs analysis and reporting of AWF effectiveness', () => {