diff --git a/.github/workflows/daily-formal-spec-verifier.lock.yml b/.github/workflows/daily-formal-spec-verifier.lock.yml index 46aa789c3a0..151adea80d9 100644 --- a/.github/workflows/daily-formal-spec-verifier.lock.yml +++ b/.github/workflows/daily-formal-spec-verifier.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"44019fa4c3a05af65c3221b67215a4a4c1e4cce97a8127c7c77125c3548e471a","body_hash":"511c354d1036187b61d80cedbcc3a648d047e9e336b83a11a0bcc8bbf096319d","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.63","copilot-sdk":"1.0.1"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"44019fa4c3a05af65c3221b67215a4a4c1e4cce97a8127c7c77125c3548e471a","body_hash":"4a2db93ab36f2c0b08bbf8619e99279b09dd6bf544b3ea8d44ef80905c0daf1e","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.63","copilot-sdk":"1.0.1"}} # gh-aw-manifest: {"version":1,"secrets":["GH_AW_AGENT_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0","version":"v7.0.0"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.7","digest":"sha256:aae231e4635c8999d039c132f1602d3df850fe9b84a00aa2b5ac981179b5661c","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.7@sha256:aae231e4635c8999d039c132f1602d3df850fe9b84a00aa2b5ac981179b5661c"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.7","digest":"sha256:009caf2e3d88fa77b64e9a03a95a228fc58db0f1701c6d324b29ba5a3c7c79b6","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.7@sha256:009caf2e3d88fa77b64e9a03a95a228fc58db0f1701c6d324b29ba5a3c7c79b6"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.7","digest":"sha256:4757f198a3fa20f88bdbe70be7ae1a05f127d9c0a9e96a5d6460ef40c08fc83d","pinned_image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.7@sha256:4757f198a3fa20f88bdbe70be7ae1a05f127d9c0a9e96a5d6460ef40c08fc83d"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.7","digest":"sha256:deb1d4e19de62d51cee0508057a596a19315c3423ada4d675cad136dc8037c96","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.7@sha256:deb1d4e19de62d51cee0508057a596a19315c3423ada4d675cad136dc8037c96"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.27","digest":"sha256:fe984bddde4ec05d756d9043edb0a32912e6b7b72f6a121b1082f29221421cc7","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.27@sha256:fe984bddde4ec05d756d9043edb0a32912e6b7b72f6a121b1082f29221421cc7"},{"image":"ghcr.io/github/gh-aw-node","digest":"sha256:529d02eb970b1161aa25c593a9c3df57fdfad5a8add328cb3b6eccef66f3183b","pinned_image":"ghcr.io/github/gh-aw-node@sha256:529d02eb970b1161aa25c593a9c3df57fdfad5a8add328cb3b6eccef66f3183b"},{"image":"ghcr.io/github/github-mcp-server:v1.3.0","digest":"sha256:5c83359327a0bacc3d34db730bea6557d39d341cee0bf6c58c9a896e33150e80","pinned_image":"ghcr.io/github/github-mcp-server:v1.3.0@sha256:5c83359327a0bacc3d34db730bea6557d39d341cee0bf6c58c9a896e33150e80"}]} # This file was automatically generated by gh-aw. DO NOT EDIT. To debug this workflow, load the skill at https://github.com/github/gh-aw/blob/main/debug.md # diff --git a/.github/workflows/daily-formal-spec-verifier.md b/.github/workflows/daily-formal-spec-verifier.md index 3478a62e448..4425bbc85de 100644 --- a/.github/workflows/daily-formal-spec-verifier.md +++ b/.github/workflows/daily-formal-spec-verifier.md @@ -174,6 +174,13 @@ Use existing types, functions, and interfaces from the codebase where possible ( Create exactly one issue using the `create_issue` safe output. +### Output Contract (Required) + +1. Draft the title and body locally first if needed, but emit exactly one final `create_issue` safe output only after the full payload is complete. +2. Do **not** use `bash`, `cli-proxy`, or the `safeoutputs` CLI to create the issue or inspect the tool schema. Emit the safe output directly with `title` and `body` arguments. +3. Never retry `create_issue` with empty, placeholder, or partial arguments. +4. If the quality checks below cannot be met, emit `report_incomplete` directly as a safe output instead of `create_issue`. + ### Issue format Title: `[formal-spec] — Formal model & test suite — ` @@ -239,7 +246,7 @@ Before emitting `create_issue`, verify the body: - The generated test file compiles without errors (review for syntax mistakes). - Is at least 1200 characters long. -If these checks cannot be met, emit `report_incomplete` instead of `create_issue`. +If these checks cannot be met, emit `report_incomplete` directly as a safe output instead of `create_issue`. --- diff --git a/pkg/workflow/prompts_test.go b/pkg/workflow/prompts_test.go index 16b2edd4202..7f26ca4e64b 100644 --- a/pkg/workflow/prompts_test.go +++ b/pkg/workflow/prompts_test.go @@ -324,6 +324,35 @@ func TestDailyCavemanOptimizerUsesConcreteClaudeModelsForExperiment(t *testing.T } } +func TestDailyFormalSpecVerifierDefinesDirectSafeOutputContract(t *testing.T) { + repoRoot, err := findRepoRoot() + if err != nil { + t.Fatalf("Failed to find repo root: %v", err) + } + + workflowFile := filepath.Join(repoRoot, ".github", "workflows", "daily-formal-spec-verifier.md") + content, err := os.ReadFile(workflowFile) + if err != nil { + t.Fatalf("Failed to read workflow file: %v", err) + } + + workflow := string(content) + requiredContract := "Draft the title and body locally first if needed, but emit exactly one final `create_issue` safe output only after the full payload is complete." + if !strings.Contains(workflow, requiredContract) { + t.Fatal("Expected daily-formal-spec-verifier workflow to require a single final create_issue safe output") + } + + noShellGuidance := "Do **not** use `bash`, `cli-proxy`, or the `safeoutputs` CLI to create the issue or inspect the tool schema. Emit the safe output directly with `title` and `body` arguments." + if !strings.Contains(workflow, noShellGuidance) { + t.Fatal("Expected daily-formal-spec-verifier workflow to forbid bash/CLI safe-output invocation") + } + + reportIncompleteGuidance := "If the quality checks below cannot be met, emit `report_incomplete` directly as a safe output instead of `create_issue`." + if !strings.Contains(workflow, reportIncompleteGuidance) { + t.Fatal("Expected daily-formal-spec-verifier workflow to require direct report_incomplete fallback") + } +} + func TestDailyCacheStrategyAnalyzerUsesCodexCompatibleModelsForExperiment(t *testing.T) { repoRoot, err := findRepoRoot() if err != nil {