From 52f9adebf33ec26c98c518f83b5b50f549870def Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 31 Oct 2025 14:22:37 +0000 Subject: [PATCH 1/3] Initial plan From 83d04ec02ab34155b094320c32e36f80b4efedce Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 31 Oct 2025 14:30:21 +0000 Subject: [PATCH 2/3] Initial exploration of threat detection documentation Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- pkg/workflow/schemas/github-workflow.json | 76 ++++++++++++++++++++--- 1 file changed, 67 insertions(+), 9 deletions(-) diff --git a/pkg/workflow/schemas/github-workflow.json b/pkg/workflow/schemas/github-workflow.json index 6b93ceff0b8..160824c1de3 100644 --- a/pkg/workflow/schemas/github-workflow.json +++ b/pkg/workflow/schemas/github-workflow.json @@ -983,9 +983,19 @@ "$ref": "#/definitions/types", "items": { "type": "string", - "enum": ["created", "rerequested", "completed", "requested_action"] + "enum": [ + "created", + "rerequested", + "completed", + "requested_action" + ] }, - "default": ["created", "rerequested", "completed", "requested_action"] + "default": [ + "created", + "rerequested", + "completed", + "requested_action" + ] } } }, @@ -1197,7 +1207,13 @@ "type": "string", "enum": ["created", "closed", "opened", "edited", "deleted"] }, - "default": ["created", "closed", "opened", "edited", "deleted"] + "default": [ + "created", + "closed", + "opened", + "edited", + "deleted" + ] } } }, @@ -1215,9 +1231,23 @@ "$ref": "#/definitions/types", "items": { "type": "string", - "enum": ["created", "updated", "closed", "reopened", "edited", "deleted"] + "enum": [ + "created", + "updated", + "closed", + "reopened", + "edited", + "deleted" + ] }, - "default": ["created", "updated", "closed", "reopened", "edited", "deleted"] + "default": [ + "created", + "updated", + "closed", + "reopened", + "edited", + "deleted" + ] } } }, @@ -1230,9 +1260,21 @@ "$ref": "#/definitions/types", "items": { "type": "string", - "enum": ["created", "moved", "converted", "edited", "deleted"] + "enum": [ + "created", + "moved", + "converted", + "edited", + "deleted" + ] }, - "default": ["created", "moved", "converted", "edited", "deleted"] + "default": [ + "created", + "moved", + "converted", + "edited", + "deleted" + ] } } }, @@ -1516,9 +1558,25 @@ "$ref": "#/definitions/types", "items": { "type": "string", - "enum": ["published", "unpublished", "created", "edited", "deleted", "prereleased", "released"] + "enum": [ + "published", + "unpublished", + "created", + "edited", + "deleted", + "prereleased", + "released" + ] }, - "default": ["published", "unpublished", "created", "edited", "deleted", "prereleased", "released"] + "default": [ + "published", + "unpublished", + "created", + "edited", + "deleted", + "prereleased", + "released" + ] } } }, From 10c69811c102cec3395d9fc906f030eee77856ce Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 31 Oct 2025 14:35:18 +0000 Subject: [PATCH 3/3] Add comprehensive threat detection documentation Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- docs/src/content/docs/guides/security.md | 25 +- .../content/docs/guides/threat-detection.md | 447 ++++++++++++++++++ .../content/docs/reference/safe-outputs.md | 112 +++++ 3 files changed, 583 insertions(+), 1 deletion(-) create mode 100644 docs/src/content/docs/guides/threat-detection.md diff --git a/docs/src/content/docs/guides/security.md b/docs/src/content/docs/guides/security.md index b36d9c3c543..a940efa6921 100644 --- a/docs/src/content/docs/guides/security.md +++ b/docs/src/content/docs/guides/security.md @@ -446,7 +446,29 @@ GitHub Agentic Workflows includes automatic threat detection to analyze agent ou The system uses AI-powered analysis with workflow source context to distinguish between legitimate actions and threats, helping reduce false positives while maintaining strong security controls. -See the [Safe Outputs Reference](/gh-aw/reference/safe-outputs/) for threat detection configuration options. +**Configuration Options:** + +Threat detection is automatically enabled when safe outputs are configured, but can be customized: + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + enabled: true # Enable/disable (default: true) + prompt: "Focus on SQL injection" # Custom analysis instructions + engine: # Custom detection engine + id: claude + model: claude-sonnet-4 + steps: # Additional security scanning + - name: Run TruffleHog + uses: trufflesecurity/trufflehog@main +``` + +**Custom Detection Tools:** + +Add specialized security scanners like Ollama/LlamaGuard, Semgrep, or TruffleHog alongside AI analysis for defense-in-depth security. + +See the [Threat Detection Guide](/gh-aw/guides/threat-detection/) for comprehensive documentation, configuration examples, and the LlamaGuard integration pattern. ### Automated Security Scanning @@ -621,6 +643,7 @@ Copilot and Claude expose richer default tools and optional Bash; Codex relies m ## See also +- [Threat Detection Guide](/gh-aw/guides/threat-detection/) - Comprehensive threat detection configuration and examples - [Safe Outputs Reference](/gh-aw/reference/safe-outputs/) - [Network Configuration](/gh-aw/reference/network/) - [Tools](/gh-aw/reference/tools/) diff --git a/docs/src/content/docs/guides/threat-detection.md b/docs/src/content/docs/guides/threat-detection.md new file mode 100644 index 00000000000..b993d6a6d3d --- /dev/null +++ b/docs/src/content/docs/guides/threat-detection.md @@ -0,0 +1,447 @@ +--- +title: Threat Detection +description: Configure automated threat detection to analyze agent output and code changes for security issues before they are applied. +sidebar: + order: 650 +--- + +GitHub Agentic Workflows includes automatic threat detection to analyze agent output and code changes for potential security issues before they are applied. When safe outputs are configured, a threat detection job automatically runs to identify prompt injection attempts, secret leaks, and malicious code patches. + +## How It Works + +Threat detection provides an additional security layer that: + +1. **Analyzes Agent Output**: Reviews all safe output items (issues, comments, PRs) for malicious content +2. **Scans Code Changes**: Examines git patches for suspicious patterns, backdoors, and vulnerabilities +3. **Uses Workflow Context**: Leverages the workflow source to distinguish legitimate actions from threats +4. **Runs Automatically**: Executes after the main agentic job completes but before safe outputs are applied + +**Security Architecture:** + +``` +┌─────────────────┐ +│ Agentic Job │ (Read-only permissions) +│ Generates │ +│ Output & Patches│ +└────────┬────────┘ + │ artifacts + ▼ +┌─────────────────┐ +│ Threat Detection│ (Analyzes for security issues) +│ Job │ +└────────┬────────┘ + │ approved/blocked + ▼ +┌─────────────────┐ +│ Safe Output Jobs│ (Write permissions, only if safe) +│ Create Issues, │ +│ PRs, Comments │ +└─────────────────┘ +``` + +## Default Configuration + +Threat detection is **automatically enabled** when safe outputs are configured: + +```yaml +safe-outputs: + create-issue: # Threat detection enabled automatically + create-pull-request: +``` + +The default configuration uses AI-powered analysis with the workflow's Claude or Copilot engine to detect: + +- **Prompt Injection**: Malicious instructions attempting to manipulate AI behavior +- **Secret Leaks**: Exposed API keys, tokens, passwords, or credentials +- **Malicious Patches**: Code changes introducing vulnerabilities, backdoors, or suspicious patterns + +## Configuration Options + +### Basic Enabled/Disabled + +Control threat detection with a boolean flag: + +```yaml +safe-outputs: + create-issue: + threat-detection: true # Explicitly enable (default when safe-outputs exist) + +# Or disable entirely: +safe-outputs: + create-pull-request: + threat-detection: false # Disable threat detection +``` + +### Advanced Configuration + +Use object syntax for fine-grained control: + +```yaml +safe-outputs: + create-issue: + threat-detection: + enabled: true # Enable/disable detection + prompt: "Focus on SQL injection" # Additional analysis instructions + engine: # Custom engine configuration + id: claude + model: claude-sonnet-4 + steps: # Custom detection steps + - name: Custom Security Check + run: echo "Running additional checks" +``` + +**Configuration Fields:** + +- **`enabled`** (boolean): Enable or disable threat detection. Default: `true` when safe-outputs exist +- **`prompt`** (string): Additional custom instructions appended to the default threat detection prompt +- **`engine`** (string | object | false): AI engine configuration for detection + - String format: `"claude"`, `"copilot"`, `"codex"` + - Object format: Full engine configuration (same as main workflow engine) + - `false`: Disable AI-based detection, run only custom steps +- **`steps`** (array): Additional GitHub Actions steps to run after AI analysis + +## AI-Based Detection (Default) + +By default, threat detection uses the same AI engine as your main workflow to analyze output for security threats. + +**Example with Default AI Detection:** + +```yaml +--- +on: push +engine: claude +safe-outputs: + create-pull-request: + threat-detection: true # Uses Claude for analysis +--- + +# Code Review Agent + +Analyze code and create pull requests with improvements. +``` + +The AI engine receives the workflow source context and analyzes: +- Agent output items (issues, comments, PRs) +- Git patch files with code changes +- Workflow intent and legitimate use cases + +**Output Format:** + +AI detection produces a structured JSON response: + +```json +{ + "prompt_injection": false, + "secret_leak": false, + "malicious_patch": false, + "reasons": [] +} +``` + +If any threat is detected (`true`), the workflow fails and safe outputs are blocked. + +## Custom Prompt Instructions + +Enhance AI analysis with domain-specific guidance: + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + prompt: | + Focus on these additional security concerns: + - SQL injection vulnerabilities in database queries + - Cross-site scripting (XSS) in user input handling + - Unsafe deserialization patterns + - Hardcoded credentials in configuration files +``` + +The custom prompt is appended to the default threat detection instructions, providing specialized context for your workflow's domain. + +## Custom Engine Configuration + +Override the main workflow engine for threat detection: + +**String Format:** + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + engine: copilot # Use Copilot instead of main workflow engine +``` + +**Object Format:** + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + engine: + id: claude + model: claude-sonnet-4 + max-turns: 3 +``` + +**Disable AI Engine:** + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + engine: false # No AI analysis, only custom steps + steps: + - name: Custom Scanning + run: | + # Run your own security tools + ./security-scan.sh +``` + +## Custom Detection Steps + +Add specialized security scanning tools alongside or instead of AI detection: + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + steps: + - name: Run Security Scanner + run: | + echo "Scanning agent output for threats..." + # Your custom security logic here + + - name: Check for Secrets + uses: trufflesecurity/trufflehog@main + with: + path: /tmp/gh-aw/threat-detection/ +``` + +**Available Artifacts:** + +Custom steps have access to these downloaded artifacts: + +- `/tmp/gh-aw/threat-detection/prompt.txt` - Workflow prompt +- `/tmp/gh-aw/threat-detection/agent_output.json` - Safe output items +- `/tmp/gh-aw/threat-detection/aw.patch` - Git patch file + +**Execution Order:** + +1. Download artifacts (prompt, output, patch) +2. Run AI-based analysis (if engine not disabled) +3. Execute custom steps +4. Upload detection log artifact + +## Example: LlamaGuard Integration + +Use Ollama with LlamaGuard 3 for specialized threat detection: + +```yaml +--- +on: push +engine: copilot +safe-outputs: + create-pull-request: + threat-detection: + steps: + - name: Ollama LlamaGuard 3 Scan + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + + // Install Ollama + await exec.exec('curl', ['-fsSL', 'https://ollama.com/install.sh', '-o', '/tmp/install.sh']); + await exec.exec('sh', ['/tmp/install.sh']); + + // Start Ollama service + exec.exec('ollama', ['serve'], { detached: true }); + + // Wait for service + let ready = false; + for (let i = 0; i < 30; i++) { + try { + await exec.exec('curl', ['-f', 'http://localhost:11434/api/version'], { silent: true }); + ready = true; + break; + } catch (e) { + await new Promise(r => setTimeout(r, 1000)); + } + } + + if (!ready) { + core.setFailed('Ollama service failed to start'); + return; + } + + // Pull LlamaGuard model + await exec.exec('ollama', ['pull', 'llama-guard3:1b']); + + // Scan agent output + const outputPath = '/tmp/gh-aw/threat-detection/agent_output.json'; + if (fs.existsSync(outputPath)) { + const content = fs.readFileSync(outputPath, 'utf8'); + + const response = await exec.getExecOutput('curl', [ + '-X', 'POST', + 'http://localhost:11434/api/chat', + '-H', 'Content-Type: application/json', + '-d', JSON.stringify({ + model: 'llama-guard3:1b', + messages: [{ role: 'user', content }], + stream: false + }) + ]); + + const result = JSON.parse(response.stdout); + const output = result.message?.content || ''; + + // Check if safe + const isSafe = output.toLowerCase().trim() === 'safe' || output.includes('s8'); + + if (!isSafe) { + core.setFailed(`LlamaGuard detected threat: ${output}`); + } else { + core.info('✅ Content appears safe'); + } + } + +timeout_minutes: 20 # Allow time for model download +--- + +# Code Review Agent + +Analyze and improve code with LlamaGuard threat scanning. +``` + +:::tip +For a complete LlamaGuard implementation, see `.github/workflows/shared/ollama-threat-scan.md` in the repository. +::: + +## Combined AI and Custom Detection + +Use both AI analysis and custom tools for defense-in-depth: + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + prompt: "Check for authentication bypass vulnerabilities" + engine: + id: claude + model: claude-sonnet-4 + steps: + - name: Static Analysis + run: | + # Run static analysis tool + semgrep --config auto /tmp/gh-aw/threat-detection/ + + - name: Secret Scanner + uses: trufflesecurity/trufflehog@main + with: + path: /tmp/gh-aw/threat-detection/aw.patch +``` + +This configuration: +1. Uses Claude with custom prompt for AI analysis +2. Runs Semgrep for static code analysis +3. Scans for exposed secrets with TruffleHog + +## Error Handling + +**When Threats Are Detected:** + +The threat detection job fails with a clear error message and safe output jobs are skipped: + +``` +❌ Threat detected: Potential SQL injection in code changes +Reasons: +- Unsanitized user input in database query +- Missing parameterized query pattern +``` + +**When Detection Fails:** + +If the detection process itself fails (e.g., network issues, tool errors), the workflow stops and safe outputs are not applied. This fail-safe approach prevents potentially malicious content from being processed. + +## Best Practices + +### When to Use AI Detection + +**Use AI-based detection when:** +- Analyzing natural language content (issues, comments, discussions) +- Detecting sophisticated prompt injection attempts +- Understanding context-specific security risks +- Identifying intent-based threats + +### When to Use Custom Steps + +**Add custom steps when:** +- Integrating specialized security tools (Semgrep, Snyk, TruffleHog) +- Enforcing organization-specific security policies +- Scanning for domain-specific vulnerabilities +- Meeting compliance requirements + +### Performance Considerations + +- **AI Analysis**: Typically completes in 10-30 seconds +- **Custom Tools**: Varies by tool (LlamaGuard: 5-15 minutes with model download) +- **Timeout**: Set appropriate `timeout_minutes` for custom tools +- **Artifact Size**: Large patches may require truncation for analysis + +### Security Recommendations + +1. **Defense in Depth**: Use both AI and custom detection for critical workflows +2. **Regular Updates**: Keep custom security tools and models up to date +3. **Test Thoroughly**: Validate detection with known malicious samples +4. **Monitor False Positives**: Review blocked outputs to refine detection logic +5. **Document Rationale**: Comment why specific detection rules exist + +## Troubleshooting + +### AI Detection Always Fails + +**Symptom**: Every workflow execution reports threats + +**Solutions**: +- Review custom prompt for overly strict instructions +- Check if legitimate workflow patterns trigger detection +- Adjust prompt to provide better context +- Use `threat-detection.enabled: false` temporarily to test + +### Custom Steps Not Running + +**Symptom**: Steps in `threat-detection.steps` don't execute + +**Check**: +- Verify YAML indentation is correct +- Ensure steps array is properly formatted +- Review workflow compilation output for errors +- Check if AI detection failed before custom steps + +### Large Patches Cause Timeouts + +**Symptom**: Detection times out with large code changes + +**Solutions**: +- Increase `timeout_minutes` in workflow frontmatter +- Configure `max-patch-size` to limit patch size +- Truncate content before analysis in custom steps +- Split large changes into smaller PRs + +### False Positives + +**Symptom**: Legitimate content flagged as malicious + +**Solutions**: +- Refine custom prompt with specific exclusions +- Adjust custom detection tool thresholds +- Add workflow context explaining legitimate patterns +- Review detection logs to understand trigger patterns + +## Related Documentation + +- [Safe Outputs Reference](/gh-aw/reference/safe-outputs/) - Complete safe outputs configuration +- [Security Guide](/gh-aw/guides/security/) - Overall security best practices +- [Custom Safe Outputs](/gh-aw/guides/custom-safe-outputs/) - Creating custom output types +- [Frontmatter Reference](/gh-aw/reference/frontmatter/) - All configuration options diff --git a/docs/src/content/docs/reference/safe-outputs.md b/docs/src/content/docs/reference/safe-outputs.md index 95e0798099a..0bee7f9e290 100644 --- a/docs/src/content/docs/reference/safe-outputs.md +++ b/docs/src/content/docs/reference/safe-outputs.md @@ -1157,8 +1157,120 @@ safe-outputs: add-comment: ``` +## Threat Detection (`threat-detection:`) + +Threat detection provides automated security analysis of agent output and code changes before safe outputs are applied. It helps identify prompt injection attempts, secret leaks, and malicious code patches. + +**Automatic Enablement:** + +Threat detection is automatically enabled when any safe outputs are configured. It can be explicitly controlled or customized: + +**Basic Configuration:** + +```yaml +safe-outputs: + create-issue: + threat-detection: true # Explicitly enable (default behavior) + +# Or disable: +safe-outputs: + create-pull-request: + threat-detection: false # Disable threat detection +``` + +**Advanced Configuration:** + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + enabled: true # Enable/disable detection (default: true) + prompt: "Focus on SQL injection" # Additional analysis instructions + engine: # Custom engine for detection (string, object, or false) + id: claude + model: claude-sonnet-4 + steps: # Additional custom detection steps + - name: Custom Security Scan + run: | + echo "Running security checks..." +``` + +**Configuration Fields:** + +- **`enabled`** (boolean): Enable or disable threat detection. Default: `true` when safe-outputs are configured +- **`prompt`** (string): Additional custom instructions appended to the default threat detection prompt for specialized analysis +- **`engine`** (string | object | false): AI engine configuration for detection analysis + - String: Engine ID like `"claude"`, `"copilot"`, or `"codex"` + - Object: Full engine configuration with `id`, `model`, and other options + - `false`: Disable AI-based detection and run only custom steps +- **`steps`** (array): Additional GitHub Actions steps executed after AI analysis for custom security scanning + +**Default Behavior:** + +When enabled (default), threat detection uses the same AI engine as your main workflow to analyze: +- Agent output items (issues, comments, pull requests) +- Git patch files with code changes +- Workflow source context for legitimate use cases + +The analysis produces a structured JSON response indicating detected threats: + +```json +{ + "prompt_injection": false, + "secret_leak": false, + "malicious_patch": false, + "reasons": [] +} +``` + +If any threat is detected, the workflow fails and safe outputs are blocked. + +**Custom Detection Steps:** + +Add specialized security tools to supplement or replace AI analysis: + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + steps: + - name: Run TruffleHog + uses: trufflesecurity/trufflehog@main + with: + path: /tmp/gh-aw/threat-detection/ + + - name: Static Analysis + run: | + semgrep --config auto /tmp/gh-aw/threat-detection/ +``` + +Custom steps have access to: +- `/tmp/gh-aw/threat-detection/prompt.txt` - Workflow prompt +- `/tmp/gh-aw/threat-detection/agent_output.json` - Safe output items +- `/tmp/gh-aw/threat-detection/aw.patch` - Git patch file + +**Example: LlamaGuard Integration** + +```yaml +safe-outputs: + create-pull-request: + threat-detection: + steps: + - name: Ollama LlamaGuard 3 Scan + uses: actions/github-script@v7 + with: + script: | + // Install and run Ollama with llama-guard3:1b model + // See .github/workflows/shared/ollama-threat-scan.md for complete example +``` + +:::tip +See the [Threat Detection Guide](/gh-aw/guides/threat-detection/) for comprehensive documentation, examples, and best practices. +::: + ## Related Documentation +- [Threat Detection Guide](/gh-aw/guides/threat-detection/) - Complete threat detection documentation and examples - [Frontmatter](/gh-aw/reference/frontmatter/) - All configuration options for workflows - [Workflow Structure](/gh-aw/reference/workflow-structure/) - Directory layout and organization - [Command Triggers](/gh-aw/reference/command-triggers/) - Special /my-bot triggers and context text