diff --git a/actions/setup/js/sanitize_content.cjs b/actions/setup/js/sanitize_content.cjs index 2937712e324..bb291c04a1a 100644 --- a/actions/setup/js/sanitize_content.cjs +++ b/actions/setup/js/sanitize_content.cjs @@ -13,8 +13,7 @@ const { buildAllowedDomains, buildAllowedGitHubReferences, getCurrentRepoSlug, - sanitizeUrlProtocols, - sanitizeUrlDomains, + applyURLSanitizationPolicy, neutralizeCommands, neutralizeGitHubReferences, removeXmlComments, @@ -120,8 +119,7 @@ function sanitizeContent(content, maxLengthOrOptions) { sanitized = applyToNonCodeRegions(sanitized, convertXmlTags); // URI filtering (shared with core) - sanitized = sanitizeUrlProtocols(sanitized); - sanitized = sanitizeUrlDomains(sanitized, allowedDomains); + sanitized = applyURLSanitizationPolicy(sanitized, allowedDomains); // Apply truncation limits (shared with core) sanitized = applyTruncation(sanitized, maxLength); diff --git a/actions/setup/js/sanitize_content.test.cjs b/actions/setup/js/sanitize_content.test.cjs index 89056c55ac9..ed2fecab3b0 100644 --- a/actions/setup/js/sanitize_content.test.cjs +++ b/actions/setup/js/sanitize_content.test.cjs @@ -23,6 +23,7 @@ describe("sanitize_content.cjs", () => { delete global.core; delete process.env.GH_AW_ALLOWED_DOMAINS; delete process.env.GH_AW_ALLOWED_GITHUB_REFS; + delete process.env.GH_AW_SAFE_OUTPUTS_URLS; delete process.env.GH_AW_COMMANDS; delete process.env.GITHUB_SERVER_URL; delete process.env.GITHUB_API_URL; @@ -1045,6 +1046,37 @@ describe("sanitize_content.cjs", () => { expect(result).toContain("(redacted)"); expect(result).not.toContain("file://"); }); + + it("should preserve non-https protocol URLs inside fenced code blocks", () => { + // Fenced code block content must not be rewritten – suggestion block patches + // are applied verbatim by GitHub, so any rewrite corrupts the patch. + process.env.GH_AW_SAFE_OUTPUTS_URLS = "allowed-or-code-region"; + const input = "Prose ftp://bad.com\n```\nftp://inside-code.example\n```\nMore prose"; + const result = sanitizeContent(input); + expect(result).toContain("ftp://inside-code.example"); + expect(result).not.toContain("ftp://bad.com"); + }); + + it("should preserve non-https protocol URLs inside suggestion blocks", () => { + // Regression: safe-output sanitizer must not rewrite link targets inside + // GitHub pull request suggestion fences (fixes issue #39793). + process.env.GH_AW_SAFE_OUTPUTS_URLS = "allowed-or-code-region"; + const input = "See also ftp://external.example.\n" + "```suggestion\n" + "Assign users a role that grants [access](reference://docs/role).\n" + "```\n"; + const result = sanitizeContent(input); + // Content inside the suggestion block is preserved verbatim + expect(result).toContain("reference://docs/role"); + // Content outside the block is still sanitized + expect(result).not.toContain("ftp://external.example"); + }); + + it("should sanitize non-https protocol URLs inside fenced code blocks by default", () => { + // Default policy is "allowed-only", so code regions are sanitized unless + // GH_AW_SAFE_OUTPUTS_URLS is set to "allowed-or-code-region". + const input = "Prose ftp://bad.com\n```\nftp://inside-code.example\n```\nMore prose"; + const result = sanitizeContent(input); + expect(result).not.toContain("ftp://inside-code.example"); + expect(result).toContain("/redacted"); + }); }); describe("URL domain filtering", () => { @@ -1121,6 +1153,36 @@ describe("sanitize_content.cjs", () => { const result = sanitizeContent("Visit https://deep.nested.example.com/page"); expect(result).toBe("Visit https://deep.nested.example.com/page"); }); + + it("should preserve disallowed HTTPS domain URLs inside fenced code blocks", () => { + // Fenced code block content must not be domain-filtered. + process.env.GH_AW_SAFE_OUTPUTS_URLS = "allowed-or-code-region"; + const input = "Prose https://evil.com/malicious\n```\nhttps://evil.com/inside-code\n```\nMore prose"; + const result = sanitizeContent(input); + expect(result).toContain("https://evil.com/inside-code"); + expect(result).not.toContain("https://evil.com/malicious"); + }); + + it("should preserve disallowed HTTPS domain URLs inside suggestion blocks", () => { + // Regression: safe-output sanitizer must not rewrite link targets inside + // GitHub pull request suggestion fences (fixes issue #39793). + process.env.GH_AW_SAFE_OUTPUTS_URLS = "allowed-or-code-region"; + const input = "Prose https://evil.com/bad.\n" + "```suggestion\n" + "Assign users a role that grants [access](https://docs.elastic.co/en/docs/role).\n" + "```\n"; + const result = sanitizeContent(input); + // Content inside the suggestion block is preserved verbatim + expect(result).toContain("https://docs.elastic.co/en/docs/role"); + // Content outside the block is still sanitized + expect(result).not.toContain("https://evil.com/bad"); + }); + + it("should sanitize disallowed HTTPS domain URLs inside fenced code blocks by default", () => { + // Default policy is "allowed-only", so code regions are sanitized unless + // GH_AW_SAFE_OUTPUTS_URLS is set to "allowed-or-code-region". + const input = "Prose https://evil.com/malicious\n```\nhttps://evil.com/inside-code\n```\nMore prose"; + const result = sanitizeContent(input); + expect(result).not.toContain("https://evil.com/inside-code"); + expect(result).toContain("(evil.com/redacted)"); + }); }); describe("protocol-relative URL sanitization", () => { diff --git a/actions/setup/js/sanitize_content_core.cjs b/actions/setup/js/sanitize_content_core.cjs index 5f28598745b..2e29f05aa7c 100644 --- a/actions/setup/js/sanitize_content_core.cjs +++ b/actions/setup/js/sanitize_content_core.cjs @@ -8,6 +8,10 @@ const { isRepoAllowed } = require("./repo_helpers.cjs"); +const SAFE_OUTPUTS_URLS_ENV = "GH_AW_SAFE_OUTPUTS_URLS"; +const SAFE_OUTPUTS_URLS_ALLOWED_ONLY = "allowed-only"; +const SAFE_OUTPUTS_URLS_ALLOWED_OR_CODE_REGION = "allowed-or-code-region"; + /** * Module-level set to collect redacted URL domains across sanitization calls. * @type {string[]} @@ -1280,10 +1284,7 @@ function sanitizeContentCore(content, maxLength, maxBotMentions) { sanitized = applyToNonCodeRegions(sanitized, convertXmlTags); // URI filtering - replace non-https protocols with "(redacted)" - sanitized = sanitizeUrlProtocols(sanitized); - - // Domain filtering for HTTPS URIs - sanitized = sanitizeUrlDomains(sanitized, allowedDomains); + sanitized = applyURLSanitizationPolicy(sanitized, allowedDomains); // Apply truncation limits sanitized = applyTruncation(sanitized, maxLength); @@ -1307,6 +1308,28 @@ function sanitizeContentCore(content, maxLength, maxBotMentions) { return sanitized.trim(); } +/** + * Apply URL sanitization using configured safe-outputs URL policy. + * @param {string} content + * @param {string[]} allowedDomains + * @returns {string} + */ +function applyURLSanitizationPolicy(content, allowedDomains) { + const urlPolicy = process.env[SAFE_OUTPUTS_URLS_ENV] || SAFE_OUTPUTS_URLS_ALLOWED_ONLY; + if (urlPolicy === SAFE_OUTPUTS_URLS_ALLOWED_OR_CODE_REGION) { + // Preserve fenced/inline code regions (including ```suggestion blocks) verbatim. + // This avoids corrupting patch payloads while still sanitizing prose. + let sanitized = applyToNonCodeRegions(content, sanitizeUrlProtocols); + sanitized = applyToNonCodeRegions(sanitized, s => sanitizeUrlDomains(s, allowedDomains)); + return sanitized; + } + // Default policy ("allowed-only"): sanitize URLs in all content regions, + // including fenced and inline code spans. + let sanitized = sanitizeUrlProtocols(content); + sanitized = sanitizeUrlDomains(sanitized, allowedDomains); + return sanitized; +} + module.exports = { sanitizeContentCore, getRedactedDomains, @@ -1320,6 +1343,7 @@ module.exports = { sanitizeDomainName, sanitizeUrlProtocols, sanitizeUrlDomains, + applyURLSanitizationPolicy, neutralizeCommands, neutralizeGitHubReferences, removeXmlComments, diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index 376d89261bb..142920637bd 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -4911,6 +4911,11 @@ } ], "properties": { + "urls": { + "type": "string", + "description": "URL sanitization policy for safe outputs. \"allowed-only\" sanitizes all non-allowed URLs everywhere. \"allowed-or-code-region\" preserves URLs inside fenced and inline code regions while sanitizing prose.", + "enum": ["allowed-only", "allowed-or-code-region"] + }, "allowed-domains": { "type": "array", "description": "List of allowed domains for URL redaction in safe output handlers. Supports ecosystem identifiers (e.g., \"python\", \"node\", \"default-safe-outputs\") like network.allowed. These domains are unioned with the engine defaults and network.allowed when computing the final allowed domain set. localhost and github.com are always included.", diff --git a/pkg/workflow/compiler_safe_outputs_steps.go b/pkg/workflow/compiler_safe_outputs_steps.go index 87055403dcf..1a87731406f 100644 --- a/pkg/workflow/compiler_safe_outputs_steps.go +++ b/pkg/workflow/compiler_safe_outputs_steps.go @@ -542,6 +542,9 @@ func (c *Compiler) buildHandlerManagerStep(data *WorkflowData) ([]string, error) if domainsStr != "" { steps = append(steps, fmt.Sprintf(" GH_AW_ALLOWED_DOMAINS: %q\n", domainsStr)) } + if data.SafeOutputs != nil && data.SafeOutputs.URLs != "" { + steps = append(steps, fmt.Sprintf(" GH_AW_SAFE_OUTPUTS_URLS: %q\n", data.SafeOutputs.URLs)) + } // Pass GitHub server/API URLs so buildAllowedDomains() can add GHES domains dynamically steps = append(steps, " GITHUB_SERVER_URL: ${{ github.server_url }}\n") steps = append(steps, " GITHUB_API_URL: ${{ github.api_url }}\n") diff --git a/pkg/workflow/compiler_safe_outputs_steps_test.go b/pkg/workflow/compiler_safe_outputs_steps_test.go index f03718daac2..40cb43cea71 100644 --- a/pkg/workflow/compiler_safe_outputs_steps_test.go +++ b/pkg/workflow/compiler_safe_outputs_steps_test.go @@ -736,6 +736,16 @@ func TestBuildHandlerManagerStep(t *testing.T) { "GITHUB_API_URL: ${{ github.api_url }}", }, }, + { + name: "handler manager with urls policy propagates to process step", + safeOutputs: &SafeOutputsConfig{ + URLs: SafeOutputsURLsPolicyAllowedOrCodeRegion, + AddComments: &AddCommentsConfig{}, + }, + checkContains: []string{ + "GH_AW_SAFE_OUTPUTS_URLS: \"allowed-or-code-region\"", + }, + }, { name: "handler manager without allowed-domains still includes github urls", safeOutputs: &SafeOutputsConfig{ diff --git a/pkg/workflow/compiler_types.go b/pkg/workflow/compiler_types.go index 88be670510f..1704eae3824 100644 --- a/pkg/workflow/compiler_types.go +++ b/pkg/workflow/compiler_types.go @@ -702,6 +702,7 @@ type SafeOutputsConfig struct { Jobs map[string]*SafeJobConfig `yaml:"jobs,omitempty"` // Safe-jobs configuration (moved from top-level) Scripts map[string]*SafeScriptConfig `yaml:"scripts,omitempty"` // Custom inline handlers that run in the safe-output handler loop GitHubApp *GitHubAppConfig `yaml:"github-app,omitempty"` // GitHub App credentials for token minting + URLs string `yaml:"urls,omitempty"` // URL sanitization policy: SafeOutputsURLsPolicyAllowedOnly (default) or SafeOutputsURLsPolicyAllowedOrCodeRegion AllowedDomains []string `yaml:"allowed-domains,omitempty"` // Allowed domains for URL redaction, unioned with network.allowed; supports ecosystem identifiers AllowGitHubReferences []string `yaml:"allowed-github-references,omitempty"` // Allowed repositories for GitHub references (e.g., ["repo", "org/repo2"]) Staged bool `yaml:"staged,omitempty"` // If true, emit step summary messages instead of making GitHub API calls diff --git a/pkg/workflow/compiler_validators.go b/pkg/workflow/compiler_validators.go index ebf7748243c..7204df06979 100644 --- a/pkg/workflow/compiler_validators.go +++ b/pkg/workflow/compiler_validators.go @@ -153,6 +153,7 @@ func (c *Compiler) validateCoreToolConfiguration(workflowData *WorkflowData, mar {logMessage: "Validating safe-outputs target fields", validateFn: func() error { return validateSafeOutputsTarget(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs max fields", validateFn: func() error { return validateSafeOutputsMax(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs samples entries against MCP tool schemas", validateFn: func() error { return validateSafeOutputsSamples(workflowData.SafeOutputs) }}, + {logMessage: "Validating safe-outputs urls policy", validateFn: func() error { return validateSafeOutputsURLs(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs allowed-domains", validateFn: func() error { return c.validateSafeOutputsAllowedDomains(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs merge-pull-request", validateFn: func() error { return validateSafeOutputsMergePullRequest(workflowData.SafeOutputs) }}, {logMessage: "Validating safe-outputs needs declarations", validateFn: func() error { return validateSafeOutputsNeeds(workflowData) }}, diff --git a/pkg/workflow/compiler_yaml.go b/pkg/workflow/compiler_yaml.go index 5825534710d..397f6853e47 100644 --- a/pkg/workflow/compiler_yaml.go +++ b/pkg/workflow/compiler_yaml.go @@ -1018,6 +1018,9 @@ func (c *Compiler) generateOutputCollectionStep(yaml *strings.Builder, data *Wor if domainsStr != "" { fmt.Fprintf(yaml, " GH_AW_ALLOWED_DOMAINS: %q\n", domainsStr) } + if data.SafeOutputs != nil && data.SafeOutputs.URLs != "" { + fmt.Fprintf(yaml, " GH_AW_SAFE_OUTPUTS_URLS: %q\n", data.SafeOutputs.URLs) + } // Add allowed GitHub references configuration for reference escaping if data.SafeOutputs != nil && data.SafeOutputs.AllowGitHubReferences != nil { diff --git a/pkg/workflow/imports.go b/pkg/workflow/imports.go index dd6de131297..6c9130d7bc7 100644 --- a/pkg/workflow/imports.go +++ b/pkg/workflow/imports.go @@ -420,6 +420,9 @@ func mergeSafeOutputConfig(result *SafeOutputsConfig, config map[string]any, c * if len(result.AllowedDomains) == 0 && len(importedConfig.AllowedDomains) > 0 { result.AllowedDomains = importedConfig.AllowedDomains } + if result.URLs == "" && importedConfig.URLs != "" { + result.URLs = importedConfig.URLs + } if !result.Staged && importedConfig.Staged { result.Staged = importedConfig.Staged } diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go index a8298a73874..ee1166bd750 100644 --- a/pkg/workflow/safe_outputs_config.go +++ b/pkg/workflow/safe_outputs_config.go @@ -195,6 +195,13 @@ func (c *Compiler) extractSafeOutputsConfig(frontmatter map[string]any) *SafeOut } } + // Parse URL sanitization policy + if urls, exists := outputMap["urls"]; exists { + if urlsStr, ok := urls.(string); ok { + config.URLs = urlsStr + } + } + // Parse allowed-github-references configuration if allowGitHubRefs, exists := outputMap["allowed-github-references"]; exists { if refsArray, ok := allowGitHubRefs.([]any); ok { diff --git a/pkg/workflow/safe_outputs_fix_test.go b/pkg/workflow/safe_outputs_fix_test.go index 309742e47f8..658c8bade9a 100644 --- a/pkg/workflow/safe_outputs_fix_test.go +++ b/pkg/workflow/safe_outputs_fix_test.go @@ -191,6 +191,24 @@ func TestMergeSafeOutputsMetaFieldsUnit(t *testing.T) { assert.Equal(t, []string{"owner/main-repo"}, result.AllowGitHubReferences, "Main AllowGitHubReferences should take precedence") }, }, + { + name: "URLs policy imported when empty in main", + topConfig: nil, + imported: `{"urls":"allowed-or-code-region"}`, + verify: func(t *testing.T, result *SafeOutputsConfig) { + assert.Equal(t, SafeOutputsURLsPolicyAllowedOrCodeRegion, result.URLs, "URLs policy should be imported") + }, + }, + { + name: "URLs policy not overridden when set in main", + topConfig: &SafeOutputsConfig{ + URLs: SafeOutputsURLsPolicyAllowedOnly, + }, + imported: `{"urls":"allowed-or-code-region"}`, + verify: func(t *testing.T, result *SafeOutputsConfig) { + assert.Equal(t, SafeOutputsURLsPolicyAllowedOnly, result.URLs, "Main URLs policy should take precedence") + }, + }, { name: "GroupReports imported when false in main", topConfig: nil, diff --git a/pkg/workflow/safe_outputs_urls_validation_test.go b/pkg/workflow/safe_outputs_urls_validation_test.go new file mode 100644 index 00000000000..b50497b6c02 --- /dev/null +++ b/pkg/workflow/safe_outputs_urls_validation_test.go @@ -0,0 +1,28 @@ +//go:build !integration + +package workflow + +import "testing" + +func TestValidateSafeOutputsURLs(t *testing.T) { + tests := []struct { + name string + config *SafeOutputsConfig + wantErr bool + }{ + {name: "nil config", config: nil, wantErr: false}, + {name: "empty policy", config: &SafeOutputsConfig{}, wantErr: false}, + {name: "allowed-only", config: &SafeOutputsConfig{URLs: SafeOutputsURLsPolicyAllowedOnly}, wantErr: false}, + {name: "allowed-or-code-region", config: &SafeOutputsConfig{URLs: SafeOutputsURLsPolicyAllowedOrCodeRegion}, wantErr: false}, + {name: "invalid", config: &SafeOutputsConfig{URLs: "unknown"}, wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateSafeOutputsURLs(tt.config) + if (err != nil) != tt.wantErr { + t.Fatalf("validateSafeOutputsURLs() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/pkg/workflow/safe_outputs_validation.go b/pkg/workflow/safe_outputs_validation.go index 3f789894414..f5504194d4c 100644 --- a/pkg/workflow/safe_outputs_validation.go +++ b/pkg/workflow/safe_outputs_validation.go @@ -9,6 +9,30 @@ import ( var safeOutputsDomainsValidationLog = newValidationLogger("safe_outputs_domains") +const ( + SafeOutputsURLsPolicyAllowedOnly = "allowed-only" + SafeOutputsURLsPolicyAllowedOrCodeRegion = "allowed-or-code-region" +) + +// validateSafeOutputsURLs validates the urls policy in safe-outputs. +func validateSafeOutputsURLs(config *SafeOutputsConfig) error { + if config == nil || config.URLs == "" { + return nil + } + + switch config.URLs { + case SafeOutputsURLsPolicyAllowedOnly, SafeOutputsURLsPolicyAllowedOrCodeRegion: + return nil + default: + return fmt.Errorf( + "safe-outputs.urls: invalid value %q (expected one of: %q, %q)", + config.URLs, + SafeOutputsURLsPolicyAllowedOnly, + SafeOutputsURLsPolicyAllowedOrCodeRegion, + ) + } +} + // validateSafeOutputsAllowedDomains validates the allowed-domains configuration in safe-outputs. // Supports ecosystem identifiers (e.g., "python", "node", "default-safe-outputs") like network.allowed. func (c *Compiler) validateSafeOutputsAllowedDomains(config *SafeOutputsConfig) error {