Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions pkg/cli/logs_report.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ type RunData struct {
Agent string `json:"agent,omitempty" console:"header:Agent,omitempty"`
Status string `json:"status" console:"header:Status"`
Conclusion string `json:"conclusion,omitempty" console:"-"`
Classification string `json:"classification" console:"-"`
Duration string `json:"duration,omitempty" console:"header:Duration,omitempty"`
ActionMinutes float64 `json:"action_minutes,omitempty" console:"header:Action Minutes,omitempty"`
TokenUsage int `json:"token_usage,omitempty" console:"header:Tokens,format:number,omitempty"`
Expand Down Expand Up @@ -218,6 +219,7 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation
Agent: agentID,
Status: run.Status,
Conclusion: run.Conclusion,
Classification: deriveRunClassification(comparison),
TokenUsage: run.TokenUsage,
EffectiveTokens: run.EffectiveTokens,
EstimatedCost: run.EstimatedCost,
Expand Down Expand Up @@ -334,6 +336,30 @@ func buildLogsData(processedRuns []ProcessedRun, outputDir string, continuation
}
}

// deriveRunClassification reduces a run's AuditComparisonData to a single
// label string. The cases are checked in order and the first match wins:
//
//   - "unclassified" – comparison is nil.
//   - "baseline"     – comparison.BaselineFound is false, i.e. there was no
//     earlier run to compare against.
//   - "unclassified" – a baseline was found but comparison.Classification is nil.
//   - "risky"        – comparison.Classification.Label is exactly "risky".
//   - "normal"       – any other label value (including an empty one).
func deriveRunClassification(comparison *AuditComparisonData) string {
	switch {
	case comparison == nil:
		// Nothing to inspect at all.
		return "unclassified"
	case !comparison.BaselineFound:
		// The baseline check deliberately precedes the Classification check,
		// so a run without a baseline is reported as "baseline" even when a
		// classification happens to be attached.
		return "baseline"
	case comparison.Classification == nil:
		return "unclassified"
	case comparison.Classification.Label == "risky":
		return "risky"
	default:
		// Every non-"risky" label collapses to "normal".
		return "normal"
	}
}

// isValidToolName checks if a tool name appears to be valid
// Filters out single words, common words, and other garbage that shouldn't be tools
func isValidToolName(toolName string) bool {
Expand Down
61 changes: 61 additions & 0 deletions pkg/cli/logs_report_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -847,3 +847,64 @@ func TestBuildLogsDataIncludesDateFields(t *testing.T) {
t.Errorf("Expected UpdatedAt = %v, got %v", updatedAt, run.UpdatedAt)
}
}

// TestDeriveRunClassification tests the classification mapping helper.
func TestDeriveRunClassification(t *testing.T) {
tests := []struct {
name string
comparison *AuditComparisonData
want string
Comment on lines +851 to +856

Copilot AI Apr 3, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new classification field is part of the public logs JSON surface area, but current tests only validate deriveRunClassification in isolation. Consider adding/adjusting a buildLogsData test assertion that RunData.Classification is always populated and matches the expected mapping (e.g., baseline/no-baseline, risky, normal) so the API contract can’t regress if buildLogsData stops setting the field.

Copilot uses AI. Check for mistakes.
}{
{
name: "nil comparison returns unclassified",
comparison: nil,
want: "unclassified",
},
{
name: "no baseline found returns baseline",
comparison: &AuditComparisonData{BaselineFound: false},
want: "baseline",
},
{
name: "nil classification with baseline returns unclassified",
comparison: &AuditComparisonData{
BaselineFound: true,
Classification: nil,
},
want: "unclassified",
},
{
name: "risky label returns risky",
comparison: &AuditComparisonData{
BaselineFound: true,
Classification: &AuditComparisonClassification{Label: "risky"},
},
want: "risky",
},
{
name: "stable label returns normal",
comparison: &AuditComparisonData{
BaselineFound: true,
Classification: &AuditComparisonClassification{Label: "stable"},
},
want: "normal",
},
{
name: "changed label returns normal",
comparison: &AuditComparisonData{
BaselineFound: true,
Classification: &AuditComparisonClassification{Label: "changed"},
},
want: "normal",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := deriveRunClassification(tt.comparison)
if got != tt.want {
t.Errorf("deriveRunClassification() = %q, want %q", got, tt.want)
}
})
}
}
2 changes: 1 addition & 1 deletion pkg/cli/mcp_logs_guardrail.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func getLogsDataSchema() LogsDataSchema {
},
"runs": {
Type: "array",
Description: "Array of workflow run data (database_id, workflow_name, agent, status, conclusion, duration, token_usage, estimated_cost, turns, error_count, warning_count, missing_tool_count, created_at, url, logs_path, event, branch)",
Description: "Array of workflow run data (database_id, workflow_name, agent, status, conclusion, classification, duration, token_usage, estimated_cost, turns, error_count, warning_count, missing_tool_count, created_at, url, logs_path, event, branch). classification is one of: risky, normal, baseline, unclassified.",
},
"tool_usage": {
Type: "array",
Expand Down
Loading