diff --git a/mod_api/__init__.py b/mod_api/__init__.py new file mode 100644 index 00000000..fb1a634b --- /dev/null +++ b/mod_api/__init__.py @@ -0,0 +1,22 @@ +""" +mod_api: JSON REST API blueprint for the CCExtractor CI platform. + +Registered at /api/v1. All endpoints return structured JSON, use scoped +Bearer token auth, and enforce per-client rate limiting. +""" + +from flask import Blueprint + +mod_api = Blueprint('api', __name__) + +# Middleware (registers before_request hooks and error handlers) +from mod_api.middleware import auth # noqa: E402, F401 +from mod_api.middleware import error_handler # noqa: E402, F401 +from mod_api.middleware import rate_limit # noqa: E402, F401 +# Route modules (registers endpoint functions on the blueprint) +from mod_api.routes import auth as auth_routes # noqa: E402, F401 +from mod_api.routes import errors_logs # noqa: E402, F401 +from mod_api.routes import results # noqa: E402, F401 +from mod_api.routes import runs # noqa: E402, F401 +from mod_api.routes import samples # noqa: E402, F401 +from mod_api.routes import system # noqa: E402, F401 diff --git a/mod_api/middleware/__init__.py b/mod_api/middleware/__init__.py new file mode 100644 index 00000000..860b3ce0 --- /dev/null +++ b/mod_api/middleware/__init__.py @@ -0,0 +1 @@ +"""mod_api.middleware: auth, rate limiting, validation, and error handling.""" diff --git a/mod_api/middleware/auth.py b/mod_api/middleware/auth.py new file mode 100644 index 00000000..a338444a --- /dev/null +++ b/mod_api/middleware/auth.py @@ -0,0 +1,123 @@ +""" +Bearer token authentication and scope/role enforcement for API routes. + +Runs as a before_request hook on the api blueprint. Public endpoints +(token creation, health check) are exempted. On success, the authenticated +user and token are stored in flask.g for downstream handlers. 
+
+HTTP semantics:
+  401 = token missing, expired, revoked, or invalid
+  403 = valid token but insufficient scope or role
+"""
+
+import functools
+from typing import List
+
+from flask import g, request
+
+from mod_api import mod_api
+from mod_api.middleware.error_handler import make_error_response
+from mod_api.models.api_token import ApiToken
+
+_AUTH_FAILED_MSG = 'Bearer token is missing, expired, or invalid.'
+
+# These endpoints bypass auth entirely.
+_PUBLIC_ENDPOINTS = frozenset([
+    'api.create_token',  # POST /auth/tokens (uses email/password body)
+    'api.system_health',  # GET /system/health (uptime monitoring)
+])
+
+
+def _unauthorized():
+    """Shorthand for a 401 response with the standard auth failure message."""
+    return make_error_response(
+        'unauthorized', _AUTH_FAILED_MSG, http_status=401)
+
+
+@mod_api.before_request
+def authenticate_request():
+    """
+    Validate the Bearer token and attach user context to the request.
+
+    Public endpoints skip the check and get ``g.api_user``/``g.api_token``
+    set to None. Every other request must carry ``Authorization: Bearer
+    spci_...`` matching a stored hash that is neither expired nor revoked.
+    All failures return the same generic 401; on success the function
+    returns None so the request proceeds.
+    """
+    if request.endpoint in _PUBLIC_ENDPOINTS:
+        g.api_user = None
+        g.api_token = None
+        return
+
+    auth_header = request.headers.get('Authorization', '')
+    if not auth_header:
+        return _unauthorized()
+
+    parts = auth_header.split(' ', 1)
+    # The auth scheme is case-insensitive (RFC 7235), so accept "bearer" too.
+    if len(parts) != 2 or parts[0].lower() != 'bearer':
+        return _unauthorized()
+
+    token_value = parts[1].strip()
+    if not token_value or not token_value.startswith('spci_'):
+        return _unauthorized()
+
+    # Look up by prefix, then verify the full hash against each candidate.
+    prefix = ApiToken.extract_prefix(token_value)
+    candidates = ApiToken.query.filter_by(token_prefix=prefix).all()
+
+    if not candidates:
+        return _unauthorized()
+
+    matched_token = None
+    for candidate in candidates:
+        if ApiToken.verify_token(token_value, candidate.token_hash):
+            matched_token = candidate
+            break
+
+    if matched_token is None:
+        return _unauthorized()
+
+    if not matched_token.is_valid:
+        return _unauthorized()
+
+    g.api_token = matched_token
+    g.api_user = matched_token.user
+
+
+def require_scope(scope: str):
+    """
+    Decorate a route so it rejects tokens that lack ``scope``.
+
+    Responds 401 when no token is attached to ``g`` and 403 (listing the
+    required and the granted scopes) when the token does not grant it.
+    """
+    def decorator(f):
+        @functools.wraps(f)
+        def decorated_function(*args, **kwargs):
+            token = getattr(g, 'api_token', None)
+            if token is None:
+                return _unauthorized()
+            if not token.has_scope(scope):
+                return make_error_response(
+                    'forbidden',
+                    'Token lacks the required scope for this operation.',
+                    details={
+                        'required_scope': scope,
+                        'token_scopes': token.scopes,
+                    },
+                    http_status=403,
+                )
+            return f(*args, **kwargs)
+        return decorated_function
+    return decorator
+
+
+def require_roles(roles: List[str]):
+    """
+    Decorate a route so it rejects users whose role is not in ``roles``.
+
+    Responds 401 when no user is attached to ``g`` and 403 (listing the
+    required roles and the user's role) when ``user.role.value`` is not
+    one of ``roles``.
+    """
+    def decorator(f):
+        @functools.wraps(f)
+        def decorated_function(*args, **kwargs):
+            user = getattr(g, 'api_user', None)
+            if user is None:
+                return _unauthorized()
+            if user.role.value not in roles:
+                return make_error_response(
+                    'forbidden',
+                    'Your role does not have permission for this operation.',
+                    details={
+                        'required_roles': roles,
+                        'user_role': user.role.value,
+                    },
+                    http_status=403,
+                )
+            return f(*args, **kwargs)
+        return decorated_function
+    return decorator
diff --git a/mod_api/middleware/error_handler.py b/mod_api/middleware/error_handler.py
new file mode 100644
index 00000000..8bbc46de
--- /dev/null
+++ b/mod_api/middleware/error_handler.py
@@ -0,0 +1,159 @@
+"""
+Structured JSON error responses for API routes.
+
+Intercepts standard HTTP errors (400, 401, 403, 404, 405, 422, 429, 500),
+Marshmallow validation errors, and SQLAlchemy errors so that nothing under
+/api/v1/* ever returns an HTML error page.
+
+Response shape: {"code": "...", "message": "...", "details": {...}}
+"""
+
+from flask import jsonify, request
+from marshmallow import ValidationError as MarshmallowValidationError
+from sqlalchemy.exc import SQLAlchemyError
+
+from mod_api import mod_api
+
+_API_PREFIX = '/api/v1'
+
+
+def make_error_response(code, message, details=None, http_status=400):
+    """
+    Build a JSON error response conforming to the ErrorResponse schema.
+
+    ``message`` is stringified and cut to 500 characters; ``details``
+    defaults to an empty dict. The response status is ``http_status``.
+    """
+    body = {
+        'code': code,
+        'message': str(message)[:500],
+        'details': details if details is not None else {},
+    }
+    response = jsonify(body)
+    response.status_code = http_status
+    return response
+
+
+def _is_api_request():
+    """
+    Check whether the current request targets an API endpoint.
+
+    Matches the prefix itself and anything below it, but not look-alike
+    paths such as ``/api/v10``.
+    """
+    path = request.path
+    return path == _API_PREFIX or path.startswith(_API_PREFIX + '/')
+
+
+@mod_api.app_errorhandler(400)
+def handle_400(error):
+    """Return a JSON 400 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        # Return, don't raise: an exception raised inside an error handler
+        # is treated as a new unhandled error and surfaces as a 500.
+        return error
+    return make_error_response(
+        'validation_error',
+        getattr(error, 'description', 'Bad request.'),
+        http_status=400,
+    )
+
+
+@mod_api.app_errorhandler(401)
+def handle_401(error):
+    """Return a JSON 401 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        return error
+    return make_error_response(
+        'unauthorized',
+        'Bearer token is missing, expired, or invalid.',
+        http_status=401,
+    )
+
+
+@mod_api.app_errorhandler(403)
+def handle_403(error):
+    """Return a JSON 403 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        return error
+    return make_error_response(
+        'forbidden',
+        'Token does not have the required scope for this operation.',
+        http_status=403,
+    )
+
+
+@mod_api.app_errorhandler(404)
+def handle_404(error):
+    """Return a JSON 404 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        return error
+    return make_error_response(
+        'not_found',
+        getattr(error, 'description', 'Resource not found.'),
+        http_status=404,
+    )
+
+
+@mod_api.app_errorhandler(405)
+def handle_405(error):
+    """Return a JSON 405 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        # Return, don't raise: an exception raised inside an error handler
+        # is treated as a new unhandled error and surfaces as a 500.
+        return error
+    return make_error_response(
+        'method_not_allowed',
+        'Method not allowed.',
+        http_status=405,
+    )
+
+
+@mod_api.app_errorhandler(422)
+def handle_422(error):
+    """Return a JSON 422 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        return error
+    return make_error_response(
+        'unprocessable',
+        getattr(
+            error,
+            'description',
+            'Request is valid JSON but semantically invalid.'),
+        http_status=422,
+    )
+
+
+@mod_api.app_errorhandler(429)
+def handle_429(error):
+    """Return a JSON 429 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        return error
+    # NOTE(review): these details are fixed placeholder values, not the
+    # caller's actual quota or remaining wait time.
+    return make_error_response(
+        'rate_limited',
+        'Rate limit exceeded.',
+        details={'retry_after': 30, 'limit': 120, 'window': '60s'},
+        http_status=429,
+    )
+
+
+@mod_api.app_errorhandler(500)
+def handle_500(error):
+    """Return a JSON 500 for API paths; hand other paths the error as-is."""
+    if not _is_api_request():
+        # NOTE(review): assumes ``error`` is an HTTPException here (Flask
+        # wraps unhandled exceptions before calling 500 handlers) - confirm.
+        return error
+    return make_error_response(
+        'internal_error',
+        'An unexpected error occurred.',
+        http_status=500,
+    )
+
+
+@mod_api.errorhandler(MarshmallowValidationError)
+def handle_marshmallow_validation_error(error):
+    """Turn an uncaught schema validation failure into a JSON 400."""
+    return make_error_response(
+        'validation_error',
+        'Request failed schema validation.',
+        details={'fields': error.messages},
+        http_status=400,
+    )
+
+
+@mod_api.errorhandler(SQLAlchemyError)
+def handle_sqlalchemy_error(error):
+    """Log the real error, but never expose raw SQL details to the client."""
+    from flask import g
+    # ``g.log`` is optional; without it the error is not logged here.
+    log = getattr(g, 'log', None)
+    if log:
+        log.error(f'Database error in API: {error}')
+    return make_error_response(
+        'internal_error',
+        'An unexpected database error occurred.',
+        http_status=500,
+    )
diff --git a/mod_api/middleware/rate_limit.py b/mod_api/middleware/rate_limit.py
new file mode 100644
index 00000000..1f73da7b
--- /dev/null
+++ b/mod_api/middleware/rate_limit.py
@@ -0,0 
+1,140 @@
+"""
+Per-client rate limiting for API endpoints.
+
+Limits:
+  POST /auth/tokens       5 req / 15 min  (keyed by IP)
+  POST/DELETE/PUT/PATCH   20 req / min    (keyed by token)
+  GET                     120 req / min   (keyed by token)
+
+Includes X-RateLimit-* headers on every response.
+
+Uses an in-memory dict for simplicity. For multi-process deployments,
+swap this out for a Redis backend.
+"""
+
+import time
+
+from flask import g, request
+
+from mod_api import mod_api
+
+_rate_limit_store = {}  # key -> {'count': int, 'window_start': float}
+_eviction_counter = 0
+_EVICTION_INTERVAL = 100  # run cleanup every N requests
+# Limit class -> (max_requests, window_seconds).
+_LIMITS = {'auth': (5, 900), 'write': (20, 60), 'read': (120, 60)}
+# No window is longer than this, so older entries can be dropped safely.
+_MAX_WINDOW_SECONDS = max(window for _, window in _LIMITS.values())
+
+
+def _evict_stale_entries():
+    """Every Nth call, prune entries older than the longest window."""
+    global _eviction_counter
+    _eviction_counter += 1
+    if _eviction_counter < _EVICTION_INTERVAL:
+        return
+    _eviction_counter = 0
+    now = time.time()
+    stale_keys = [
+        key for key, entry in _rate_limit_store.items()
+        if (now - entry['window_start']) > _MAX_WINDOW_SECONDS
+    ]
+    for key in stale_keys:
+        del _rate_limit_store[key]
+
+
+def _get_limit_class():
+    """Return the limit class ('auth', 'write' or 'read') of this request."""
+    if request.endpoint == 'api.create_token':
+        return 'auth'
+    if request.method in ('POST', 'DELETE', 'PUT', 'PATCH'):
+        return 'write'
+    return 'read'
+
+
+def _get_rate_limit_key():
+    """
+    Build the rate-limit bucket key for this request.
+
+    The limit class is part of the key because every class has its own
+    quota and window. With a shared counter, GETs would eat into the write
+    quota, and any 60-second-window request from an IP would restart that
+    IP's 15-minute token-creation window.
+    """
+    limit_class = _get_limit_class()
+    token = getattr(g, 'api_token', None)
+    if token and limit_class != 'auth':
+        return f'{limit_class}:token:{token.id}'
+    return f'{limit_class}:ip:{request.remote_addr}'
+
+
+def _get_limits():
+    """Return (max_requests, window_seconds) for the current endpoint."""
+    return _LIMITS[_get_limit_class()]
+
+
+@mod_api.before_request
+def check_rate_limit():
+    """
+    Count this request and reject it with 429 once the limit is exceeded.
+
+    Returns None when the request may proceed. Uses a fixed window: the
+    counter restarts once ``window_seconds`` have passed since the window
+    opened.
+    """
+    _evict_stale_entries()
+
+    key = _get_rate_limit_key()
+    max_requests, window_seconds = _get_limits()
+    now = time.time()
+
+    entry = _rate_limit_store.get(key)
+
+    if entry is None or (now - entry['window_start']) >= window_seconds:
+        _rate_limit_store[key] = {'count': 1, 'window_start': now}
+        return None
+
+    entry['count'] += 1
+    if entry['count'] <= max_requests:
+        return None
+
+    reset_at = int(entry['window_start'] + window_seconds)
+    retry_after = max(1, reset_at - int(now))
+    from mod_api.middleware.error_handler import make_error_response
+    response = make_error_response(
+        'rate_limited',
+        f'Rate limit exceeded. Retry after {retry_after} seconds.',
+        details={
+            'retry_after': retry_after,
+            'limit': max_requests,
+            'window': f'{window_seconds}s',
+        },
+        http_status=429,
+    )
+    response.headers['Retry-After'] = str(retry_after)
+    response.headers['X-RateLimit-Limit'] = str(max_requests)
+    response.headers['X-RateLimit-Remaining'] = '0'
+    response.headers['X-RateLimit-Reset'] = str(reset_at)
+    return response
+
+
+@mod_api.after_request
+def add_rate_limit_headers(response):
+    """Attach X-RateLimit-* headers to every response."""
+    key = _get_rate_limit_key()
+    max_requests, window_seconds = _get_limits()
+    now = time.time()
+
+    entry = _rate_limit_store.get(key)
+    if entry:
+        remaining = max(0, max_requests - entry['count'])
+        reset_at = int(entry['window_start'] + window_seconds)
+    else:
+        remaining = max_requests
+        reset_at = int(now + window_seconds)
+
+    response.headers['X-RateLimit-Limit'] = str(max_requests)
+    response.headers['X-RateLimit-Remaining'] = str(remaining)
+    response.headers['X-RateLimit-Reset'] = str(reset_at)
+
+    return response
diff --git a/mod_api/middleware/validation.py b/mod_api/middleware/validation.py
new file mode 100644
index 00000000..e7b51808
--- /dev/null
+++ b/mod_api/middleware/validation.py
@@ -0,0 +1,230 @@
+"""
+Request validation decorators for bodies, query params, and path IDs.
+
+All of these return 400 with field-level details on failure, so route
+handlers can assume clean input.
+""" + +import re +from functools import wraps + +from flask import request +from marshmallow import ValidationError as MarshmallowValidationError + +from mod_api.middleware.error_handler import make_error_response + +PATTERNS = { + 'commit_sha': re.compile(r'^[a-fA-F0-9]{40}$'), + 'sha256': re.compile(r'^[a-fA-F0-9]{64}$'), + 'repository': re.compile(r'^[a-zA-Z0-9_.\-]+/[a-zA-Z0-9_.\-]+$'), + 'branch': re.compile(r'^[A-Za-z0-9._/\-]+$'), + 'token_name': re.compile(r'^[a-zA-Z0-9_\-]+$'), + 'extension': re.compile(r'^[a-zA-Z0-9]+$'), +} + +# Whitelist of allowed sort params. Never pass raw user input to the ORM. +ALLOWED_RUN_SORTS = frozenset([ + 'created_at', '-created_at', + 'run_id', '-run_id', +]) + + +def validate_body(schema_class): + """Validate the JSON body with a schema, pass result as ``validated_data``.""" + def decorator(f): + @wraps(f) + def decorated(*args, **kwargs): + content_type = request.content_type or '' + if 'application/json' not in content_type: + return make_error_response( + 'validation_error', + 'Content-Type must be application/json.', + http_status=415, + ) + json_data = request.get_json(silent=True) + if json_data is None: + return make_error_response( + 'validation_error', + 'Request body must be valid JSON.', + http_status=400, + ) + schema = schema_class() + try: + validated = schema.load(json_data) + except MarshmallowValidationError as e: + return make_error_response( + 'validation_error', + 'Request failed schema validation.', + details={'fields': e.messages}, + http_status=400, + ) + kwargs['validated_data'] = validated + return f(*args, **kwargs) + return decorated + return decorator + + +def validate_pagination(f): + """Extract and validate ``limit`` and ``offset`` query params.""" + @wraps(f) + def decorated(*args, **kwargs): + try: + limit = int(request.args.get('limit', 50)) + except (ValueError, TypeError): + return make_error_response( + 'validation_error', + 'limit must be an integer.', + details={ + 'fields': { + 
'limit': 'Must be an integer between 1 and 100.'}}, + http_status=400, + ) + + try: + offset = int(request.args.get('offset', 0)) + except (ValueError, TypeError): + return make_error_response( + 'validation_error', + 'offset must be a non-negative integer.', + details={ + 'fields': { + 'offset': 'Must be a non-negative integer.'}}, + http_status=400, + ) + + if limit < 1 or limit > 100: + return make_error_response( + 'validation_error', + 'limit must be between 1 and 100.', + details={'fields': {'limit': 'Must be between 1 and 100.'}}, + http_status=400, + ) + + if offset < 0: + return make_error_response( + 'validation_error', + 'offset must be non-negative.', + details={'fields': {'offset': 'Must be >= 0.'}}, + http_status=400, + ) + + kwargs['limit'] = limit + kwargs['offset'] = offset + return f(*args, **kwargs) + return decorated + + +def validate_path_id(param_name): + """Ensure a URL path parameter is a positive integer.""" + def decorator(f): + @wraps(f) + def decorated(*args, **kwargs): + value = kwargs.get(param_name) + try: + int_value = int(value) + except (ValueError, TypeError): + return make_error_response( + 'validation_error', + f'{param_name} must be a positive integer.', + details={ + 'fields': { + param_name: 'Must be a positive integer.'}}, + http_status=400, + ) + if int_value < 1: + return make_error_response( + 'validation_error', + f'{param_name} must be >= 1.', + details={ + 'fields': { + param_name: 'Must be >= 1. Zero and negative IDs are rejected.' 
+                        }
+                    },
+                    http_status=400,
+                )
+            kwargs[param_name] = int_value
+            return f(*args, **kwargs)
+        return decorated
+    return decorator
+
+
+def validate_date_range(f):
+    """
+    Parse ``created_after``/``created_before`` and reject inverted ranges.
+
+    Both values reach the handler exactly as parsed (None when absent).
+    For the ordering check only, naive values are read as UTC, so mixing
+    offset-aware and naive inputs is compared instead of raising TypeError.
+    """
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        from datetime import datetime, timezone
+
+        parsed = {}
+        for name in ('created_after', 'created_before'):
+            raw = request.args.get(name)
+            parsed[name] = None
+            if not raw:
+                continue
+            try:
+                parsed[name] = datetime.fromisoformat(
+                    raw.replace('Z', '+00:00'))
+            except ValueError:
+                return make_error_response(
+                    'validation_error',
+                    f'{name} must be a valid ISO 8601 datetime.',
+                    details={'fields': {name: 'Invalid ISO 8601 format.'}},
+                    http_status=400,
+                )
+
+        def as_utc(value):
+            # Naive and aware datetimes cannot be ordered against each other.
+            if value.tzinfo is None:
+                return value.replace(tzinfo=timezone.utc)
+            return value
+
+        created_after = parsed['created_after']
+        created_before = parsed['created_before']
+        if (created_after and created_before
+                and as_utc(created_after) > as_utc(created_before)):
+            return make_error_response(
+                'validation_error',
+                'created_after must not be after created_before.',
+                details={'fields': {
+                    'created_after': 'Must be before created_before.',
+                    'created_before': 'Must be after created_after.',
+                }},
+                http_status=400,
+            )
+
+        kwargs['created_after'] = created_after
+        kwargs['created_before'] = created_before
+        return f(*args, **kwargs)
+    return decorated
+
+
+def validate_sort(allowed=None):
+    """Validate the ``sort`` query param against a whitelist."""
+    if allowed is None:
+        allowed = ALLOWED_RUN_SORTS
+
+    def decorator(f):
+        @wraps(f)
+        def decorated(*args, **kwargs):
+            sort = request.args.get('sort', '-created_at')
+            if sort not in allowed:
+                return make_error_response(
+                    'validation_error',
+                    f'sort must be one 
of: {", ".join(sorted(allowed))}', + details={ + 'fields': { + 'sort': f'Must be one of: {sorted(allowed)}' + } + }, + http_status=400, + ) + kwargs['sort'] = sort + return f(*args, **kwargs) + return decorated + return decorator diff --git a/mod_api/models/__init__.py b/mod_api/models/__init__.py new file mode 100644 index 00000000..dcb36537 --- /dev/null +++ b/mod_api/models/__init__.py @@ -0,0 +1 @@ +"""mod_api.models: database models for the API module.""" diff --git a/mod_api/models/api_token.py b/mod_api/models/api_token.py new file mode 100644 index 00000000..12b56c6c --- /dev/null +++ b/mod_api/models/api_token.py @@ -0,0 +1,140 @@ +""" +ApiToken model: server-side storage for scoped API tokens. + +Tokens are opaque strings prefixed with 'spci_'. Only the argon2 hash +is persisted; the plaintext is returned exactly once at creation time. +""" + +import json +import secrets +from datetime import datetime, timedelta, timezone +from typing import List + +from argon2 import PasswordHasher +from argon2.exceptions import VerifyMismatchError +from sqlalchemy import (Column, DateTime, ForeignKey, Integer, String, Text, + UniqueConstraint) +from sqlalchemy.orm import relationship + +from database import Base + +_ph = PasswordHasher() + +VALID_SCOPES = frozenset([ + 'runs:read', + 'runs:write', + 'results:read', + 'baselines:write', + 'system:read', + 'tokens:manage', +]) + +DEFAULT_SCOPES = ['runs:read', 'results:read'] + +TOKEN_PREFIX = 'spci_' +TOKEN_BYTE_LENGTH = 32 + + +class ApiToken(Base): + """Scoped API token bound to a user account.""" + + __tablename__ = 'api_token' + __table_args__ = ( + UniqueConstraint('user_id', 'token_name', name='uq_user_token_name'), + {'mysql_engine': 'InnoDB'}, + ) + + id = Column(Integer, primary_key=True) + user_id = Column( + Integer, + ForeignKey('user.id', onupdate='CASCADE', ondelete='CASCADE'), + nullable=False, + ) + user = relationship('User', uselist=False) + token_name = Column(String(50), nullable=False) + token_hash = 
Column(String(255), nullable=False)
+    token_prefix = Column(String(16), nullable=False, index=True)
+    scopes_json = Column(Text(), nullable=False)
+    created_at = Column(DateTime(timezone=True), nullable=False)
+    expires_at = Column(DateTime(timezone=True), nullable=False)
+    revoked_at = Column(DateTime(timezone=True), nullable=True)
+
+    def __init__(
+        self,
+        user_id: int,
+        token_name: str,
+        token_hash: str,
+        token_prefix: str,
+        scopes: List[str],
+        expires_in_days: int = 7,
+    ) -> None:
+        self.user_id = user_id
+        self.token_name = token_name
+        self.token_hash = token_hash
+        self.token_prefix = token_prefix
+        # Scopes are stored as a JSON array in a text column.
+        self.scopes_json = json.dumps(scopes)
+        self.created_at = datetime.now(timezone.utc)
+        self.expires_at = self.created_at + timedelta(days=expires_in_days)
+
+    def __repr__(self) -> str:
+        """Return a debug representation; never includes hash or prefix."""
+        return f'<ApiToken id={self.id} name={self.token_name!r}>'
+
+    @property
+    def scopes(self) -> List[str]:
+        """Parse the JSON scopes column into a list."""
+        return json.loads(self.scopes_json)
+
+    @property
+    def is_expired(self) -> bool:
+        """Check whether this token has passed its expiration time."""
+        now = datetime.now(timezone.utc)
+        expires = self.expires_at
+        if expires is None:
+            return True
+        # MySQL DATETIME columns don't preserve tzinfo; treat naive as UTC.
+        if expires.tzinfo is None:
+            expires = expires.replace(tzinfo=timezone.utc)
+        return bool(now > expires)
+
+    @property
+    def is_revoked(self) -> bool:
+        """Check whether this token has been explicitly revoked."""
+        return bool(self.revoked_at is not None)
+
+    @property
+    def is_valid(self) -> bool:
+        """Return True if the token is neither expired nor revoked."""
+        return not self.is_expired and not self.is_revoked
+
+    def has_scope(self, scope: str) -> bool:
+        """Return True if the token grants the given scope."""
+        return scope in self.scopes
+
+    def revoke(self) -> None:
+        """Mark this token as revoked with the current timestamp."""
+        self.revoked_at = datetime.now(timezone.utc)
+
+    @staticmethod
+    def generate_token() -> str:
+        """Create a new random token string with the spci_ prefix."""
+        random_bytes = secrets.token_urlsafe(TOKEN_BYTE_LENGTH)
+        return f'{TOKEN_PREFIX}{random_bytes}'
+
+    @staticmethod
+    def hash_token(plaintext: str) -> str:
+        """Hash a token with argon2 for storage."""
+        return _ph.hash(plaintext)
+
+    @staticmethod
+    def verify_token(plaintext: str, token_hash: str) -> bool:
+        """
+        Verify a plaintext token against its stored argon2 hash.
+
+        Returns False, rather than raising, both for a mismatch and for a
+        stored hash that argon2 cannot parse, so one corrupt row cannot
+        crash the caller.
+        """
+        try:
+            return _ph.verify(token_hash, plaintext)
+        # argon2 reports an unparseable hash with a ValueError subclass.
+        except (VerifyMismatchError, ValueError):
+            return False
+
+    @staticmethod
+    def extract_prefix(token: str) -> str:
+        """Return the first 16 chars (or the whole token if shorter)."""
+        return token[:16]
diff --git a/mod_api/routes/__init__.py b/mod_api/routes/__init__.py
new file mode 100644
index 00000000..eac65b96
--- /dev/null
+++ b/mod_api/routes/__init__.py
@@ -0,0 +1 @@
+"""mod_api.routes — Endpoint handlers for the API."""
diff --git a/mod_api/routes/auth.py b/mod_api/routes/auth.py
new file mode 100644
index 00000000..e69de29b
diff --git a/mod_api/routes/errors_logs.py b/mod_api/routes/errors_logs.py
new file mode 100644
index 00000000..a19a2b3c
--- /dev/null
+++ b/mod_api/routes/errors_logs.py
@@ -0,0 +1,189 @@
+"""
+Error and build log 
routes.
+
+GET /runs/{id}/errors                 Test-level errors for a run
+GET /runs/{id}/infrastructure-errors  Infra errors (VM, build, worker)
+GET /runs/{id}/error-summary          Grouped error counts
+GET /runs/{id}/logs                   Build log (cursor-paginated)
+GET /runs/{id}/samples/{sid}/logs     Per-sample logs (not yet available)
+"""
+
+from flask import g, request
+
+from mod_api import mod_api
+from mod_api.middleware.auth import require_roles, require_scope
+from mod_api.middleware.error_handler import make_error_response
+from mod_api.middleware.validation import validate_pagination, validate_path_id
+from mod_api.services.error_service import (derive_error_summary,
+                                            derive_errors_for_run,
+                                            derive_infrastructure_errors)
+from mod_api.services.log_service import read_log_lines
+from mod_api.utils import cursor_paginated_response, paginated_response
+from mod_test.models import Test
+
+
+@mod_api.route('/runs/<run_id>/errors', methods=['GET'])
+@require_scope('results:read')
+@validate_path_id('run_id')
+@validate_pagination
+def list_run_errors(run_id, limit=50, offset=0):
+    """
+    List test errors for a run, derived from result and output data.
+
+    Optional ``type``, ``severity`` and ``sample_id`` query params narrow
+    the list before it is paginated. Responds 404 for an unknown run.
+    """
+    test = Test.query.filter(Test.id == run_id).first()
+    if test is None:
+        return make_error_response('not_found', f'Run {run_id} not found.', http_status=404)
+
+    errors = derive_errors_for_run(run_id)
+
+    error_type = request.args.get('type')
+    if error_type:
+        errors = [e for e in errors if e['type'] == error_type]
+
+    severity = request.args.get('severity')
+    if severity:
+        errors = [e for e in errors if e['severity'] == severity]
+
+    # ``is not None`` so that an explicit sample_id=0 still filters.
+    sample_id = request.args.get('sample_id', type=int)
+    if sample_id is not None:
+        errors = [e for e in errors if e.get('sample_id') == sample_id]
+
+    total = len(errors)
+    paged = errors[offset:offset + limit]
+
+    return paginated_response(paged, total, limit, offset)
+
+
+@mod_api.route('/runs/<run_id>/infrastructure-errors', methods=['GET'])
+@require_scope('system:read')
+@validate_path_id('run_id')
+@validate_pagination
+def list_infrastructure_errors(run_id, limit=50, offset=0):
+    """
+    Infra errors classified from TestProgress messages on a best-effort basis.
+
+    Stack traces are opt-in because they may contain internal paths.
+    """
+    test = Test.query.filter(Test.id == run_id).first()
+    if test is None:
+        return make_error_response('not_found', f'Run {run_id} not found.', http_status=404)
+
+    include_stack = request.args.get('include_stack', 'false').lower() == 'true'
+    if include_stack:
+        user = getattr(g, 'api_user', None)
+        if user is None or user.role.value not in ('admin', 'contributor'):
+            return make_error_response(
+                'forbidden',
+                'Stack traces require admin or contributor role.',
+                details={'required_roles': ['admin', 'contributor']},
+                http_status=403,
+            )
+
+    errors = derive_infrastructure_errors(run_id)
+
+    if not include_stack:
+        # Strips the key in place from the dicts the service returned.
+        for e in errors:
+            e.pop('stack', None)
+
+    # Apply optional type and severity filters.
+    error_type = request.args.get('type')
+    if error_type:
+        errors = [e for e in errors if e.get('type') == error_type]
+
+    severity = request.args.get('severity')
+    if severity:
+        errors = [e for e in errors if e.get('severity') == severity]
+
+    total = len(errors)
+    paged = errors[offset:offset + limit]
+    return paginated_response(paged, total, limit, offset)
+
+
+@mod_api.route('/runs/<run_id>/error-summary', methods=['GET'])
+@require_scope('results:read')
+@validate_path_id('run_id')
+@validate_pagination
+def get_error_summary(run_id, limit=50, offset=0):
+    """Group error summary for triaging a run before drilling into details."""
+    test = Test.query.filter(Test.id == run_id).first()
+    if test is None:
+        return make_error_response('not_found', f'Run {run_id} not found.', http_status=404)
+
+    group_by = request.args.get('group_by', 'type')
+    if group_by not in ('type', 'severity', 'sample_id', 'regression_id', 'category'):
+        return make_error_response(
+            'validation_error',
+            'group_by must be one of: type, severity, sample_id, regression_id, category.',
+            http_status=400,
+        )
+
+    severity = request.args.get('severity')
+
+    summary = derive_error_summary(run_id, group_by=group_by)
+
+    if severity:
+        summary = [s for s in summary if s.get('severity') == severity]
+
+    total = len(summary)
+    paged = summary[offset:offset + limit]
+    return paginated_response(paged, total, limit, offset)
+
+
+@mod_api.route('/runs/<run_id>/logs', methods=['GET'])
+@require_scope('system:read')
+@require_roles(['admin', 'contributor'])
+@validate_path_id('run_id')
+def get_run_logs(run_id):
+    """
+    Read a run's build log with cursor-based pagination.
+
+    Returns 404 (not a broken download link) when the file doesn't exist.
+    """
+    test = Test.query.filter(Test.id == run_id).first()
+    if test is None:
+        return make_error_response('not_found', f'Run {run_id} not found.', http_status=404)
+
+    cursor = request.args.get('cursor')
+    # A non-integer ``limit`` falls back to 100; the value is then clamped.
+    limit = request.args.get('limit', 100, type=int)
+    limit = max(1, min(limit, 100))
+    level = request.args.get('level')
+    source = request.args.get('source')
+    contains = request.args.get('contains')
+    if contains and len(contains) > 100:
+        return make_error_response(
+            'validation_error',
+            'contains parameter must be 100 characters or less.',
+            http_status=400,
+        )
+
+    try:
+        lines, next_cursor = read_log_lines(
+            run_id,
+            cursor=cursor,
+            limit=limit,
+            level=level,
+            source=source,
+            contains=contains,
+        )
+    except FileNotFoundError:
+        return make_error_response(
+            'log_not_found',
+            f'Log file not found for run {run_id}.',
+            details={'run_id': run_id, 'checked': ['local', 'gcs']},
+            http_status=404,
+        )
+
+    return cursor_paginated_response(lines, next_cursor, limit)
+
+
+@mod_api.route('/runs/<run_id>/samples/<sample_id>/logs', methods=['GET'])
+@require_scope('system:read')
+@require_roles(['admin', 'contributor'])
+@validate_path_id('run_id')
+@validate_path_id('sample_id')
+def get_sample_logs(run_id, sample_id):
+    """Per-sample logs aren't available yet — the CI worker doesn't support them."""
+    return make_error_response(
+        
'not_found', + f'Per-sample logs are not available for sample {sample_id} in run {run_id}.', + details={'reason': 'Per-sample log storage is not yet supported by the CI worker.'}, + http_status=404, + ) diff --git a/mod_api/routes/results.py b/mod_api/routes/results.py new file mode 100644 index 00000000..e69de29b diff --git a/mod_api/schemas/__init__.py b/mod_api/schemas/__init__.py new file mode 100644 index 00000000..88996065 --- /dev/null +++ b/mod_api/schemas/__init__.py @@ -0,0 +1 @@ +"""mod_api.schemas: Marshmallow schemas for request/response validation.""" diff --git a/mod_api/schemas/auth.py b/mod_api/schemas/auth.py new file mode 100644 index 00000000..d22f610a --- /dev/null +++ b/mod_api/schemas/auth.py @@ -0,0 +1,67 @@ +"""Request/response schemas for the token endpoints.""" + +from marshmallow import RAISE, Schema, fields, validate + +from mod_api.models.api_token import VALID_SCOPES + + +class TokenCreateRequestSchema(Schema): + """Validates POST /auth/tokens bodies.""" + + email = fields.Email(required=True) + password = fields.String( + required=True, + validate=validate.Length(min=8, max=128), + ) + token_name = fields.String( + required=True, + validate=[ + validate.Length(min=1, max=50), + validate.Regexp( + r'^[a-zA-Z0-9_\-]+$', + error='token_name must match ^[a-zA-Z0-9_-]+$', + ), + ], + ) + expires_in_days = fields.Integer( + load_default=7, + validate=validate.Range(min=1, max=30), + ) + scopes = fields.List( + fields.String(validate=validate.OneOf(VALID_SCOPES)), + load_default=None, + validate=validate.Length(max=8), + ) + + class Meta: + """Reject unknown fields.""" + + unknown = RAISE + + +class AuthTokenSchema(Schema): + """The one-time response returned when a token is created.""" + + token = fields.String(required=True) + token_type = fields.String(dump_default='bearer') + token_name = fields.String(required=True) + scopes = fields.List(fields.String(), required=True) + expires_at = fields.DateTime(required=True) + + +class 
ApiTokenItemSchema(Schema):
+    """Token metadata for list responses — never includes the plaintext."""
+
+    id = fields.Integer(required=True)
+    user_id = fields.Integer(required=True)
+    token_name = fields.String(required=True)
+    token_prefix = fields.String(required=True)
+    scopes = fields.Method('get_scopes')
+    created_at = fields.DateTime(required=True)
+    expires_at = fields.DateTime(required=True)
+    is_revoked = fields.Boolean(required=True)
+    revoked_at = fields.DateTime(allow_none=True)
+
+    def get_scopes(self, obj):
+        """Serialize scopes as the list parsed from the model's JSON column."""
+        return obj.scopes
diff --git a/mod_api/schemas/common.py b/mod_api/schemas/common.py
new file mode 100644
index 00000000..2234a33b
--- /dev/null
+++ b/mod_api/schemas/common.py
@@ -0,0 +1,27 @@
+"""Shared schemas: ErrorResponse and pagination wrappers."""
+
+from marshmallow import Schema, fields
+
+
+class ErrorResponseSchema(Schema):
+    """Standard JSON error body returned by all error responses."""
+
+    code = fields.String(required=True)
+    message = fields.String(required=True)
+    # Not ``required``: marshmallow refuses a required field with a load
+    # default. ``dict`` is called per load, so no dict is shared.
+    details = fields.Dict(keys=fields.String(), load_default=dict)
+
+
+class PaginationSchema(Schema):
+    """Offset-based pagination metadata."""
+
+    limit = fields.Integer(required=True)
+    offset = fields.Integer(required=True)
+    total = fields.Integer(required=True)
+    next_offset = fields.Integer(allow_none=True, load_default=None)
+
+
+class CursorPaginationSchema(Schema):
+    """Cursor-based pagination metadata."""
+
+    limit = fields.Integer(required=True)
+    next_cursor = fields.String(allow_none=True, load_default=None)
diff --git a/mod_api/schemas/errors.py b/mod_api/schemas/errors.py
new file mode 100644
index 00000000..a451187d
--- /dev/null
+++ b/mod_api/schemas/errors.py
@@ -0,0 +1,51 @@
+"""Schemas for error items, error summary buckets, and log lines."""
+
+from marshmallow import Schema, fields, validate
+
+
+class ErrorItemSchema(Schema):
+    """A single error derived from run results or infra 
progress.""" + + error_id = fields.String(required=True) + run_id = fields.Integer(required=True) + sample_id = fields.Integer(allow_none=True) + regression_id = fields.Integer(allow_none=True) + type = fields.String(required=True) + severity = fields.String( + required=True, + validate=validate.OneOf(['info', 'warning', 'error', 'critical']), + ) + message = fields.String(required=True) + location = fields.Dict(allow_none=True, load_default=None) + stack = fields.List(fields.String(), load_default=None) + occurred_at = fields.DateTime(allow_none=True) + + +class ErrorSummaryBucketSchema(Schema): + """One bucket in a grouped error summary.""" + + key = fields.String(required=True) + count = fields.Integer(required=True) + severity = fields.String(required=True) + sample_ids = fields.List(fields.Integer(), load_default=[]) + first_seen_at = fields.DateTime(allow_none=True) + last_seen_at = fields.DateTime(allow_none=True) + + +class LogLineSchema(Schema): + """A single parsed line from a build log.""" + + timestamp = fields.DateTime(allow_none=True) + level = fields.String( + required=True, + validate=validate.OneOf( + ['debug', 'info', 'warning', 'error', 'critical']), + ) + source = fields.String( + required=True, + validate=validate.OneOf( + ['orchestrator', 'worker', 'build', 'test_runner', 'web']), + ) + message = fields.String(required=True) + run_id = fields.Integer(required=True) + sample_id = fields.Integer(allow_none=True) diff --git a/mod_api/schemas/results.py b/mod_api/schemas/results.py new file mode 100644 index 00000000..4004f2cb --- /dev/null +++ b/mod_api/schemas/results.py @@ -0,0 +1,95 @@ +"""Schemas for expected/actual output, diffs, and baseline approvals.""" + +from marshmallow import RAISE, Schema, fields, validate + + +class OutputFileContentSchema(Schema): + """File content blob returned for expected or actual output.""" + + run_id = fields.Integer(allow_none=True) + sample_id = fields.Integer(required=True) + regression_id = 
fields.Integer(required=True) + output_id = fields.Integer(required=True) + filename = fields.String(required=True) + content_type = fields.String(required=True) + encoding = fields.String( + required=True, validate=validate.OneOf(['utf-8', 'base64'])) + content = fields.String(required=True) + sha256 = fields.String(allow_none=True) + storage_status = fields.String( + required=True, + validate=validate.OneOf(['ok', 'degraded', 'missing']), + ) + + +class DiffHunkLineSchema(Schema): + """One line inside a diff hunk.""" + + kind = fields.String(required=True, validate=validate.OneOf( + ['context', 'added', 'removed'])) + expected_line = fields.Integer(allow_none=True) + actual_line = fields.Integer(allow_none=True) + text = fields.String(required=True) + + +class DiffHunkSchema(Schema): + """A contiguous block of changes.""" + + expected_start = fields.Integer(required=True) + actual_start = fields.Integer(required=True) + lines = fields.List(fields.Nested(DiffHunkLineSchema), required=True) + + +class DiffSchema(Schema): + """Structured diff between expected and actual output.""" + + run_id = fields.Integer(required=True) + sample_id = fields.Integer(required=True) + regression_id = fields.Integer(required=True) + output_id = fields.Integer(required=True) + status = fields.String(required=True, validate=validate.OneOf([ + 'identical', 'different', 'missing_actual', 'missing_expected', + ])) + summary = fields.Dict(required=True) + hunks = fields.List(fields.Nested(DiffHunkSchema), required=True) + + +class BaselineApprovalRequestSchema(Schema): + """POST /runs/{id}/samples/{sid}/baseline-approval body.""" + + regression_id = fields.Integer( + required=True, + validate=validate.Range(min=1), + ) + output_id = fields.Integer( + required=True, + validate=validate.Range(min=1), + ) + reason = fields.String( + required=True, + validate=validate.Length(min=10, max=500), + ) + remove_variants = fields.Boolean( + load_default=False, + ) + + class Meta: + """Reject unknown 
fields.""" + + unknown = RAISE + + +class BaselineApprovalSchema(Schema): + """Response after a baseline approval is applied.""" + + approval_id = fields.String(required=True) + status = fields.String( + required=True, + validate=validate.OneOf( + ['approved'])) + run_id = fields.Integer(required=True) + sample_id = fields.Integer(required=True) + regression_id = fields.Integer(required=True) + output_id = fields.Integer(required=True) + requested_by = fields.String(required=True) + created_at = fields.DateTime(required=True) diff --git a/mod_api/schemas/runs.py b/mod_api/schemas/runs.py new file mode 100644 index 00000000..fb081562 --- /dev/null +++ b/mod_api/schemas/runs.py @@ -0,0 +1,119 @@ +"""Schemas for runs, summaries, progress events, and run actions.""" + +from marshmallow import RAISE, Schema, fields, validate + + +class ProgressEventSchema(Schema): + """A single progress event in a run's timeline.""" + + timestamp = fields.DateTime(required=True) + status = fields.String(required=True) + message = fields.String(required=True) + step = fields.Integer(allow_none=True) + + +class RunSchema(Schema): + """Full run details.""" + + run_id = fields.Integer(required=True) + status = fields.String(required=True, validate=validate.OneOf([ + 'queued', 'running', 'pass', 'fail', 'canceled', 'error', 'incomplete', + ])) + platform = fields.String( + required=True, validate=validate.OneOf(['linux', 'windows'])) + test_type = fields.String(validate=validate.OneOf(['commit', 'pr'])) + repository = fields.String(required=True) + branch = fields.String(allow_none=True) + commit_sha = fields.String(required=True) + pr_number = fields.Integer(allow_none=True, load_default=None) + created_at = fields.DateTime(required=True) + queued_at = fields.DateTime(allow_none=True) + started_at = fields.DateTime(allow_none=True) + completed_at = fields.DateTime(allow_none=True) + github_link = fields.String(allow_none=True) + + +class RunSummarySchema(Schema): + """Pass/fail/skip 
aggregate counts for a run.""" + + run_id = fields.Integer(required=True) + status = fields.String(required=True) + total_samples = fields.Integer(required=True) + pass_count = fields.Integer(required=True) + fail_count = fields.Integer(required=True) + skipped_count = fields.Integer(required=True) + missing_output_count = fields.Integer(required=True) + error_count = fields.Integer(load_default=0) + duration_ms = fields.Integer(allow_none=True) + triggered_by = fields.String(allow_none=True) + + +class RunConfigSchema(Schema): + """The test matrix and configuration for a run.""" + + run_id = fields.Integer(required=True) + platform = fields.String(required=True) + branch = fields.String(required=True) + commit_sha = fields.String(required=True) + regression_test_ids = fields.List(fields.Integer(), required=True) + + +class RunCreateRequestSchema(Schema): + """POST /runs request body.""" + + commit_sha = fields.String( + required=True, + validate=validate.Regexp( + r'^[a-fA-F0-9]{40}$', + error='commit_sha must be a 40-character hex string.', + ), + ) + platform = fields.String( + required=True, + validate=validate.OneOf(['linux', 'windows']), + ) + branch = fields.String( + load_default='master', + validate=[ + validate.Length(max=100), + validate.Regexp( + r'^[A-Za-z0-9._/\-]+$', + error='branch must match ^[A-Za-z0-9._/-]+$', + ), + ], + ) + repository = fields.String( + load_default=None, + validate=[ + validate.Length(max=100), + validate.Regexp( + r'^[a-zA-Z0-9_.\-]+/[a-zA-Z0-9_.\-]+$', + error='repository must match owner/repo format.', + ), + ], + ) + pull_request = fields.Integer( + load_default=None, + allow_none=True, + validate=validate.Range(min=1), + ) + regression_test_ids = fields.List( + fields.Integer(validate=validate.Range(min=1)), + load_default=None, + validate=validate.Length(max=500), + ) + + class Meta: + """Reject unknown fields.""" + + unknown = RAISE + + +class RunActionResultSchema(Schema): + """Response for cancel and similar run 
actions.""" + + run_id = fields.Integer(required=True) + new_run_id = fields.Integer(allow_none=True) + action = fields.String(required=True) + status = fields.String(required=True) + message = fields.String(required=True) diff --git a/mod_api/schemas/samples.py b/mod_api/schemas/samples.py new file mode 100644 index 00000000..5f074a52 --- /dev/null +++ b/mod_api/schemas/samples.py @@ -0,0 +1,70 @@ +"""Request and response schemas for Sample endpoints and results.""" + +from marshmallow import Schema, fields, validate + + +class OutputFileSchema(Schema): + """Output file schema.""" + + output_id = fields.Integer(required=True) + filename = fields.String(required=True) + status = fields.String(required=True, validate=validate.OneOf([ + 'match', 'diff_mismatch', 'missing_output', 'missing_expected', + ])) + + +class RunSampleSchema(Schema): + """A regression test's result within a specific run.""" + + regression_test_id = fields.Integer(required=True) + sample_id = fields.Integer(allow_none=True) + sample_name = fields.String(allow_none=True) + status = fields.String(required=True, validate=validate.OneOf([ + 'pass', 'fail', 'skipped', 'missing_output', 'running', 'not_started', + ])) + exit_code = fields.Integer(allow_none=True) + expected_rc = fields.Integer(allow_none=True) + runtime_ms = fields.Integer(allow_none=True) + command = fields.String(allow_none=True) + category = fields.String(allow_none=True) + outputs = fields.List(fields.Nested(OutputFileSchema), load_default=[]) + + +class SampleSchema(Schema): + """A media sample from the catalog.""" + + sample_id = fields.Integer(required=True) + sha = fields.String(required=True) + extension = fields.String(required=True) + original_name = fields.String(required=True) + filename = fields.String(required=True) + tags = fields.List(fields.String(), load_default=[]) + regression_test_count = fields.Integer(load_default=0) + active = fields.Boolean(load_default=True) + + +class SampleHistoryEntrySchema(Schema): + 
"""One row in a sample's cross-run history.""" + + run_id = fields.Integer(required=True) + status = fields.String(required=True) + platform = fields.String(required=True) + branch = fields.String(required=True) + commit_sha = fields.String(required=True) + tested_at = fields.DateTime(allow_none=True) + failure_signature = fields.String(allow_none=True) + + +class RegressionTestSchema(Schema): + """A regression test definition.""" + + regression_test_id = fields.Integer(required=True) + sample_id = fields.Integer(allow_none=True) + sample_name = fields.String(allow_none=True) + command = fields.String(required=True) + input_type = fields.String(required=True) + output_type = fields.String(required=True) + expected_rc = fields.Integer(required=True) + active = fields.Boolean(required=True) + categories = fields.List(fields.String(), load_default=[]) + description = fields.String(allow_none=True) diff --git a/mod_api/schemas/system.py b/mod_api/schemas/system.py new file mode 100644 index 00000000..553502bd --- /dev/null +++ b/mod_api/schemas/system.py @@ -0,0 +1,61 @@ +"""Schemas for health checks, queue jobs, and run artifacts.""" + +from marshmallow import Schema, fields, validate + + +class DependencyHealthSchema(Schema): + """Status of a single system dependency (DB, GCS, local storage).""" + + name = fields.String(required=True) + status = fields.String( + required=True, validate=validate.OneOf(['ok', 'degraded', 'down'])) + message = fields.String(allow_none=True) + + +class SystemHealthSchema(Schema): + """Overall system health response.""" + + status = fields.String( + required=True, + validate=validate.OneOf(['ok', 'degraded', 'down']), + ) + checked_at = fields.DateTime(required=True) + dependencies = fields.List( + fields.Nested(DependencyHealthSchema), + required=True) + + +class QueueJobSchema(Schema): + """A single queued or running job.""" + + run_id = fields.Integer(required=True) + status = fields.String( + required=True, 
validate=validate.OneOf(['queued', 'running'])) + platform = fields.String( + required=True, validate=validate.OneOf(['linux', 'windows'])) + queued_at = fields.DateTime(allow_none=True) + started_at = fields.DateTime(allow_none=True) + position = fields.Integer(allow_none=True) + + +class ArtifactSchema(Schema): + """A downloadable artifact tied to a run.""" + + artifact_id = fields.String(required=True) + run_id = fields.Integer(required=True) + sample_id = fields.Integer(allow_none=True) + type = fields.String( + required=True, + validate=validate.OneOf([ + 'build_log', 'sample_output', 'expected_output', 'actual_output', + 'diff', 'media_info', 'binary', 'coredump', 'combined_stdout', + ]), + ) + filename = fields.String(required=True) + content_type = fields.String(required=True) + size_bytes = fields.Integer(allow_none=True) + storage_status = fields.String( + required=True, + validate=validate.OneOf(['ok', 'degraded', 'missing']), + ) + download_url = fields.String(allow_none=True) diff --git a/mod_api/services/__init__.py b/mod_api/services/__init__.py new file mode 100644 index 00000000..a1bbdb18 --- /dev/null +++ b/mod_api/services/__init__.py @@ -0,0 +1 @@ +"""mod_api.services — Core business logic for the API.""" diff --git a/mod_api/utils.py b/mod_api/utils.py new file mode 100644 index 00000000..1faecd6a --- /dev/null +++ b/mod_api/utils.py @@ -0,0 +1,68 @@ +"""Pagination, serialization, and response formatting helpers.""" + +from flask import jsonify + + +def paginated_response(data, total, limit, offset, schema=None): + """Build an offset-paginated JSON response.""" + if schema: + serialized = schema.dump(data, many=True) + else: + serialized = data + + next_offset = offset + limit if (offset + limit) < total else None + + return jsonify({ + 'data': serialized, + 'pagination': { + 'limit': limit, + 'offset': offset, + 'total': total, + 'next_offset': next_offset, + }, + }) + + +def cursor_paginated_response(data, next_cursor, limit, schema=None): + 
"""Build a cursor-paginated JSON response.""" + if schema: + serialized = schema.dump(data, many=True) + else: + serialized = data + + return jsonify({ + 'data': serialized, + 'pagination': { + 'limit': limit, + 'next_cursor': next_cursor, + }, + }) + + +def single_response(data, schema=None, http_status=200): + """Build a single-item JSON response.""" + if schema: + serialized = schema.dump(data) + else: + serialized = data + + response = jsonify(serialized) + response.status_code = http_status + return response + + +def get_sort_column(sort_param, column_map): + """Translate a sort string into an SQLAlchemy order_by clause. + + A single leading '-' means descending; unknown fields return None. + """ + descending = sort_param.startswith('-') + field_name = sort_param[1:] if descending else sort_param + + column = column_map.get(field_name) + if column is None: + return None + + if descending: + return column.desc() + return column.asc() diff --git a/openapi-ci-api.yaml b/openapi-ci-api.yaml new file mode 100644 index 00000000..ef291327 --- /dev/null +++ b/openapi-ci-api.yaml @@ -0,0 +1,2759 @@ +openapi: 3.0.3 +info: + title: CCExtractor CI System API + version: 1.2.0 + description: | + Security-hardened JSON-only REST API for the CCExtractor CI/sample platform. + Designed for AI agents and CI automation. Enforces scoped Bearer token auth, + strict input validation, rate limiting on all routes, and safe defaults + throughout. No browser sessions, no HTML, no implicit permissions. + + **Authentication:** All endpoints require bearer token authentication unless + explicitly marked with `security: []` (only /system/health and POST /auth/tokens). + + **Rate-limit headers:** Every response includes `X-RateLimit-Limit`, + `X-RateLimit-Remaining`, and `X-RateLimit-Reset` headers. These are modeled + explicitly on the 429 response for brevity; they are present on all responses + regardless of status code.
+ + contact: + name: CCExtractor Development + url: https://github.com/CCExtractor/sample-platform + license: + name: GPL-3.0-only + url: https://www.gnu.org/licenses/gpl-3.0.html + +servers: + - url: http://localhost:5000/api/v1 + description: Development + - url: https://sampleplatform.ccextractor.org/api/v1 + description: Production + +# +# Global security: all endpoints require auth +# unless explicitly overridden with security: [] +# +security: + - bearerAuth: [] + +tags: + - name: Auth + description: Token issuance and revocation + - name: Runs + description: CI run lifecycle — list, inspect, trigger, and cancel + - name: Samples + description: Media samples and regression test definitions + - name: Results + description: Per-sample output, diffs, and baseline management + - name: Errors and Logs + description: Structured errors and raw log access + - name: System + description: Health, queue, and artifacts + +# +# SECURITY NOTES (implementers must read) +# +# 1. AUTH MODEL +# - All tokens are opaque, server-side. Never expose session cookies via API. +# - The CI worker token (/ci/progress-reporter) is a separate secret and is +# NOT valid for user-facing API endpoints. +# - Token creation is rate-limited to 5 req/15 min per IP to prevent +# credential stuffing. +# +# 2. SCOPE ENFORCEMENT +# - Scope checks happen at the middleware layer before route handlers. +# - x-required-scope on each operation defines the minimum scope needed. +# - Missing scope → 403 Forbidden (not 401, token is valid but insufficient). +# +# 3. INPUT VALIDATION +# - additionalProperties: false on all request bodies (no mass-assignment). +# - Regex patterns on all free-text IDs (commit_sha, sha256, repository). +# - maxLength on every string field. maxItems on every array. +# - Integer IDs have minimum: 1 (no zero or negative IDs). +# +# 4. OUTPUT SAFETY +# - got=null in TestResultFile means match, not missing output. 
+# The dummy row (-1,-1,-1,'','error') is translated server-side to +# status=missing_output and never surfaced as a real object. +# - test.failed reflects cancellation only; fail_count is computed from +# TestResult rows. Do not expose test.failed directly. +# - Stack traces in infrastructure errors are opt-in (include_stack=false +# by default) to avoid leaking internal paths. +# +# 5. STORAGE +# - Artifacts may exist in local SAMPLE_REPOSITORY, GCS, or both. +# - storage_status=degraded means one backend only; missing means neither. +# - Never return a download_url that has not been verified to exist. +# - Log endpoints return 404 (not a broken download link) when the log +# file is absent from both storage backends. +# +# 6. RATE LIMITING (all routes) +# - Default: 120 req/min per token (reads), 20 req/min per token (writes). +# - Auth endpoint: 5 req/15 min per IP. +# - Every response includes X-RateLimit-Limit, X-RateLimit-Remaining, +# X-RateLimit-Reset headers. +# - 429 response includes Retry-After header (seconds). +# +# 7. IDEMPOTENCY +# - POST /runs/{run_id}/cancel is idempotent; canceling an already-canceled +# run returns 202 with status=accepted and a no-op message. +# +# 8. DIFF ACCESS +# - The diff route is header-gated on the legacy system (not role-gated). +# The API wraps the XHR path and returns structured JSON. No HTML. +# +# 9. STATUS DERIVATION +# - Run status is derived, not stored. TestStatus has only: preparation, +# testing, completed, canceled (canceled covers both canceled and error). +# The API normalizes this to the 7-value enum below. +# - RunSample.status is computed from TestResult + TestResultFile + +# expected exit code + multiple acceptable baselines. +# - fail_count and missing_output_count in RunSummary are mutually +# exclusive. A sample appears in exactly one bucket (missing_output +# is checked first; if the dummy sentinel row is detected the function +# returns immediately without evaluating fail conditions). +# +# 10. 
REPOSITORY PERMISSIONS +# - POST /runs enforces a repo-aware permission check. Triggering a run +# against the main configured repository (GITHUB_OWNER/GITHUB_REPOSITORY) +# requires the contributor role or above. Any authenticated user with +# runs:write scope may trigger runs against fork repositories. There is +# no global repository allowlist; the elevated-role check applies only +# to the main configured repository. +# + +paths: + + # AUTH + + /auth/tokens: + get: + tags: [Auth] + summary: List API tokens + operationId: listTokens + description: > + Lists tokens for the authenticated user. Non-admin users see only their + own tokens. Admins may append ?all=true to list tokens across the entire + system; non-admin callers sending ?all=true receive 403. + + Plaintext token values are never included in list responses. + security: + - bearerAuth: [] + x-required-scope: tokens:manage + parameters: + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: all + in: query + schema: + type: boolean + description: > + Admin only. Set to true to list tokens for all users in the system. + Non-admin callers receive 403 if this parameter is present and true. + responses: + "200": + description: Paginated list of tokens (without plaintext secrets). + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ApiTokenItem" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + post: + tags: [Auth] + summary: Create an API token + operationId: createToken + description: > + Rate-limited to 5 requests per 15 minutes per IP. Tokens are opaque + and stored server-side. Scopes are additive; request only what you need. 
+ Tokens expire after expires_in_days (default 30, max 90). + security: [] + x-rate-limit: "5/15min per IP" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/TokenCreateRequest" + responses: + "201": + description: Token created. Store the token value; it will not be shown again. + content: + application/json: + schema: + $ref: "#/components/schemas/AuthToken" + "400": + $ref: "#/components/responses/BadRequest" + "401": + description: Invalid credentials + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: invalid_credentials + message: Email or password is incorrect. + details: {} + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /auth/tokens/current: + delete: + tags: [Auth] + summary: Revoke the current API token + operationId: revokeCurrentToken + description: > + Immediately invalidates the token used in the Authorization header. + Subsequent requests with the same token will receive 401. + + No specific scope is required beyond authentication — any valid token + can self-revoke. This is the preferred way to clean up a token when + you have it in hand but do not know its numeric ID. + security: + - bearerAuth: [] + responses: + "204": + description: Token revoked + "401": + $ref: "#/components/responses/Unauthorized" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /auth/tokens/{token_id}: + delete: + tags: [Auth] + summary: Revoke a specific API token by ID + operationId: revokeToken + description: > + Revokes the token identified by token_id. Non-admin users may only + revoke their own tokens; attempting to revoke another user’s token + returns 403. Admins may revoke any token. + + To revoke the token currently in use without knowing its ID, use + DELETE /auth/tokens/current instead. 
+ security: + - bearerAuth: [] + x-required-scope: tokens:manage + parameters: + - name: token_id + in: path + required: true + schema: + type: integer + minimum: 1 + responses: + "204": + description: Token revoked successfully. + "401": + $ref: "#/components/responses/Unauthorized" + "403": + description: > + Token is valid but you cannot revoke this token. Non-admin users + may only revoke their own tokens. Admins may revoke any token. + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: forbidden + message: You may only revoke your own tokens unless you have admin role. + details: {} + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + # RUNS + + /runs: + get: + tags: [Runs] + summary: List CI runs + operationId: listRuns + description: > + Requires a bearer token with the runs:read scope. The underlying table is capped at the 50 most recent runs + in the current implementation; this endpoint adds full pagination. + Sorted by -created_at by default (newest first). + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - $ref: "#/components/parameters/RunStatus" + - $ref: "#/components/parameters/Branch" + - $ref: "#/components/parameters/CommitSha" + - $ref: "#/components/parameters/Repository" + - $ref: "#/components/parameters/Platform" + - $ref: "#/components/parameters/CreatedAfter" + - $ref: "#/components/parameters/CreatedBefore" + - name: sort + in: query + schema: + type: string + default: -created_at + enum: [created_at, -created_at, run_id, -run_id] + description: Sort field. Prefix with - for descending order.
+ responses: + "200": + description: Paginated runs + headers: + X-RateLimit-Limit: + $ref: "#/components/headers/RateLimitLimit" + X-RateLimit-Remaining: + $ref: "#/components/headers/RateLimitRemaining" + X-RateLimit-Reset: + $ref: "#/components/headers/RateLimitReset" + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/Run" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + post: + tags: [Runs] + summary: Trigger a new CI run + operationId: createRun + description: > + Requires runs:write scope and contributor role or above. + The regression_test_ids set is validated against active tests only. + If omitted, all active regression tests are used. + security: + - bearerAuth: [] + x-required-scope: runs:write + x-required-roles: [admin, tester, contributor] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/RunCreateRequest" + responses: + "202": + description: Run queued. Poll /runs/{run_id}/progress for status. + content: + application/json: + schema: + $ref: "#/components/schemas/Run" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "422": + $ref: "#/components/responses/UnprocessableEntity" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}: + get: + tags: [Runs] + summary: Get a CI run + operationId: getRun + description: > + Returns normalized run status derived from TestProgress rows. 
+ status=canceled covers both explicit cancellation and infrastructure + errors (the underlying model does not distinguish them). + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/RunId" + responses: + "200": + description: Run details + content: + application/json: + schema: + $ref: "#/components/schemas/Run" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/summary: + get: + tags: [Runs] + summary: Get pass/fail summary for a run + operationId: getRunSummary + description: > + fail_count is computed from TestResult rows, not from test.failed. + test.failed only reflects whether the final progress status is + canceled — it does not reflect regression test outcomes. + Use this endpoint, not test.failed, to triage a run. + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/RunId" + responses: + "200": + description: Run summary + content: + application/json: + schema: + $ref: "#/components/schemas/RunSummary" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/progress: + get: + tags: [Runs] + summary: Get progress events for a run + operationId: getRunProgress + description: > + Progress events are sourced from TestProgress rows written by the CI + worker via /ci/progress-reporter. Messages are unstructured text. + Structured error types are aspirational until the worker protocol + emits structured JSON. 
+ security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: status + in: query + schema: + type: string + enum: [queued, preparation, testing, completed, canceled, error] + responses: + "200": + description: Paginated progress events + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ProgressEvent" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/cancel: + post: + tags: [Runs] + summary: Cancel a queued or running CI run + operationId: cancelRun + description: > + Idempotent. Canceling an already-canceled or completed run returns + 202 with a no-op message rather than an error. + Requires runs:write scope. 
+ security: + - bearerAuth: [] + x-required-scope: runs:write + x-required-roles: [admin, tester, contributor] + parameters: + - $ref: "#/components/parameters/RunId" + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + reason: + type: string + maxLength: 255 + additionalProperties: false + responses: + "202": + description: Cancellation accepted (or no-op if already terminal) + content: + application/json: + schema: + $ref: "#/components/schemas/RunActionResult" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/config: + get: + tags: [Runs] + summary: Get run configuration and test matrix + operationId: getRunConfig + description: > + regression_test_ids lists IDs included in this run. When no custom + set was configured, all regression tests are returned. + Implementers must filter by active=true explicitly — + get_customized_regressiontests() does not do this by default. + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/RunId" + responses: + "200": + description: Run configuration + content: + application/json: + schema: + $ref: "#/components/schemas/RunConfig" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + # SAMPLES + + /runs/{run_id}/samples: + get: + tags: [Samples] + summary: List regression test results in a run + operationId: listRunSamples + description: > + Returns one entry per regression test result, not one per unique media + file. 
A single media sample may yield multiple entries if it has + multiple regression tests (different command flags). + sample_progress in the legacy JSON endpoint is len(test.results) over + total regression tests; it does not reflect multi-output completeness. + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: status + in: query + schema: + type: string + enum: [pass, fail, skipped, missing_output, running, not_started] + - name: name + in: query + schema: + type: string + maxLength: 100 + - name: tag + in: query + schema: + type: string + maxLength: 50 + - name: category + in: query + schema: + type: string + maxLength: 50 + responses: + "200": + description: Paginated regression test results + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/RunSample" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/samples/{sample_id}: + get: + tags: [Samples] + summary: Get full details for a regression test result in a run + operationId: getRunSample + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/SampleId" + responses: + "200": + description: Regression test result details + content: + application/json: + schema: + $ref: "#/components/schemas/RunSample" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + 
"429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /samples: + get: + tags: [Samples] + summary: List all known media samples + operationId: listSamples + description: > + Returns paginated media sample metadata. Samples are the original + media files uploaded for regression testing. + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: status + in: query + description: > + Derived from linked regression tests. The sample table itself has + no quarantine state; active/inactive reflects whether any active + regression tests reference the sample. + schema: + type: string + enum: [active, inactive] + - name: name + in: query + schema: + type: string + maxLength: 100 + - name: tag + in: query + schema: + type: string + maxLength: 50 + - name: sha256 + in: query + schema: + type: string + pattern: '^[a-fA-F0-9]{64}$' + - name: extension + in: query + schema: + type: string + maxLength: 10 + pattern: '^[a-zA-Z0-9]+$' + responses: + "200": + description: Paginated media samples + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/Sample" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /samples/{sample_id}: + get: + tags: [Samples] + summary: Get media sample metadata + operationId: getSample + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/SampleId" + responses: + "200": + description: Media sample metadata + content: + application/json: + schema: + $ref: "#/components/schemas/Sample" + "401": + $ref: 
"#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /samples/{sample_id}/history: + get: + tags: [Samples] + summary: Get regression test result history for a sample across runs + operationId: getSampleHistory + description: > + Use failure_signature for flake detection: a stable signature across + multiple runs on different commits indicates a genuine regression, + not infrastructure noise. + security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/SampleId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - $ref: "#/components/parameters/RunStatus" + - $ref: "#/components/parameters/Branch" + - $ref: "#/components/parameters/Platform" + - $ref: "#/components/parameters/CreatedAfter" + - $ref: "#/components/parameters/CreatedBefore" + responses: + "200": + description: Paginated sample history + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/SampleHistoryEntry" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /regression-tests: + get: + tags: [Samples] + summary: List regression test definitions + operationId: listRegressionTests + description: > + The active filter must be applied explicitly. The legacy + get_customized_regressiontests() returns all regression tests — + including inactive ones — when no custom set is defined. 
+ security: + - bearerAuth: [] + x-required-scope: runs:read + parameters: + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: active + in: query + schema: + type: boolean + - name: category + in: query + schema: + type: string + maxLength: 50 + - name: tag + in: query + schema: + type: string + maxLength: 50 + - name: sample_id + in: query + schema: + type: integer + minimum: 1 + responses: + "200": + description: Paginated regression test definitions + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/RegressionTest" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + # RESULTS + + /runs/{run_id}/samples/{sample_id}/expected: + get: + tags: [Results] + summary: Get expected output for a regression test result + operationId: getExpectedOutput + description: > + Expected output is a file reference stored under TestResults using the + regression output extension. Resolved from GCS or local + SAMPLE_REPOSITORY at request time. storage_status reflects which + backends have the file. Do not assume local and GCS are always in sync. 
+ security: + - bearerAuth: [] + x-required-scope: results:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/SampleId" + - $ref: "#/components/parameters/RegressionId" + - $ref: "#/components/parameters/OutputId" + - $ref: "#/components/parameters/Format" + responses: + "200": + description: Expected output file + content: + application/json: + schema: + $ref: "#/components/schemas/OutputFile" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/samples/{sample_id}/actual: + get: + tags: [Results] + summary: Get actual output generated by a regression test in a run + operationId: getActualOutput + description: > + IMPORTANT: TestResultFile.got = null means the actual output MATCHED + expected, not that actual output is missing. This is a semantic trap + in the data model. Missing output is represented by a dummy row + (-1,-1,-1,'','error') which the API translates to status=missing_output + and returns 404. A 200 response always contains a real output file. + security: + - bearerAuth: [] + x-required-scope: results:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/SampleId" + - $ref: "#/components/parameters/RegressionId" + - $ref: "#/components/parameters/OutputId" + - $ref: "#/components/parameters/Format" + responses: + "200": + description: Actual output file (output exists and differs from expected) + content: + application/json: + schema: + $ref: "#/components/schemas/OutputFile" + "204": + description: > + No actual file stored. got=null in the DB means output matched + expected. Use /expected to retrieve the matched content. 
+ "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/samples/{sample_id}/diff: + get: + tags: [Results] + summary: Get expected-vs-actual diff for a failing regression test result + operationId: getDiff + description: > + The legacy diff route is header-gated (X-Requested-With: XMLHttpRequest), + not role-gated. The 403 seen on direct browser requests was a + header-check artifact. This endpoint wraps the XHR logic and returns + structured JSON — no HTML, no 50-line truncation. + security: + - bearerAuth: [] + x-required-scope: results:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/SampleId" + - $ref: "#/components/parameters/RegressionId" + - $ref: "#/components/parameters/OutputId" + - name: context_lines + in: query + schema: + type: integer + minimum: 1 + maximum: 50 + default: 3 + - name: format + in: query + schema: + type: string + enum: [structured, unified] + default: structured + responses: + "200": + description: Structured or unified diff + content: + application/json: + schema: + $ref: "#/components/schemas/Diff" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/samples/{sample_id}/baseline-approval: + post: + tags: [Results] + summary: Approve actual output as the new expected baseline + operationId: approveBaseline + description: > + Requires baselines:write scope and admin or contributor role. 
+ This is a destructive write — the approved output becomes the new + expected baseline for the regression test. Provide a reason; + it is stored in the audit log. + security: + - bearerAuth: [] + x-required-scope: baselines:write + x-required-roles: [admin, contributor] + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/SampleId" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BaselineApprovalRequest" + responses: + "200": + description: Baseline approval applied immediately. + content: + application/json: + schema: + $ref: "#/components/schemas/BaselineApproval" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + # ERRORS AND LOGS + + /runs/{run_id}/errors: + get: + tags: [Errors and Logs] + summary: Get structured test errors for a run + operationId: listRunErrors + description: > + Error types are derived from TestResult and TestResultFile rows. + missing_output is detected from the dummy (-1,-1,-1,'','error') row + pattern, not from got=null (which means match, not missing). 
+ security: + - bearerAuth: [] + x-required-scope: results:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: type + in: query + schema: + type: string + enum: [test_failure, exit_code_mismatch, missing_output, diff_mismatch] + - name: severity + in: query + schema: + type: string + enum: [info, warning, error, critical] + - name: sample_id + in: query + schema: + type: integer + minimum: 1 + responses: + "200": + description: Paginated test errors + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ErrorItem" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/infrastructure-errors: + get: + tags: [Errors and Logs] + summary: Get worker, provisioning, and build errors for a run + operationId: listInfraErrors + description: > + Errors are extracted from TestProgress rows written by the CI worker. + Messages are currently unstructured text. The type filter does + best-effort text matching until the worker protocol emits structured + error types. + Stack traces are opt-in (include_stack defaults to false) to avoid + leaking internal paths to unauthorized callers. 
+ security: + - bearerAuth: [] + x-required-scope: system:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: type + in: query + schema: + type: string + enum: [queue, vm_provisioning, checkout, merge, build, worker, web_server, storage] + - name: severity + in: query + schema: + type: string + enum: [info, warning, error, critical] + - name: include_stack + in: query + schema: + type: boolean + default: false + description: > + Default false. Set true only when debugging infrastructure failures. + Stacks may contain internal paths; access requires system:read scope. + responses: + "200": + description: Paginated infrastructure errors + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ErrorItem" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/logs: + get: + tags: [Errors and Logs] + summary: Get raw logs for a run + operationId: getRunLogs + description: > + Logs are stored at SAMPLE_REPOSITORY/LogFiles/{id}.txt and served + via GCS signed URL. Returns 404 — not a broken download link — when + the file is absent from both local and GCS storage. + Uses cursor-based pagination. 
+ security: + - bearerAuth: [] + x-required-scope: system:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Cursor" + - name: level + in: query + schema: + type: string + enum: [debug, info, warning, error, critical] + - name: source + in: query + schema: + type: string + enum: [orchestrator, worker, build, test_runner, web] + - name: contains + in: query + schema: + type: string + maxLength: 100 + responses: + "200": + description: Cursor-paginated run log lines + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/CursorPage" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/LogLine" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + description: Log file not found in local or GCS storage + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: log_not_found + message: Log file for run 9309 does not exist in any storage backend. + details: + run_id: 9309 + checked: [local, gcs] + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/samples/{sample_id}/logs: + get: + tags: [Errors and Logs] + summary: Get raw logs for a regression test result in a run + operationId: getSampleLogs + description: > + Returns raw log lines for a specific regression test result. + Logs are stored at SAMPLE_REPOSITORY/LogFiles/ and served via GCS + signed URL when available. Returns 404 when the log file is absent + from both local and GCS storage. 
+ security: + - bearerAuth: [] + x-required-scope: system:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/SampleId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Cursor" + - name: level + in: query + schema: + type: string + enum: [debug, info, warning, error, critical] + - name: contains + in: query + schema: + type: string + maxLength: 100 + responses: + "200": + description: Cursor-paginated sample log lines + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/CursorPage" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/LogLine" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/error-summary: + get: + tags: [Errors and Logs] + summary: Get grouped error summary for a run + operationId: getErrorSummary + description: > + Use this endpoint to triage a run before drilling into individual + errors. group_by=type gives a high-level failure breakdown; + group_by=sample_id helps identify flaky samples. 
+ security: + - bearerAuth: [] + x-required-scope: results:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: group_by + in: query + schema: + type: string + enum: [type, sample_id, regression_id, category, severity] + default: type + - name: severity + in: query + schema: + type: string + enum: [info, warning, error, critical] + responses: + "200": + description: Paginated grouped error summary + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ErrorSummaryBucket" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + # SYSTEM + + /system/health: + get: + tags: [System] + summary: Get CI system health and dependency status + operationId: getHealth + description: > + Unauthenticated. Returns overall system status and per-dependency + health. Used by monitoring and uptime checks. 
+ security: [] + responses: + "200": + description: System healthy or degraded + content: + application/json: + schema: + $ref: "#/components/schemas/SystemHealth" + "503": + description: System is down + content: + application/json: + schema: + $ref: "#/components/schemas/SystemHealth" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /system/queue: + get: + tags: [System] + summary: Get queue depth and currently running jobs + operationId: getQueue + security: + - bearerAuth: [] + x-required-scope: system:read + parameters: + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: platform + in: query + schema: + type: string + enum: [linux, windows] + - name: status + in: query + schema: + type: string + enum: [queued, running] + responses: + "200": + description: Queue status and active jobs + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + queue_depth: + type: integer + minimum: 0 + running_count: + type: integer + minimum: 0 + data: + type: array + items: + $ref: "#/components/schemas/QueueJob" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + + /runs/{run_id}/artifacts: + get: + tags: [System] + summary: List downloadable artifacts for a run + operationId: listArtifacts + description: > + Every non-null download_url has been verified against at least one + storage backend. storage_status=degraded means one backend only; + storage_status=missing means neither backend has the file (download_url + will be null). Never returns a URL that has not been verified to exist. 
+ security: + - bearerAuth: [] + x-required-scope: results:read + parameters: + - $ref: "#/components/parameters/RunId" + - $ref: "#/components/parameters/Limit" + - $ref: "#/components/parameters/Offset" + - name: type + in: query + schema: + type: string + enum: [build_log, sample_output, expected_output, diff, media_info, binary] + responses: + "200": + description: Paginated run artifacts + content: + application/json: + schema: + allOf: + - $ref: "#/components/schemas/Page" + - type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/Artifact" + "400": + $ref: "#/components/responses/BadRequest" + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + "404": + $ref: "#/components/responses/NotFound" + "429": + $ref: "#/components/responses/RateLimited" + default: + $ref: "#/components/responses/Error" + +# +# COMPONENTS +# +components: + + securitySchemes: + bearerAuth: + type: http + scheme: bearer + bearerFormat: opaque + description: > + Opaque server-side API token. Obtain via POST /auth/tokens. + The CI worker token used by /ci/progress-reporter is a separate + secret and is NOT valid here. Never use browser session cookies + for API clients. 
+ + # HEADERS + + headers: + RateLimitLimit: + description: Maximum requests allowed in the current window + schema: + type: integer + example: 120 + RateLimitRemaining: + description: Requests remaining in the current window + schema: + type: integer + example: 117 + RateLimitReset: + description: Unix timestamp when the rate limit window resets + schema: + type: integer + example: 1748908800 + + # PARAMETERS + + parameters: + Limit: + name: limit + in: query + description: Maximum number of results to return (1–100) + schema: + type: integer + minimum: 1 + maximum: 100 + default: 50 + + Offset: + name: offset + in: query + description: Number of results to skip for pagination + schema: + type: integer + minimum: 0 + default: 0 + + Cursor: + name: cursor + in: query + description: > + Opaque cursor token for cursor-based pagination. Do not mix with offset. + Obtain next_cursor from the previous response's pagination object. + schema: + type: string + maxLength: 255 + + RunId: + name: run_id + in: path + required: true + description: Numeric run ID + schema: + type: integer + minimum: 1 + + SampleId: + name: sample_id + in: path + required: true + description: Numeric sample or regression result ID + schema: + type: integer + minimum: 1 + + RunStatus: + name: status + in: query + description: > + Normalized run status. Derived from TestProgress rows and TestResult + outcomes. The underlying TestStatus model stores only preparation, + testing, completed, and canceled (where canceled covers both canceled + and error). This enum is the normalized API contract. + schema: + type: string + enum: [queued, running, pass, fail, canceled, error, incomplete] + example: pass + + Branch: + name: branch + in: query + description: Filter by branch name (e.g. master, develop). + schema: + type: string + maxLength: 100 + example: master + + CommitSha: + name: commit_sha + in: query + description: > + Filter by full 40-character SHA-1 commit hash. 
+ schema: + type: string + pattern: '^[a-fA-F0-9]{40}$' + example: 0b1a967b732898e705ea8f2fda5d08eb00328579 + + Repository: + name: repository + in: query + description: > + Filter by GitHub repository in owner/repo format. + schema: + type: string + pattern: '^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$' + maxLength: 100 + example: CCExtractor/ccextractor + + Platform: + name: platform + in: query + schema: + type: string + enum: [linux, windows] + example: linux + + CreatedAfter: + name: created_after + in: query + description: > + ISO 8601 datetime filter. Returns runs created after this time. + Example: 2025-01-01T00:00:00Z + schema: + type: string + format: date-time + + CreatedBefore: + name: created_before + in: query + description: > + ISO 8601 datetime filter. Returns runs created before this time. + Example: 2026-12-31T23:59:59Z + schema: + type: string + format: date-time + + RegressionId: + name: regression_id + in: query + required: true + description: Regression test definition ID + schema: + type: integer + minimum: 1 + + OutputId: + name: output_id + in: query + required: true + description: Output file ID within a regression test definition + schema: + type: integer + minimum: 1 + + Format: + name: format + in: query + description: > + Content encoding for file responses. + Use text only when the file is known to be UTF-8 compatible. + Binary or unknown content defaults to base64. + schema: + type: string + enum: [text, base64] + default: base64 + + # RESPONSES + + responses: + BadRequest: + description: Request body or query parameters failed schema validation + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: validation_error + message: Request failed schema validation. 
+ details: + fields: + commit_sha: Must match pattern ^[a-fA-F0-9]{40}$ + platform: Must be one of [linux, windows] + + Unauthorized: + description: Missing, expired, or invalid bearer token + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: unauthorized + message: Bearer token is missing, expired, or invalid. + details: {} + + Forbidden: + description: Token is valid but lacks the required scope or role + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: forbidden + message: Token does not have the required scope for this operation. + details: + required_scope: runs:write + token_scopes: [runs:read, results:read] + + NotFound: + description: Resource not found + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: not_found + message: Run 9317 not found. + details: + resource: run + id: 9317 + + UnprocessableEntity: + description: Request is valid JSON but semantically invalid + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: unprocessable + message: regression_test_ids contains inactive test IDs. + details: + inactive_ids: [42, 99] + + RateLimited: + description: Too many requests. Retry after the indicated number of seconds. + headers: + Retry-After: + description: Seconds to wait before retrying + schema: + type: integer + example: 30 + X-RateLimit-Limit: + $ref: "#/components/headers/RateLimitLimit" + X-RateLimit-Remaining: + $ref: "#/components/headers/RateLimitRemaining" + X-RateLimit-Reset: + $ref: "#/components/headers/RateLimitReset" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + code: rate_limited + message: Rate limit exceeded. Retry after 30 seconds. 
+ details: + retry_after: 30 + limit: 120 + window: 60s + + Error: + description: Unexpected server error + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + + # SCHEMAS + + schemas: + + Page: + type: object + required: [data, pagination] + properties: + data: + type: array + description: > + Result items. The concrete type is defined by allOf composition + in each endpoint response. + items: {} + pagination: + type: object + required: [limit, offset, total] + properties: + limit: + type: integer + minimum: 1 + offset: + type: integer + minimum: 0 + total: + type: integer + minimum: 0 + next_offset: + type: integer + minimum: 0 + nullable: true + + CursorPage: + type: object + required: [data, pagination] + properties: + data: + type: array + description: > + Result items. The concrete type is defined by allOf composition + in each endpoint response. + items: {} + pagination: + type: object + required: [limit, next_cursor] + properties: + limit: + type: integer + minimum: 1 + next_cursor: + type: string + maxLength: 255 + nullable: true + description: > + Opaque cursor for the next page. Null when there are no + more results. + + ErrorResponse: + type: object + required: [code, message, details] + properties: + code: + type: string + maxLength: 100 + description: Machine-readable error code (snake_case) + example: not_found + message: + type: string + maxLength: 500 + description: Human-readable error summary + example: Run 9317 not found. + details: + type: object + additionalProperties: true + description: > + Structured context for the error. Always an object, never null. + Empty object {} when no additional detail is available. + + ApiTokenItem: + type: object + description: > + Token metadata returned when listing tokens. The plaintext token + value is never included - it is shown only once at creation time. 
+ required: [id, user_id, token_name, token_prefix, scopes, created_at, expires_at, is_revoked] + properties: + id: + type: integer + minimum: 1 + user_id: + type: integer + minimum: 1 + description: Owner of the token. Visible to admins when listing all tokens. + token_name: + type: string + maxLength: 50 + token_prefix: + type: string + maxLength: 20 + description: First few characters of the token for identification. + scopes: + type: array + maxItems: 8 + uniqueItems: true + items: + type: string + enum: [runs:read, runs:write, results:read, baselines:write, system:read, tokens:manage] + created_at: + type: string + format: date-time + expires_at: + type: string + format: date-time + is_revoked: + type: boolean + description: True if the token has been explicitly revoked. + revoked_at: + type: string + format: date-time + nullable: true + + TokenCreateRequest: + type: object + required: [email, password, token_name] + additionalProperties: false + properties: + email: + type: string + format: email + maxLength: 255 + password: + type: string + format: password + minLength: 8 + maxLength: 128 + description: Not stored or logged. Used only to verify identity. + token_name: + type: string + minLength: 1 + maxLength: 50 + pattern: '^[a-zA-Z0-9_-]+$' + description: > + Descriptive label for the token (e.g., local-agent, ci-bot). + Must be unique per user. + expires_in_days: + type: integer + minimum: 1 + maximum: 90 + default: 30 + scopes: + type: array + maxItems: 8 + uniqueItems: true + default: [runs:read, results:read] + items: + type: string + enum: [runs:read, runs:write, results:read, baselines:write, system:read, tokens:manage] + description: > + Requested scopes. Grant only what the client needs. + runs:read — list and inspect runs, samples, history. + runs:write — trigger and cancel runs. + results:read — access expected/actual output, diffs, test errors, artifacts. + baselines:write — approve new expected baselines. 
+ system:read — queue, infrastructure errors, stack traces, logs. + tokens:manage — list and revoke API tokens. + + AuthToken: + type: object + required: [token, token_type, token_name, scopes, expires_at] + properties: + token: + type: string + maxLength: 512 + description: > + Opaque token value. Store it securely. It will not be shown again. + token_type: + type: string + enum: [bearer] + token_name: + type: string + maxLength: 50 + scopes: + type: array + maxItems: 8 + uniqueItems: true + items: + type: string + enum: [runs:read, runs:write, results:read, baselines:write, system:read, tokens:manage] + expires_at: + type: string + format: date-time + + RunCreateRequest: + type: object + required: [repository, commit_sha, platform] + additionalProperties: false + properties: + repository: + type: string + pattern: '^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$' + maxLength: 100 + example: CCExtractor/ccextractor + branch: + type: string + pattern: '^[A-Za-z0-9._\/-]+$' + maxLength: 100 + example: master + commit_sha: + type: string + pattern: '^[a-fA-F0-9]{40}$' + example: 0632bff4e382d5f86eff9073b9ddd37f03f9778c + pull_request: + type: integer + minimum: 1 + nullable: true + example: 2264 + platform: + type: string + enum: [linux, windows] + example: windows + regression_test_ids: + type: array + maxItems: 500 + uniqueItems: true + items: + type: integer + minimum: 1 + description: > + Optional subset of active regression test IDs. + If omitted, all active tests are used. + Inactive test IDs are rejected with 422. + + Run: + type: object + required: [run_id, status, repository, commit_sha, platform, created_at] + properties: + run_id: + type: integer + minimum: 1 + status: + type: string + enum: [queued, running, pass, fail, canceled, error, incomplete] + description: > + Normalized status. Derived from TestProgress rows and TestResult + outcomes. status=canceled covers both explicit cancellation and + infrastructure error (the underlying model conflates them). 
+ platform: + type: string + enum: [linux, windows] + test_type: + type: string + enum: [pr, commit] + description: Whether this run was triggered by a pull request or a commit push. + repository: + type: string + maxLength: 100 + branch: + type: string + maxLength: 100 + nullable: true + commit_sha: + type: string + pattern: '^[a-fA-F0-9]{40}$' + pr_number: + type: integer + minimum: 1 + nullable: true + description: Pull request number, if this run was triggered by a PR. + created_at: + type: string + format: date-time + queued_at: + type: string + format: date-time + nullable: true + started_at: + type: string + format: date-time + nullable: true + completed_at: + type: string + format: date-time + nullable: true + github_link: + type: string + format: uri + nullable: true + description: Direct link to the commit or PR on GitHub. + + RunSummary: + type: object + required: [run_id, status, total_samples, pass_count, fail_count, skipped_count, missing_output_count] + properties: + run_id: + type: integer + minimum: 1 + status: + type: string + enum: [queued, running, pass, fail, canceled, error, incomplete] + description: > + Overall run status at the time the summary was generated. + Same derivation as Run.status. + total_samples: + type: integer + minimum: 0 + description: Total regression test results in this run. + pass_count: + type: integer + minimum: 0 + fail_count: + type: integer + minimum: 0 + description: > + Computed from TestResult rows. NOT derived from test.failed, + which only reflects cancellation state and is unreliable for + determining whether regression tests actually passed. + skipped_count: + type: integer + minimum: 0 + missing_output_count: + type: integer + minimum: 0 + description: > + Samples that produced no output when output was expected. + Detected from the dummy TestResultFile(-1,-1,-1,'','error') row, + not from got=null (which means output matched). 
+ error_count: + type: integer + minimum: 0 + duration_ms: + type: integer + minimum: 0 + nullable: true + triggered_by: + type: string + maxLength: 100 + nullable: true + + ProgressEvent: + type: object + required: [timestamp, status, message] + properties: + timestamp: + type: string + format: date-time + status: + type: string + enum: [queued, preparation, testing, completed, canceled, error] + message: + type: string + maxLength: 500 + description: Unstructured text from TestProgress rows. + step: + type: integer + minimum: 0 + nullable: true + + RunActionResult: + type: object + required: [run_id, action, status] + properties: + run_id: + type: integer + minimum: 1 + description: ID of the run this action targets. + new_run_id: + type: integer + minimum: 1 + nullable: true + description: Reserved for future use. Always null for cancel actions. + action: + type: string + enum: [cancel] + status: + type: string + enum: [accepted, rejected, no_op] + description: no_op is returned when canceling an already-terminal run. + message: + type: string + maxLength: 500 + + RunConfig: + type: object + required: [run_id] + properties: + run_id: + type: integer + minimum: 1 + matrix: + type: array + maxItems: 500 + items: + type: object + required: [regression_test_id] + properties: + regression_test_id: + type: integer + minimum: 1 + sample_name: + type: string + maxLength: 255 + nullable: true + command: + type: string + maxLength: 500 + input_type: + type: string + maxLength: 50 + nullable: true + output_type: + type: string + maxLength: 50 + nullable: true + additionalProperties: true + regression_test_ids: + type: array + maxItems: 500 + uniqueItems: true + items: + type: integer + minimum: 1 + description: > + IDs included in this run. When no custom set was configured, all + regression tests are returned. Implementers must filter by + active=true — get_customized_regressiontests() does not do this. 
+ command_defaults: + type: array + maxItems: 50 + items: + type: string + maxLength: 100 + + Sample: + type: object + required: [sample_id, sha] + properties: + sample_id: + type: integer + minimum: 1 + sha: + type: string + pattern: '^[a-fA-F0-9]{64}$' + description: SHA256 hash of the sample file. + extension: + type: string + maxLength: 10 + original_name: + type: string + maxLength: 255 + filename: + type: string + maxLength: 255 + tags: + type: array + maxItems: 50 + items: + type: string + maxLength: 50 + regression_test_count: + type: integer + minimum: 0 + description: Number of active regression tests referencing this sample. + active: + type: boolean + description: True if at least one active regression test references this sample. + + RegressionTest: + type: object + required: [regression_test_id, sample_id, command] + properties: + regression_test_id: + type: integer + minimum: 1 + sample_id: + type: integer + minimum: 1 + sample_name: + type: string + maxLength: 255 + nullable: true + command: + type: string + maxLength: 500 + input_type: + type: string + maxLength: 50 + output_type: + type: string + maxLength: 50 + expected_rc: + type: integer + nullable: true + active: + type: boolean + categories: + type: array + maxItems: 50 + items: + type: string + maxLength: 100 + description: + type: string + maxLength: 1000 + nullable: true + + RunSample: + type: object + required: [regression_test_id, sample_id, status] + properties: + regression_test_id: + type: integer + minimum: 1 + sample_id: + type: integer + minimum: 1 + nullable: true + sample_name: + type: string + maxLength: 255 + nullable: true + category: + type: string + maxLength: 100 + nullable: true + command: + type: string + maxLength: 500 + nullable: true + status: + type: string + enum: [pass, fail, skipped, missing_output, running, not_started] + description: > + Computed from TestResult, TestResultFile, expected exit code, + and multiple acceptable baselines. Not a stored column. 
+ runtime_ms: + type: integer + minimum: 0 + nullable: true + exit_code: + type: integer + nullable: true + expected_rc: + type: integer + nullable: true + description: Expected return code for this regression test. + outputs: + type: array + maxItems: 20 + description: > + One entry per expected output file. + got=null in the DB means output matched expected; no actual file + is stored. The dummy (-1,-1,-1,'','error') row is translated to + status=missing_output and is never exposed here. + items: + type: object + required: [output_id, filename, status] + additionalProperties: false + properties: + output_id: + type: integer + minimum: 1 + filename: + type: string + maxLength: 255 + status: + type: string + enum: [match, diff_mismatch, missing_output, missing_expected] + description: > + match = actual identical to expected. + diff_mismatch = actual differs from expected. + missing_output = test produced no output. + missing_expected = no expected baseline exists. + + SampleHistoryEntry: + type: object + required: [run_id, status] + properties: + run_id: + type: integer + minimum: 1 + status: + type: string + enum: [pass, fail, skipped, missing_output] + platform: + type: string + enum: [linux, windows] + branch: + type: string + maxLength: 100 + nullable: true + commit_sha: + type: string + pattern: '^[a-fA-F0-9]{40}$' + nullable: true + tested_at: + type: string + format: date-time + nullable: true + description: completed_at or started_at timestamp from the run. + failure_signature: + type: string + maxLength: 255 + nullable: true + description: > + Stable string identifying the failure type and output ID. + Use across runs to detect genuine regressions vs. infrastructure + flakes. + + OutputFile: + type: object + required: [sample_id, regression_id, output_id, filename, content_type, encoding, content, storage_status] + properties: + run_id: + type: integer + minimum: 1 + nullable: true + description: Null for expected output not tied to a specific run. 
+ sample_id: + type: integer + minimum: 1 + regression_id: + type: integer + minimum: 1 + output_id: + type: integer + minimum: 1 + filename: + type: string + maxLength: 255 + content_type: + type: string + maxLength: 100 + encoding: + type: string + enum: [utf-8, base64] + description: > + utf-8 only when file is confirmed text. Default is base64. + content: + type: string + maxLength: 1048576 + sha256: + type: string + pattern: '^[a-fA-F0-9]{64}$' + storage_status: + type: string + enum: [ok, degraded, missing] + description: > + ok = file verified in at least one storage backend. + degraded = file exists but integrity or redundancy check failed. + missing = file not found in any storage backend. + + Diff: + type: object + required: [run_id, sample_id, regression_id, output_id, status] + properties: + run_id: + type: integer + minimum: 1 + sample_id: + type: integer + minimum: 1 + regression_id: + type: integer + minimum: 1 + output_id: + type: integer + minimum: 1 + status: + type: string + enum: [identical, different, missing_expected, missing_actual] + summary: + type: object + required: [added_lines, removed_lines, changed_hunks] + properties: + added_lines: + type: integer + minimum: 0 + removed_lines: + type: integer + minimum: 0 + changed_hunks: + type: integer + minimum: 0 + hunks: + type: array + maxItems: 500 + items: + type: object + required: [expected_start, actual_start, lines] + additionalProperties: false + properties: + expected_start: + type: integer + minimum: 0 + actual_start: + type: integer + minimum: 0 + lines: + type: array + maxItems: 500 + items: + type: object + required: [kind, text] + additionalProperties: false + properties: + kind: + type: string + enum: [context, added, removed] + expected_line: + type: integer + minimum: 0 + nullable: true + actual_line: + type: integer + minimum: 0 + nullable: true + text: + type: string + maxLength: 1000 + + BaselineApprovalRequest: + type: object + required: [regression_id, output_id, reason] + 
additionalProperties: false + properties: + regression_id: + type: integer + minimum: 1 + output_id: + type: integer + minimum: 1 + reason: + type: string + minLength: 10 + maxLength: 500 + description: > + Required justification stored in the audit log. Minimum 10 + characters; do not accept placeholder values. + remove_variants: + type: boolean + default: false + description: > + If true, remove all platform-specific variants and use this + output as the single baseline across all platforms. + WARNING: This collapses platform-specific expected outputs into one. + + BaselineApproval: + type: object + required: [approval_id, status, run_id, sample_id, regression_id, output_id, requested_by, created_at] + properties: + approval_id: + type: string + maxLength: 100 + status: + type: string + enum: [approved] + run_id: + type: integer + minimum: 1 + sample_id: + type: integer + minimum: 1 + regression_id: + type: integer + minimum: 1 + output_id: + type: integer + minimum: 1 + requested_by: + type: string + maxLength: 100 + description: Display name of the user who requested the approval. 
+ created_at: + type: string + format: date-time + + ErrorItem: + type: object + required: [error_id, run_id, type, severity, message, occurred_at] + properties: + error_id: + type: string + maxLength: 100 + run_id: + type: integer + minimum: 1 + sample_id: + type: integer + minimum: 1 + nullable: true + regression_id: + type: integer + minimum: 1 + nullable: true + type: + type: string + enum: [test_failure, exit_code_mismatch, missing_output, diff_mismatch, queue, vm_provisioning, checkout, merge, build, worker, web_server, storage] + maxLength: 100 + severity: + type: string + enum: [info, warning, error, critical] + message: + type: string + maxLength: 1000 + location: + type: object + nullable: true + additionalProperties: true + properties: + file: + type: string + maxLength: 500 + nullable: true + line: + type: integer + minimum: 0 + nullable: true + column: + type: integer + minimum: 0 + nullable: true + sample_name: + type: string + maxLength: 255 + nullable: true + stack: + type: array + maxItems: 50 + description: Only present when include_stack=true was requested. 
+ items: + type: string + maxLength: 2000 + occurred_at: + type: string + format: date-time + + LogLine: + type: object + required: [timestamp, level, source, message, run_id] + properties: + timestamp: + type: string + format: date-time + level: + type: string + enum: [debug, info, warning, error, critical] + source: + type: string + enum: [orchestrator, worker, build, test_runner, web] + message: + type: string + maxLength: 4000 + run_id: + type: integer + minimum: 1 + sample_id: + type: integer + minimum: 1 + nullable: true + + ErrorSummaryBucket: + type: object + required: [key, count, severity] + properties: + key: + type: string + maxLength: 100 + count: + type: integer + minimum: 0 + severity: + type: string + enum: [info, warning, error, critical] + sample_ids: + type: array + maxItems: 1000 + items: + type: integer + minimum: 1 + first_seen_at: + type: string + format: date-time + nullable: true + last_seen_at: + type: string + format: date-time + nullable: true + + SystemHealth: + type: object + required: [status, checked_at, dependencies] + properties: + status: + type: string + enum: [ok, degraded, down] + checked_at: + type: string + format: date-time + dependencies: + type: array + maxItems: 20 + items: + type: object + required: [name, status] + properties: + name: + type: string + maxLength: 100 + status: + type: string + enum: [ok, degraded, down] + message: + type: string + maxLength: 500 + nullable: true + + QueueJob: + type: object + required: [run_id, status, platform, queued_at] + properties: + run_id: + type: integer + minimum: 1 + status: + type: string + enum: [queued, running] + platform: + type: string + enum: [linux, windows] + queued_at: + type: string + format: date-time + started_at: + type: string + format: date-time + nullable: true + position: + type: integer + minimum: 1 + nullable: true + description: Queue position. Null for jobs that are already running. 
+ + Artifact: + type: object + required: [artifact_id, run_id, type, filename, content_type, storage_status] + properties: + artifact_id: + type: string + maxLength: 100 + run_id: + type: integer + minimum: 1 + sample_id: + type: integer + minimum: 1 + nullable: true + type: + type: string + enum: [build_log, sample_output, expected_output, diff, media_info, binary] + filename: + type: string + maxLength: 255 + content_type: + type: string + maxLength: 100 + size_bytes: + type: integer + minimum: 0 + nullable: true + storage_status: + type: string + enum: [ok, degraded, missing] + description: > + ok = file verified in at least one storage backend. + degraded = file exists but integrity or redundancy check failed. + missing = file not found in any storage backend. + download_url: + type: string + format: uri + nullable: true + description: > + Non-null only when storage_status is ok or degraded. + Always a verified URL when non-null. Null when storage_status=missing. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4aaae11e..ae684782 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,3 +27,6 @@ PyGithub==2.9.1 blinker==1.9.0 click==8.3.3 PyYAML==6.0.3 +marshmallow>=3.21 +argon2-cffi>=23.0 +Flask-Limiter>=3.5 diff --git a/run.py b/run.py index e277c6d9..23e43456 100755 --- a/run.py +++ b/run.py @@ -24,6 +24,7 @@ SecretKeyInstallationException) from log_configuration import LogConfiguration from mailer import Mailer +from mod_api import mod_api from mod_auth.controllers import mod_auth from mod_ci.controllers import mod_ci from mod_customized.controllers import mod_customized @@ -273,3 +274,5 @@ def teardown(exception: Optional[Exception]): app.register_blueprint(mod_ci) app.register_blueprint(mod_customized, url_prefix='/custom') app.register_blueprint(mod_health) +# REST API v1 +app.register_blueprint(mod_api, url_prefix='/api/v1')