From a7ffbb792b2e5fdb881708ed0c35f0f62d8018f0 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Fri, 5 Jun 2026 10:20:14 -0400 Subject: [PATCH 1/3] fix(api): accept presigned s3Key for MCP uploads (attachment/document/evidence) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Customer-reported: the MCP server times out on uploads. Root cause is the documented one — these tools take the whole file as base64 inside the tool argument, so the LLM must emit the entire file token-by-token, which is impractically slow and hits the client's ~4-min timeout. Questionnaire and policy-PDF were already migrated to the presigned-upload pattern; the rest were not. Migrates the three tools the customer hit to the same pattern: accept an optional `s3Key` (from POST /v1/uploads/presign) alongside `fileData`. The service resolves the bytes from whichever is provided via UploadsService.readUploadAsBase64, which enforces that the key belongs to the caller's org. The MCP overlay strips `fileData` from these tools so agents must use create-upload-url -> PUT to S3 -> pass the s3Key; the base spec keeps fileData for the web UI / direct callers. - attachments (upload-task-attachment): DTO + service + module - knowledge-base (upload-document): DTO + service + module; also added the missing @ApiProperty decorators so the MCP tool finally has a real schema - offboarding-checklist (complete-checklist-item, upload-evidence): DTO + service; delegates to the now-fixed AttachmentsService - UploadPurpose: added `document` - regenerated packages/docs/openapi.json (carries the new s3Key fields; also synced some pre-existing drift that was stale in the committed spec) Tests: new attachments.service.spec (s3Key path + neither->400); extended knowledge-base.service.spec (s3Key path + neither->400); offboarding specs still green. typecheck clean; AppModule boots (DI wiring verified). 
The MCP generator stays healthy: 0 operations declare more than one security scheme. Out of scope (separate issues): create-version automation script-generation also times out, but that is heavy synchronous AI work, not base64 — needs an async job pattern. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../api/src/attachments/attachments.module.ts | 4 +- .../attachments/attachments.service.spec.ts | 86 +++ .../src/attachments/attachments.service.ts | 23 +- .../src/attachments/upload-attachment.dto.ts | 18 +- .../knowledge-base/dto/upload-document.dto.ts | 26 +- .../knowledge-base/knowledge-base.module.ts | 3 +- .../knowledge-base.service.spec.ts | 54 +- .../knowledge-base/knowledge-base.service.ts | 25 +- .../dto/complete-checklist-item.dto.ts | 12 +- .../offboarding-checklist.service.ts | 19 +- .../src/uploads/dto/create-upload-url.dto.ts | 1 + .../.speakeasy/mcp-uploads-overlay.yaml | 14 + packages/docs/openapi.json | 642 +++++++++++------- 13 files changed, 657 insertions(+), 270 deletions(-) create mode 100644 apps/api/src/attachments/attachments.service.spec.ts diff --git a/apps/api/src/attachments/attachments.module.ts b/apps/api/src/attachments/attachments.module.ts index 52999437e8..dc8cab4b32 100644 --- a/apps/api/src/attachments/attachments.module.ts +++ b/apps/api/src/attachments/attachments.module.ts @@ -1,10 +1,12 @@ import { Module } from '@nestjs/common'; import { AuthModule } from '../auth/auth.module'; +import { UploadsModule } from '../uploads/uploads.module'; import { AttachmentsController } from './attachments.controller'; import { AttachmentsService } from './attachments.service'; @Module({ - imports: [AuthModule], // Import AuthModule for HybridAuthGuard dependencies + // AuthModule: HybridAuthGuard deps. UploadsModule: presigned-upload s3Key reads. 
+ imports: [AuthModule, UploadsModule], controllers: [AttachmentsController], providers: [AttachmentsService], exports: [AttachmentsService], diff --git a/apps/api/src/attachments/attachments.service.spec.ts b/apps/api/src/attachments/attachments.service.spec.ts new file mode 100644 index 0000000000..cde9ffe991 --- /dev/null +++ b/apps/api/src/attachments/attachments.service.spec.ts @@ -0,0 +1,86 @@ +import { BadRequestException } from '@nestjs/common'; + +// Mocks must be declared before importing the service under test. +jest.mock('@/app/s3', () => ({ + s3Client: { send: jest.fn().mockResolvedValue({}) }, + getSignedUrl: jest.fn().mockResolvedValue('https://signed.example/file'), +})); + +jest.mock('@db', () => ({ + db: { attachment: { create: jest.fn() } }, + AttachmentType: { + image: 'image', + video: 'video', + audio: 'audio', + document: 'document', + other: 'other', + }, + AttachmentEntityType: { task: 'task', offboarding_checklist: 'offboarding_checklist' }, +})); + +jest.mock('../utils/file-type-validation', () => ({ + validateFileContent: jest.fn(), +})); + +import { db } from '@db'; +import { AttachmentsService } from './attachments.service'; + +const mockUploadsService = { readUploadAsBase64: jest.fn() }; + +describe('AttachmentsService — presigned s3Key uploads', () => { + let service: AttachmentsService; + + beforeEach(() => { + jest.clearAllMocks(); + process.env.APP_AWS_BUCKET_NAME = 'test-bucket'; + service = new AttachmentsService(mockUploadsService as never); + }); + + it('resolves the file from s3Key (presigned) — no base64 through the LLM — and uploads it', async () => { + mockUploadsService.readUploadAsBase64.mockResolvedValue( + Buffer.from('hello world').toString('base64'), + ); + (db.attachment.create as jest.Mock).mockResolvedValue({ + id: 'att_1', + name: 'rbac.pdf', + type: 'document', + url: 'org_1/attachments/task/tsk_1/key', + createdAt: new Date(), + }); + + const result = await service.uploadAttachment( + 'org_1', + 'tsk_1', + 
'task' as never, + { + fileName: 'rbac.pdf', + fileType: 'application/pdf', + s3Key: 'org_1/uploads/attachment/123-rbac.pdf', + } as never, + 'usr_1', + ); + + // Fetched the bytes from the org-scoped presigned key instead of base64. + expect(mockUploadsService.readUploadAsBase64).toHaveBeenCalledWith( + 'org_1', + 'org_1/uploads/attachment/123-rbac.pdf', + ); + expect(db.attachment.create).toHaveBeenCalled(); + expect(result.id).toBe('att_1'); + }); + + it('throws when neither fileData nor s3Key is provided', async () => { + await expect( + service.uploadAttachment( + 'org_1', + 'tsk_1', + 'task' as never, + { fileName: 'rbac.pdf', fileType: 'application/pdf' } as never, + 'usr_1', + ), + ).rejects.toBeInstanceOf(BadRequestException); + + expect(mockUploadsService.readUploadAsBase64).not.toHaveBeenCalled(); + expect(db.attachment.create).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/api/src/attachments/attachments.service.ts b/apps/api/src/attachments/attachments.service.ts index 6bb88bed94..849037373a 100644 --- a/apps/api/src/attachments/attachments.service.ts +++ b/apps/api/src/attachments/attachments.service.ts @@ -15,6 +15,7 @@ import { import { randomBytes } from 'crypto'; import { AttachmentResponseDto } from '../tasks/dto/task-responses.dto'; import { UploadAttachmentDto } from './upload-attachment.dto'; +import { UploadsService } from '../uploads/uploads.service'; import { validateFileContent } from '../utils/file-type-validation'; @Injectable() @@ -24,7 +25,7 @@ export class AttachmentsService { private readonly MAX_FILE_SIZE_BYTES = 100 * 1024 * 1024; // 100MB private readonly SIGNED_URL_EXPIRY = 900; // 15 minutes - constructor() { + constructor(private readonly uploadsService: UploadsService) { // AWS configuration is validated at startup via ConfigModule // Safe to access environment variables directly since they're validated this.bucketName = process.env.APP_AWS_BUCKET_NAME!; @@ -115,8 +116,26 @@ export class AttachmentsService { ); } + // 
Resolve the file content from either inline base64 (UI/direct callers) + // or a presigned-upload s3Key (AI/MCP clients — avoids slow base64 through + // an LLM). readUploadAsBase64 enforces that the key belongs to this org. + const fileData = + uploadDto.fileData ?? + (uploadDto.s3Key + ? await this.uploadsService.readUploadAsBase64( + organizationId, + uploadDto.s3Key, + ) + : undefined); + + if (!fileData) { + throw new BadRequestException( + 'Provide either fileData (base64) or s3Key from /v1/uploads/presign.', + ); + } + // Validate file size - const fileBuffer = Buffer.from(uploadDto.fileData, 'base64'); + const fileBuffer = Buffer.from(fileData, 'base64'); if (fileBuffer.length > this.MAX_FILE_SIZE_BYTES) { throw new BadRequestException( `File size exceeds maximum allowed size of ${this.MAX_FILE_SIZE_BYTES / (1024 * 1024)}MB`, diff --git a/apps/api/src/attachments/upload-attachment.dto.ts b/apps/api/src/attachments/upload-attachment.dto.ts index 6b950486ec..8f8db20a9e 100644 --- a/apps/api/src/attachments/upload-attachment.dto.ts +++ b/apps/api/src/attachments/upload-attachment.dto.ts @@ -29,15 +29,29 @@ export class UploadAttachmentDto { fileType: string; @ApiProperty({ - description: 'Base64 encoded file data', + description: + 'Base64-encoded file contents. For the web UI / direct callers. AI/MCP clients should instead upload via /v1/uploads/presign (purpose=attachment) and pass `s3Key` — base64 through an LLM is impractically slow and times out. Provide exactly one of fileData or s3Key.', + required: false, example: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==', }) + @IsOptional() @IsString() @IsNotEmpty() @MaxLength(134_217_728) @IsBase64() - fileData: string; + fileData?: string; + + @ApiProperty({ + description: + 'Key of a file already uploaded via /v1/uploads/presign (purpose=attachment). The server fetches the bytes from storage — no base64 needed. 
Provide exactly one of fileData or s3Key.', + required: false, + example: 'org_abc123/uploads/attachment/1700000000000-rbac-matrix.xlsx', + }) + @IsOptional() + @IsString() + @IsNotEmpty() + s3Key?: string; @ApiProperty({ description: 'Description of the attachment', diff --git a/apps/api/src/knowledge-base/dto/upload-document.dto.ts b/apps/api/src/knowledge-base/dto/upload-document.dto.ts index 5240521176..557d2a0d13 100644 --- a/apps/api/src/knowledge-base/dto/upload-document.dto.ts +++ b/apps/api/src/knowledge-base/dto/upload-document.dto.ts @@ -1,18 +1,42 @@ +import { ApiProperty } from '@nestjs/swagger'; import { IsOptional, IsString } from 'class-validator'; export class UploadDocumentDto { + @ApiProperty({ description: 'Organization ID that owns the document' }) @IsString() organizationId!: string; + @ApiProperty({ description: 'File name', example: 'rbac-matrix.xlsx' }) @IsString() fileName!: string; + @ApiProperty({ + description: 'MIME type of the file', + example: + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + }) @IsString() fileType!: string; + @ApiProperty({ + description: + 'Base64-encoded file contents. For the web UI / direct callers. AI/MCP clients should instead upload via /v1/uploads/presign (purpose=document) and pass `s3Key` — base64 through an LLM is impractically slow and times out. Provide exactly one of fileData or s3Key.', + required: false, + }) + @IsOptional() + @IsString() + fileData?: string; // base64 encoded + + @ApiProperty({ + description: + 'Key of a file already uploaded via /v1/uploads/presign (purpose=document). The server fetches the bytes from storage — no base64 needed. 
Provide exactly one of fileData or s3Key.', + required: false, + }) + @IsOptional() @IsString() - fileData!: string; // base64 encoded + s3Key?: string; + @ApiProperty({ description: 'Optional description', required: false }) @IsOptional() @IsString() description?: string; diff --git a/apps/api/src/knowledge-base/knowledge-base.module.ts b/apps/api/src/knowledge-base/knowledge-base.module.ts index bce607bd61..2774e28ed3 100644 --- a/apps/api/src/knowledge-base/knowledge-base.module.ts +++ b/apps/api/src/knowledge-base/knowledge-base.module.ts @@ -1,10 +1,11 @@ import { Module } from '@nestjs/common'; import { AuthModule } from '../auth/auth.module'; +import { UploadsModule } from '../uploads/uploads.module'; import { KnowledgeBaseController } from './knowledge-base.controller'; import { KnowledgeBaseService } from './knowledge-base.service'; @Module({ - imports: [AuthModule], + imports: [AuthModule, UploadsModule], controllers: [KnowledgeBaseController], providers: [KnowledgeBaseService], }) diff --git a/apps/api/src/knowledge-base/knowledge-base.service.spec.ts b/apps/api/src/knowledge-base/knowledge-base.service.spec.ts index 3b829abce4..efe03abc0c 100644 --- a/apps/api/src/knowledge-base/knowledge-base.service.spec.ts +++ b/apps/api/src/knowledge-base/knowledge-base.service.spec.ts @@ -1,5 +1,11 @@ +import { BadRequestException } from '@nestjs/common'; import { Test, TestingModule } from '@nestjs/testing'; import { KnowledgeBaseService } from './knowledge-base.service'; +import { UploadsService } from '../uploads/uploads.service'; + +const mockUploadsService = { + readUploadAsBase64: jest.fn(), +}; jest.mock('@db', () => ({ db: { @@ -68,7 +74,10 @@ describe('KnowledgeBaseService', () => { beforeEach(async () => { const module: TestingModule = await Test.createTestingModule({ - providers: [KnowledgeBaseService], + providers: [ + KnowledgeBaseService, + { provide: UploadsService, useValue: mockUploadsService }, + ], }).compile(); service = 
module.get(KnowledgeBaseService); @@ -215,6 +224,49 @@ describe('KnowledgeBaseService', () => { 'base64data', ); }); + + it('resolves content from s3Key (presigned upload) when no fileData', async () => { + mockUploadsService.readUploadAsBase64.mockResolvedValue('fromS3base64'); + (uploadToS3 as jest.Mock).mockResolvedValue({ + s3Key: 'org_1/doc.pdf', + fileSize: 2048, + }); + (mockDb.knowledgeBaseDocument.create as jest.Mock).mockResolvedValue({ + id: 'd2', + name: 'doc.pdf', + s3Key: 'org_1/doc.pdf', + }); + + await service.uploadDocument({ + organizationId: 'org_1', + fileName: 'doc.pdf', + fileType: 'application/pdf', + s3Key: 'org_1/uploads/document/123-doc.pdf', + } as any); + + // Fetched the bytes from the presigned key (org-scoped, no base64 via LLM) + expect(mockUploadsService.readUploadAsBase64).toHaveBeenCalledWith( + 'org_1', + 'org_1/uploads/document/123-doc.pdf', + ); + expect(uploadToS3).toHaveBeenCalledWith( + 'org_1', + 'doc.pdf', + 'application/pdf', + 'fromS3base64', + ); + }); + + it('throws when neither fileData nor s3Key is provided', async () => { + await expect( + service.uploadDocument({ + organizationId: 'org_1', + fileName: 'doc.pdf', + fileType: 'application/pdf', + } as any), + ).rejects.toBeInstanceOf(BadRequestException); + expect(uploadToS3).not.toHaveBeenCalled(); + }); }); describe('getDownloadUrl', () => { diff --git a/apps/api/src/knowledge-base/knowledge-base.service.ts b/apps/api/src/knowledge-base/knowledge-base.service.ts index 1b754192db..beb2612b98 100644 --- a/apps/api/src/knowledge-base/knowledge-base.service.ts +++ b/apps/api/src/knowledge-base/knowledge-base.service.ts @@ -1,8 +1,9 @@ -import { Injectable, Logger } from '@nestjs/common'; +import { BadRequestException, Injectable, Logger } from '@nestjs/common'; import { db } from '@db'; import { tasks, auth } from '@trigger.dev/sdk'; import { syncManualAnswerToVector } from '@/vector-store/lib'; import { UploadDocumentDto } from './dto/upload-document.dto'; +import { 
UploadsService } from '../uploads/uploads.service'; import { DeleteDocumentDto } from './dto/delete-document.dto'; import { GetDocumentUrlDto } from './dto/get-document-url.dto'; import { ProcessDocumentsDto } from './dto/process-documents.dto'; @@ -25,6 +26,8 @@ import { export class KnowledgeBaseService { private readonly logger = new Logger(KnowledgeBaseService.name); + constructor(private readonly uploadsService: UploadsService) {} + async listDocuments(organizationId: string) { return db.knowledgeBaseDocument.findMany({ where: { organizationId }, @@ -44,12 +47,30 @@ export class KnowledgeBaseService { } async uploadDocument(dto: UploadDocumentDto) { + // Resolve content from inline base64 (UI/direct) or a presigned-upload + // s3Key (AI/MCP clients — avoids slow base64 through an LLM). The read + // enforces that the key belongs to this org. + const fileData = + dto.fileData ?? + (dto.s3Key + ? await this.uploadsService.readUploadAsBase64( + dto.organizationId, + dto.s3Key, + ) + : undefined); + + if (!fileData) { + throw new BadRequestException( + 'Provide either fileData (base64) or s3Key from /v1/uploads/presign.', + ); + } + // Upload to S3 const { s3Key, fileSize } = await uploadToS3( dto.organizationId, dto.fileName, dto.fileType, - dto.fileData, + fileData, ); // Create database record diff --git a/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts b/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts index 017b7614a2..ced0b9f064 100644 --- a/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts +++ b/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts @@ -19,7 +19,8 @@ export class CompleteChecklistItemDto { fileType?: string; @ApiProperty({ - description: 'Base64 encoded evidence file', + description: + 'Base64-encoded evidence file. For the web UI / direct callers. 
AI/MCP clients should instead upload via /v1/uploads/presign (purpose=evidence) and pass `s3Key` — base64 through an LLM is impractically slow and times out. Provide fileData or s3Key (not both).', required: false, }) @IsOptional() @@ -27,4 +28,13 @@ export class CompleteChecklistItemDto { @MaxLength(134_217_728) @IsBase64() fileData?: string; + + @ApiProperty({ + description: + 'Key of an evidence file already uploaded via /v1/uploads/presign (purpose=evidence). The server fetches the bytes from storage — no base64 needed. Provide fileData or s3Key (not both).', + required: false, + }) + @IsOptional() + @IsString() + s3Key?: string; } diff --git a/apps/api/src/offboarding-checklist/offboarding-checklist.service.ts b/apps/api/src/offboarding-checklist/offboarding-checklist.service.ts index 09a4378478..23fadea7cc 100644 --- a/apps/api/src/offboarding-checklist/offboarding-checklist.service.ts +++ b/apps/api/src/offboarding-checklist/offboarding-checklist.service.ts @@ -13,12 +13,15 @@ interface CompleteChecklistItemDto { fileName?: string; fileType?: string; fileData?: string; + s3Key?: string; } interface UploadEvidenceDto { fileName: string; fileType: string; - fileData: string; + // Either inline base64 (UI/direct) or a presigned-upload s3Key (AI/MCP). + fileData?: string; + s3Key?: string; description?: string; } @@ -211,7 +214,14 @@ export class OffboardingChecklistService { throw new NotFoundException('Template item not found'); } - if (template.evidenceRequired && (!dto.fileData || !dto.fileName || !dto.fileType)) { + // Evidence can arrive as inline base64 (fileData) or a presigned-upload + // s3Key (AI/MCP clients — avoids slow base64 through an LLM). 
+ const hasEvidenceFile = Boolean(dto.fileData || dto.s3Key); + + if ( + template.evidenceRequired && + (!hasEvidenceFile || !dto.fileName || !dto.fileType) + ) { throw new BadRequestException('Evidence is required to complete this item'); } @@ -225,8 +235,10 @@ export class OffboardingChecklistService { }, }); - if (dto.fileName && dto.fileData && dto.fileType) { + if (dto.fileName && hasEvidenceFile && dto.fileType) { try { + // AttachmentsService.uploadAttachment resolves the bytes from whichever + // of fileData / s3Key is provided. await this.attachmentsService.uploadAttachment( organizationId, completion.id, @@ -234,6 +246,7 @@ export class OffboardingChecklistService { { fileName: dto.fileName, fileData: dto.fileData, + s3Key: dto.s3Key, fileType: dto.fileType, }, completedById, diff --git a/apps/api/src/uploads/dto/create-upload-url.dto.ts b/apps/api/src/uploads/dto/create-upload-url.dto.ts index 34776c779b..af2cf2a1e1 100644 --- a/apps/api/src/uploads/dto/create-upload-url.dto.ts +++ b/apps/api/src/uploads/dto/create-upload-url.dto.ts @@ -11,6 +11,7 @@ export enum UploadPurpose { policyPdf = 'policy_pdf', evidence = 'evidence', attachment = 'attachment', + document = 'document', general = 'general', } diff --git a/apps/mcp-server/.speakeasy/mcp-uploads-overlay.yaml b/apps/mcp-server/.speakeasy/mcp-uploads-overlay.yaml index 8c6e95b18e..083332aba5 100644 --- a/apps/mcp-server/.speakeasy/mcp-uploads-overlay.yaml +++ b/apps/mcp-server/.speakeasy/mcp-uploads-overlay.yaml @@ -79,3 +79,17 @@ actions: update: x-speakeasy-mcp: disabled: true + + # 7-9. Force presigned uploads for task attachments, knowledge-base documents, + # and offboarding evidence — same reason as the questionnaire tools above: + # base64 through an LLM is impractically slow and times out (customer-reported). 
+ # Strip the inline base64 `fileData` from the MCP tool surface so agents must + # use create-upload-url (purpose=attachment|document|evidence) -> PUT to S3 -> + # pass the returned `s3Key`. The base spec keeps `fileData` for the web UI / + # direct callers; only the MCP tools lose it. + - target: "$.components.schemas.UploadAttachmentDto.properties.fileData" + remove: true + - target: "$.components.schemas.UploadDocumentDto.properties.fileData" + remove: true + - target: "$.components.schemas.CompleteChecklistItemDto.properties.fileData" + remove: true diff --git a/packages/docs/openapi.json b/packages/docs/openapi.json index 99f0678f1c..da59719315 100644 --- a/packages/docs/openapi.json +++ b/packages/docs/openapi.json @@ -2118,6 +2118,14 @@ "example": "mem_abc123def456", "type": "string" } + }, + { + "name": "skipOffboarding", + "required": true, + "in": "query", + "schema": { + "type": "string" + } } ], "responses": { @@ -3033,6 +3041,66 @@ } } }, + "/v1/uploads/presign": { + "post": { + "description": "Returns a presigned S3 URL plus the s3Key the file lands at. PUT the raw file bytes to that URL, then call the feature tool (e.g. upload-and-parse) with the s3Key instead of sending file data. 
Bytes never pass through the LLM.", + "operationId": "UploadsController_createUploadUrl_v1", + "parameters": [ + { + "name": "X-Organization-Id", + "in": "header", + "description": "Organization ID (required for session auth, optional for API key auth)", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateUploadUrlDto" + } + } + } + }, + "responses": { + "201": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UploadUrlResponseDto" + } + } + } + } + }, + "security": [ + { + "apikey": [] + } + ], + "summary": "Get a presigned URL to upload a file", + "tags": [ + "Uploads" + ], + "x-mint": { + "metadata": { + "title": "Get a presigned URL to upload a file | Comp AI API", + "sidebarTitle": "Get a presigned URL to upload a file", + "description": "Returns a presigned S3 URL plus the s3Key the file lands at. PUT the raw file bytes to that URL, then call the feature tool (e.g. upload-and-parse) with the.", + "og:title": "Get a presigned URL to upload a file | Comp AI API", + "og:description": "Returns a presigned S3 URL plus the s3Key the file lands at. PUT the raw file bytes to that URL, then call the feature tool (e.g. upload-and-parse) with the." + } + }, + "x-speakeasy-mcp": { + "name": "create-upload-url" + } + } + }, "/v1/timelines": { "get": { "operationId": "TimelinesController_findAll_v1", @@ -3269,18 +3337,11 @@ "name": "department", "required": false, "in": "query", - "description": "Filter by department", + "description": "Filter by department. Built-in values: none, admin, gov, hr, it, itsm, qms. 
Custom department names are also accepted.", "schema": { - "type": "string", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ] + "maxLength": 64, + "example": "it", + "type": "string" } }, { @@ -7106,9 +7167,9 @@ "metadata": { "title": "List compliance policies | Comp AI API", "sidebarTitle": "List compliance policies", - "description": "Lists active compliance policies by default. Use includeArchived=true to include archived rows.", + "description": "Lists active compliance policies by default. Use includeArchived=true to include archived rows and excludeContent=true when you only need policy metadata.", "og:title": "List compliance policies | Comp AI API", - "og:description": "Lists active compliance policies by default. Use includeArchived=true to include archived rows." + "og:description": "Lists active compliance policies by default. Use includeArchived=true to include archived rows and excludeContent=true when you only need policy metadata." } }, "x-codeSamples": [ @@ -7184,7 +7245,6 @@ "signedBy": [], "reviewDate": "2024-12-31T00:00:00.000Z", "isArchived": false, - "archivedAt": null, "createdAt": "2024-01-01T00:00:00.000Z", "updatedAt": "2024-01-15T00:00:00.000Z", "organizationId": "org_abc123def456", @@ -8261,7 +8321,6 @@ ], "reviewDate": "2024-12-31T00:00:00.000Z", "isArchived": false, - "archivedAt": null, "createdAt": "2024-01-01T00:00:00.000Z", "updatedAt": "2024-01-15T00:00:00.000Z", "organizationId": "org_abc123def456", @@ -10086,66 +10145,6 @@ } } }, - "/v1/uploads/presign": { - "post": { - "description": "Returns a presigned S3 URL plus the s3Key the file lands at. PUT the raw file bytes to that URL, then call the feature tool (e.g. upload-and-parse) with the s3Key instead of sending file data. 
Bytes never pass through the LLM.", - "operationId": "UploadsController_createUploadUrl_v1", - "parameters": [ - { - "name": "X-Organization-Id", - "in": "header", - "description": "Organization ID (required for session auth, optional for API key auth)", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateUploadUrlDto" - } - } - } - }, - "responses": { - "201": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UploadUrlResponseDto" - } - } - } - } - }, - "security": [ - { - "apikey": [] - } - ], - "summary": "Get a presigned URL to upload a file", - "tags": [ - "Uploads" - ], - "x-mint": { - "metadata": { - "title": "Get a presigned URL to upload a file | Comp AI API", - "sidebarTitle": "Get a presigned URL to upload a file", - "description": "Returns a presigned S3 URL plus the s3Key the file lands at. PUT the raw file bytes to that URL, then call the feature tool (e.g. upload-and-parse) with the.", - "og:title": "Get a presigned URL to upload a file | Comp AI API", - "og:description": "Returns a presigned S3 URL plus the s3Key the file lands at. PUT the raw file bytes to that URL, then call the feature tool (e.g. upload-and-parse) with the." - } - }, - "x-speakeasy-mcp": { - "name": "create-upload-url" - } - } - }, "/v1/tasks": { "get": { "description": "List compliance tasks with assignments and status so teams can track audit readiness, evidence work, and control implementation.", @@ -10269,17 +10268,10 @@ }, "department": { "type": "string", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], "nullable": true, - "example": "it" + "example": "it", + "maxLength": 64, + "description": "Built-in values: none, admin, gov, hr, it, itsm, qms. Custom department names are also accepted." 
}, "controlIds": { "type": "array", @@ -10980,16 +10972,9 @@ }, "department": { "type": "string", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], - "example": "it" + "example": "it", + "maxLength": 64, + "description": "Built-in values: none, admin, gov, hr, it, itsm, qms. Custom department names are also accepted." }, "reviewDate": { "type": "string", @@ -17421,6 +17406,49 @@ } } }, + "/v1/soa/get-setup": { + "post": { + "operationId": "SOAController_getSetup_v1", + "parameters": [], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EnsureSOASetupDto" + } + } + } + }, + "responses": { + "200": { + "description": "Setup returned (configuration/document may be null)" + } + }, + "security": [ + { + "apikey": [] + } + ], + "summary": "Read SOA configuration and document without creating either", + "tags": [ + "SOA" + ], + "description": "Read SOA configuration and document without creating either in Comp AI. Create, auto-fill, review, approve, and export ISO 27001 Statement of Applicability documents.", + "x-mint": { + "metadata": { + "title": "Read SOA configuration and document without | Comp AI API", + "sidebarTitle": "Read SOA configuration and document without creating either", + "description": "Read SOA configuration and document without creating either in Comp AI. Create, auto-fill, review, approve, and export ISO 27001 Statement of Applicability.", + "og:title": "Read SOA configuration and document without | Comp AI API", + "og:description": "Read SOA configuration and document without creating either in Comp AI. Create, auto-fill, review, approve, and export ISO 27001 Statement of Applicability." 
+ } + }, + "x-speakeasy-mcp": { + "name": "get-setup" + } + } + }, "/v1/soa/approve": { "post": { "operationId": "SOAController_approveDocument_v1", @@ -19102,10 +19130,83 @@ } } }, + "/v1/integrations/sync/device-sync-provider": { + "get": { + "operationId": "SyncController_getDeviceSyncProvider_v1", + "parameters": [], + "responses": { + "200": { + "description": "" + } + }, + "security": [ + { + "apikey": [] + } + ], + "summary": "Get the currently configured device sync provider", + "tags": [ + "Integrations" + ], + "description": "Get the currently configured device sync provider in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables, and collect automated evidence.", + "x-mint": { + "metadata": { + "title": "Get the currently configured device sync | Comp AI API", + "sidebarTitle": "Get the currently configured device sync provider", + "description": "Get the currently configured device sync provider in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage.", + "og:title": "Get the currently configured device sync | Comp AI API", + "og:description": "Get the currently configured device sync provider in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage." + } + }, + "x-speakeasy-mcp": { + "name": "get-device-sync-provider" + } + }, + "post": { + "operationId": "SyncController_setDeviceSyncProvider_v1", + "parameters": [], + "responses": { + "201": { + "description": "" + } + }, + "security": [ + { + "apikey": [] + } + ], + "summary": "Set the device sync provider", + "tags": [ + "Integrations" + ], + "description": "Set the device sync provider in Comp AI. 
Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables, and collect automated evidence.", + "x-mint": { + "metadata": { + "title": "Set the device sync provider | Comp AI API", + "sidebarTitle": "Set the device sync provider", + "description": "Set the device sync provider in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables, and collect.", + "og:title": "Set the device sync provider | Comp AI API", + "og:description": "Set the device sync provider in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables, and collect." + } + }, + "x-speakeasy-mcp": { + "name": "set-device-sync-provider" + } + } + }, "/v1/integrations/sync/available-providers": { "get": { "operationId": "SyncController_getAvailableSyncProviders_v1", - "parameters": [], + "parameters": [ + { + "name": "syncType", + "required": true, + "in": "query", + "schema": { + "type": "string" + } + } + ], "responses": { "200": { "description": "" @@ -19116,18 +19217,18 @@ "apikey": [] } ], - "summary": "List employee sync providers available to the org", + "summary": "List sync providers available to the org", "tags": [ "Integrations" ], - "description": "List employee sync providers available to the org in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables, and collect automated evidence.", + "description": "List sync providers available to the org in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables, and collect automated evidence.", "x-mint": { "metadata": { - "title": "List employee sync providers available to the | Comp AI API", - "sidebarTitle": "List employee sync providers available to the org", - "description": "List employee sync providers available to the org in Comp AI. 
Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage.", - "og:title": "List employee sync providers available to the | Comp AI API", - "og:description": "List employee sync providers available to the org in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage." + "title": "List sync providers available to the org | Comp AI API", + "sidebarTitle": "List sync providers available to the org", + "description": "List sync providers available to the org in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables.", + "og:title": "List sync providers available to the org | Comp AI API", + "og:description": "List sync providers available to the org in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables." } }, "x-speakeasy-mcp": { @@ -19185,6 +19286,56 @@ } } }, + "/v1/integrations/sync/dynamic/{providerSlug}/devices": { + "post": { + "operationId": "SyncController_syncDynamicProviderDevices_v1", + "parameters": [ + { + "name": "providerSlug", + "required": true, + "in": "path", + "schema": { + "type": "string" + } + }, + { + "name": "connectionId", + "required": true, + "in": "query", + "schema": { + "type": "string" + } + } + ], + "responses": { + "201": { + "description": "" + } + }, + "security": [ + { + "apikey": [] + } + ], + "summary": "Sync devices for a dynamic provider", + "tags": [ + "Integrations" + ], + "description": "Sync devices for a dynamic provider in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables, and collect automated evidence.", + "x-mint": { + "metadata": { + "title": "Sync devices for a dynamic provider | Comp AI API", + "sidebarTitle": "Sync devices for a dynamic provider", + "description": "Sync devices for a dynamic provider in Comp AI. 
Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables.", + "og:title": "Sync devices for a dynamic provider | Comp AI API", + "og:description": "Sync devices for a dynamic provider in Comp AI. Connect vendor systems, configure OAuth apps, run compliance checks, sync employees, manage variables." + } + }, + "x-speakeasy-mcp": { + "name": "sync-dynamic-provider-devices" + } + } + }, "/v1/cloud-security/activity": { "get": { "operationId": "CloudSecurityController_getActivity_v1", @@ -21318,7 +21469,7 @@ }, "/v1/evidence-forms/{formType}/upload-submission": { "post": { - "description": "Upload a file as an evidence submission in Comp AI. Collect, review, upload, and export structured evidence submissions for compliance tasks and document requirements.", + "description": "Upload a PDF or image file and create a submission for the given form type, bypassing form-specific validation. Accepts session, API key, or service token auth. For API key / service token callers without an explicit user attribution, the.", "operationId": "EvidenceFormsController_uploadSubmission_v1", "parameters": [ { @@ -21357,9 +21508,9 @@ "metadata": { "title": "Upload a file as an evidence submission | Comp AI API", "sidebarTitle": "Upload a file as an evidence submission", - "description": "Upload a file as an evidence submission in Comp AI. Collect, review, upload, and export structured evidence submissions for compliance tasks and document.", + "description": "Upload a PDF or image file and create a submission for the given form type, bypassing form-specific validation. Accepts session, API key, or service token.", "og:title": "Upload a file as an evidence submission | Comp AI API", - "og:description": "Upload a file as an evidence submission in Comp AI. Collect, review, upload, and export structured evidence submissions for compliance tasks and document." 
+ "og:description": "Upload a PDF or image file and create a submission for the given form type, bypassing form-specific validation. Accepts session, API key, or service token." } }, "x-speakeasy-mcp": { @@ -24262,16 +24413,7 @@ }, "department": { "type": "string", - "description": "Member department", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], + "description": "Member department. May be one of the built-in values (none, admin, gov, hr, it, itsm, qms) or a custom department name.", "example": "it" }, "jobTitle": { @@ -24381,17 +24523,9 @@ }, "department": { "type": "string", - "description": "Member department", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], - "example": "it" + "description": "Member department. Built-in values: none, admin, gov, hr, it, itsm, qms. Custom department names are also accepted.", + "example": "it", + "maxLength": 64 }, "isActive": { "type": "boolean", @@ -24459,17 +24593,9 @@ }, "department": { "type": "string", - "description": "Member department", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], - "example": "it" + "description": "Member department. Built-in values: none, admin, gov, hr, it, itsm, qms. Custom department names are also accepted.", + "example": "it", + "maxLength": 64 }, "isActive": { "type": "boolean", @@ -24544,9 +24670,14 @@ }, "fileData": { "type": "string", - "description": "Base64 encoded file data", + "description": "Base64-encoded file contents. For the web UI / direct callers. AI/MCP clients should instead upload via /v1/uploads/presign (purpose=attachment) and pass `s3Key` — base64 through an LLM is impractically slow and times out. 
Provide exactly one of fileData or s3Key.", "example": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" }, + "s3Key": { + "type": "string", + "description": "Key of a file already uploaded via /v1/uploads/presign (purpose=attachment). The server fetches the bytes from storage — no base64 needed. Provide exactly one of fileData or s3Key.", + "example": "org_abc123/uploads/attachment/1700000000000-rbac-matrix.xlsx" + }, "description": { "type": "string", "description": "Description of the attachment", @@ -24561,8 +24692,7 @@ }, "required": [ "fileName", - "fileType", - "fileData" + "fileType" ] }, "EmailPreferencesDto": { @@ -24645,9 +24775,14 @@ }, "fileData": { "type": "string", - "description": "Base64 encoded file data", + "description": "Base64-encoded file contents. For the web UI / direct callers. AI/MCP clients should instead upload via /v1/uploads/presign (purpose=attachment) and pass `s3Key` — base64 through an LLM is impractically slow and times out. Provide exactly one of fileData or s3Key.", "example": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" }, + "s3Key": { + "type": "string", + "description": "Key of a file already uploaded via /v1/uploads/presign (purpose=attachment). The server fetches the bytes from storage — no base64 needed. Provide exactly one of fileData or s3Key.", + "example": "org_abc123/uploads/attachment/1700000000000-rbac-matrix.xlsx" + }, "description": { "type": "string", "description": "Description of the attachment", @@ -24677,7 +24812,6 @@ "required": [ "fileName", "fileType", - "fileData", "entityId", "entityType" ] @@ -24726,6 +24860,64 @@ "createdAt" ] }, + "CreateUploadUrlDto": { + "type": "object", + "properties": { + "purpose": { + "type": "string", + "enum": [ + "questionnaire", + "policy_pdf", + "evidence", + "attachment", + "document", + "general" + ], + "description": "What the file is for. 
Controls where the file is stored and which feature is expected to consume the returned s3Key.", + "example": "questionnaire" + }, + "fileName": { + "type": "string", + "description": "Original filename, used for the stored object name. Non-alphanumeric characters are replaced with underscores.", + "example": "vendor-security-questionnaire.xlsx" + }, + "fileType": { + "type": "string", + "description": "MIME type of the file (e.g. application/pdf, text/csv). Recorded as metadata and passed to the feature endpoint; the PUT itself is content-type agnostic, so the upload never fails on a header mismatch.", + "example": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + } + }, + "required": [ + "purpose", + "fileName", + "fileType" + ] + }, + "UploadUrlResponseDto": { + "type": "object", + "properties": { + "uploadUrl": { + "type": "string", + "description": "Presigned S3 URL. Send the raw file bytes with a plain HTTP PUT to this URL — no Content-Type or auth headers are required (the signature is in the URL). Then call the feature endpoint with the s3Key below.", + "example": "https://bucket.s3.us-east-1.amazonaws.com/org_x/uploads/...?X-Amz-Signature=..." + }, + "s3Key": { + "type": "string", + "description": "The S3 key the file will land at. Pass this to the feature endpoint (e.g. questionnaire upload-and-parse) instead of base64 file data.", + "example": "org_abc/uploads/questionnaire/1735000000-questionnaire.xlsx" + }, + "expiresIn": { + "type": "number", + "description": "Seconds until the presigned URL expires.", + "example": 900 + } + }, + "required": [ + "uploadUrl", + "s3Key", + "expiresIn" + ] + }, "CreateRiskDto": { "type": "object", "properties": { @@ -24759,17 +24951,9 @@ }, "department": { "type": "string", - "description": "Department responsible for the risk", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], - "example": "it" + "description": "Department responsible for the risk. 
Built-in values: none, admin, gov, hr, it, itsm, qms. Custom department names are also accepted.", + "example": "it", + "maxLength": 64 }, "status": { "type": "string", @@ -24903,17 +25087,9 @@ }, "department": { "type": "string", - "description": "Department responsible for the risk", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], - "example": "it" + "description": "Department responsible for the risk. Built-in values: none, admin, gov, hr, it, itsm, qms. Custom department names are also accepted.", + "example": "it", + "maxLength": 64 }, "status": { "type": "string", @@ -25978,16 +26154,7 @@ }, "department": { "type": "string", - "description": "Department this policy applies to", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], + "description": "Department this policy applies to. May be one of the built-in values (none, admin, gov, hr, it, itsm, qms) or a custom department name.", "example": "it", "nullable": true }, @@ -26236,17 +26403,9 @@ }, "department": { "type": "string", - "description": "Department this policy applies to", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], - "example": "it" + "description": "Department this policy applies to. Built-in values: none, admin, gov, hr, it, itsm, qms. Custom department names are also accepted.", + "example": "it", + "maxLength": 64 }, "isRequiredToSign": { "type": "boolean", @@ -26360,17 +26519,9 @@ }, "department": { "type": "string", - "description": "Department this policy applies to", - "enum": [ - "none", - "admin", - "gov", - "hr", - "it", - "itsm", - "qms" - ], - "example": "it" + "description": "Department this policy applies to. Built-in values: none, admin, gov, hr, it, itsm, qms. 
Custom department names are also accepted.", + "example": "it", + "maxLength": 64 }, "isRequiredToSign": { "type": "boolean", @@ -26561,63 +26712,6 @@ "type": "object", "properties": {} }, - "CreateUploadUrlDto": { - "type": "object", - "properties": { - "purpose": { - "type": "string", - "enum": [ - "questionnaire", - "policy_pdf", - "evidence", - "attachment", - "general" - ], - "description": "What the file is for. Controls where the file is stored and which feature is expected to consume the returned s3Key.", - "example": "questionnaire" - }, - "fileName": { - "type": "string", - "description": "Original filename, used for the stored object name. Non-alphanumeric characters are replaced with underscores.", - "example": "vendor-security-questionnaire.xlsx" - }, - "fileType": { - "type": "string", - "description": "MIME type of the file (e.g. application/pdf, text/csv). Recorded as metadata and passed to the feature endpoint; the PUT itself is content-type agnostic, so the upload never fails on a header mismatch.", - "example": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - } - }, - "required": [ - "purpose", - "fileName", - "fileType" - ] - }, - "UploadUrlResponseDto": { - "type": "object", - "properties": { - "uploadUrl": { - "type": "string", - "description": "Presigned S3 URL. Send the raw file bytes with a plain HTTP PUT to this URL — no Content-Type or auth headers are required (the signature is in the URL). Then call the feature endpoint with the s3Key below.", - "example": "https://bucket.s3.us-east-1.amazonaws.com/org_x/uploads/...?X-Amz-Signature=..." - }, - "s3Key": { - "type": "string", - "description": "The S3 key the file will land at. Pass this to the feature endpoint (e.g. 
questionnaire upload-and-parse) instead of base64 file data.", - "example": "org_abc/uploads/questionnaire/1735000000-questionnaire.xlsx" - }, - "expiresIn": { - "type": "number", - "description": "Seconds until the presigned URL expires.", - "example": 900 - } - }, - "required": [ - "uploadUrl", - "s3Key", - "expiresIn" - ] - }, "TaskResponseDto": { "type": "object", "properties": { @@ -27679,7 +27773,39 @@ }, "UploadDocumentDto": { "type": "object", - "properties": {} + "properties": { + "organizationId": { + "type": "string", + "description": "Organization ID that owns the document" + }, + "fileName": { + "type": "string", + "description": "File name", + "example": "rbac-matrix.xlsx" + }, + "fileType": { + "type": "string", + "description": "MIME type of the file", + "example": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + }, + "fileData": { + "type": "string", + "description": "Base64-encoded file contents. For the web UI / direct callers. AI/MCP clients should instead upload via /v1/uploads/presign (purpose=document) and pass `s3Key` — base64 through an LLM is impractically slow and times out. Provide exactly one of fileData or s3Key." + }, + "s3Key": { + "type": "string", + "description": "Key of a file already uploaded via /v1/uploads/presign (purpose=document). The server fetches the bytes from storage — no base64 needed. Provide exactly one of fileData or s3Key." + }, + "description": { + "type": "string", + "description": "Optional description" + } + }, + "required": [ + "organizationId", + "fileName", + "fileType" + ] }, "ProcessDocumentsDto": { "type": "object", @@ -28711,7 +28837,11 @@ }, "fileData": { "type": "string", - "description": "Base64 encoded evidence file" + "description": "Base64-encoded evidence file. For the web UI / direct callers. AI/MCP clients should instead upload via /v1/uploads/presign (purpose=evidence) and pass `s3Key` — base64 through an LLM is impractically slow and times out. 
Provide fileData or s3Key (not both)." + }, + "s3Key": { + "type": "string", + "description": "Key of an evidence file already uploaded via /v1/uploads/presign (purpose=evidence). The server fetches the bytes from storage — no base64 needed. Provide fileData or s3Key (not both)." } } } From 4e167b0f662ff1aed52c85391950a7856237e66d Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Fri, 5 Jun 2026 11:03:09 -0400 Subject: [PATCH 2/3] fix(api): guard presigned-upload reads against oversized files Addresses the cubic review on PR #3042. A presigned S3 PUT cannot enforce a size limit, so an authenticated client could upload an arbitrarily large file and then call a feature endpoint that reads it back via readUploadAsBase64 -> getObjectAsBuffer, loading the whole object into memory (plus ~1.33x for base64) before any size check ran. That is an avoidable OOM/DoS vector shared by attachment, knowledge-base, and questionnaire uploads. - Add getObjectContentLength (a HEAD request) and check the object size BEFORE downloading in readUploadAsBase64; reject anything over a configurable ceiling (default 100MB, matching the feature services' decoded-buffer limit). - Add the missing @MaxLength(134_217_728) + @IsBase64() to UploadDocumentDto's fileData, matching the other migrated upload DTOs (caps the inline base64 path at the validation layer). - Cover both with unit tests: oversized rejected before download, caller maxBytes honored, unknown content-length still proceeds, HEAD failure gives a clear error. Issues identified by cubic. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/api/src/app/s3.ts | 25 +++++++++ .../knowledge-base/dto/upload-document.dto.ts | 6 ++- apps/api/src/uploads/uploads.service.spec.ts | 53 ++++++++++++++++++- apps/api/src/uploads/uploads.service.ts | 43 ++++++++++++++- 4 files changed, 124 insertions(+), 3 deletions(-) diff --git a/apps/api/src/app/s3.ts b/apps/api/src/app/s3.ts index e74ea40ce0..cf0b43350f 100644 --- a/apps/api/src/app/s3.ts +++ b/apps/api/src/app/s3.ts @@ -1,5 +1,6 @@ import { GetObjectCommand, + HeadObjectCommand, PutObjectCommand, S3Client, type GetObjectCommandOutput, @@ -195,3 +196,27 @@ export async function getObjectAsBuffer( const bytes = await response.Body.transformToByteArray(); return Buffer.from(bytes); } + +/** + * Fetch an S3 object's size (in bytes) via a HEAD request, WITHOUT downloading + * the body. Used to reject oversized uploads before loading them into memory — + * `getObjectAsBuffer` would otherwise buffer the entire object (and base64 + * callers expand it ~1.33x on top), so a single huge file could OOM the API. + * + * Returns `undefined` if S3 doesn't report a ContentLength (callers should treat + * that as "size unknown" rather than "zero"). 
+ */ +export async function getObjectContentLength( + bucket: string, + key: string, +): Promise<number | undefined> { + if (!s3Client) { + throw new Error('S3 client not configured'); + } + + const response = await s3Client.send( + new HeadObjectCommand({ Bucket: bucket, Key: key }), + ); + + return response.ContentLength; +} diff --git a/apps/api/src/knowledge-base/dto/upload-document.dto.ts b/apps/api/src/knowledge-base/dto/upload-document.dto.ts index 557d2a0d13..d4a042c633 100644 --- a/apps/api/src/knowledge-base/dto/upload-document.dto.ts +++ b/apps/api/src/knowledge-base/dto/upload-document.dto.ts @@ -1,5 +1,5 @@ import { ApiProperty } from '@nestjs/swagger'; -import { IsOptional, IsString } from 'class-validator'; +import { IsBase64, IsOptional, IsString, MaxLength } from 'class-validator'; export class UploadDocumentDto { @ApiProperty({ description: 'Organization ID that owns the document' }) @@ -25,6 +25,10 @@ export class UploadDocumentDto { }) @IsOptional() @IsString() + // ~128MB of base64 ≈ 100MB decoded — caps the inline payload at the validation + // layer (before it is decoded), matching the other migrated upload DTOs.
+ @MaxLength(134_217_728) + @IsBase64() fileData?: string; // base64 encoded @ApiProperty({ diff --git a/apps/api/src/uploads/uploads.service.spec.ts b/apps/api/src/uploads/uploads.service.spec.ts index 064271003c..25327b2e73 100644 --- a/apps/api/src/uploads/uploads.service.spec.ts +++ b/apps/api/src/uploads/uploads.service.spec.ts @@ -8,12 +8,14 @@ jest.mock('../app/s3', () => ({ s3Client: { send: jest.fn() }, getSignedUrl: jest.fn(async () => 'https://test-bucket.s3.amazonaws.com/signed'), getObjectAsBuffer: jest.fn(), + getObjectContentLength: jest.fn(), })); // eslint-disable-next-line @typescript-eslint/no-require-imports const s3 = require('../app/s3') as { getSignedUrl: jest.Mock; getObjectAsBuffer: jest.Mock; + getObjectContentLength: jest.Mock; }; describe('UploadsService', () => { @@ -83,6 +85,7 @@ describe('UploadsService', () => { describe('readUploadAsBase64', () => { it('fetches the object and returns base64 for a valid org key', async () => { + s3.getObjectContentLength.mockResolvedValueOnce(11); s3.getObjectAsBuffer.mockResolvedValueOnce(Buffer.from('hello world')); const result = await service.readUploadAsBase64( @@ -97,10 +100,58 @@ describe('UploadsService', () => { await expect( service.readUploadAsBase64(orgId, 'other_org/uploads/questionnaire/x.csv'), ).rejects.toThrow(/does not belong to this organization/); + expect(s3.getObjectContentLength).not.toHaveBeenCalled(); expect(s3.getObjectAsBuffer).not.toHaveBeenCalled(); }); - it('throws a clear error when the object is missing in S3', async () => { + it('rejects an oversized object via HEAD, before downloading it', async () => { + // 200MB — over the 100MB default ceiling. + s3.getObjectContentLength.mockResolvedValueOnce(200 * 1024 * 1024); + + await expect( + service.readUploadAsBase64(orgId, `${orgId}/uploads/document/huge.bin`), + ).rejects.toThrow(/maximum allowed size/); + + // The whole point: we must NOT download the body for an oversized file. 
+ expect(s3.getObjectAsBuffer).not.toHaveBeenCalled(); + }); + + it('honors a caller-provided maxBytes ceiling', async () => { + s3.getObjectContentLength.mockResolvedValueOnce(2 * 1024 * 1024); // 2MB + + await expect( + service.readUploadAsBase64( + orgId, + `${orgId}/uploads/document/2mb.bin`, + 1 * 1024 * 1024, // 1MB cap + ), + ).rejects.toThrow(/maximum allowed size/); + expect(s3.getObjectAsBuffer).not.toHaveBeenCalled(); + }); + + it('proceeds when S3 does not report a content length', async () => { + s3.getObjectContentLength.mockResolvedValueOnce(undefined); + s3.getObjectAsBuffer.mockResolvedValueOnce(Buffer.from('abc')); + + const result = await service.readUploadAsBase64( + orgId, + `${orgId}/uploads/document/unknown-size.bin`, + ); + + expect(result).toBe(Buffer.from('abc').toString('base64')); + }); + + it('throws a clear error when the object cannot be stat-ed (HEAD fails)', async () => { + s3.getObjectContentLength.mockRejectedValueOnce(new Error('NoSuchKey')); + + await expect( + service.readUploadAsBase64(orgId, `${orgId}/uploads/questionnaire/missing.csv`), + ).rejects.toThrow(/No file found/); + expect(s3.getObjectAsBuffer).not.toHaveBeenCalled(); + }); + + it('throws a clear error when the object body fails to download', async () => { + s3.getObjectContentLength.mockResolvedValueOnce(100); s3.getObjectAsBuffer.mockRejectedValueOnce(new Error('NoSuchKey')); await expect( diff --git a/apps/api/src/uploads/uploads.service.ts b/apps/api/src/uploads/uploads.service.ts index 518ee993b7..001346070d 100644 --- a/apps/api/src/uploads/uploads.service.ts +++ b/apps/api/src/uploads/uploads.service.ts @@ -1,6 +1,12 @@ import { BadRequestException, Injectable, Logger } from '@nestjs/common'; import { PutObjectCommand } from '@aws-sdk/client-s3'; -import { BUCKET_NAME, getObjectAsBuffer, getSignedUrl, s3Client } from '../app/s3'; +import { + BUCKET_NAME, + getObjectAsBuffer, + getObjectContentLength, + getSignedUrl, + s3Client, +} from '../app/s3'; import { 
CreateUploadUrlDto, UploadUrlResponseDto, @@ -47,6 +53,14 @@ export class UploadsService { * of a leaked URL, long enough for a real upload. */ private static readonly UPLOAD_URL_TTL_SECONDS = 900; + /** + * Default ceiling for files read back from S3 via the presigned flow. A plain + * presigned PUT cannot enforce a size limit, so this is the backstop that + * stops an oversized upload from being loaded into memory. Matches the 100MB + * limit the feature services enforce on the decoded buffer. + */ + static readonly DEFAULT_MAX_UPLOAD_BYTES = 100 * 1024 * 1024; + /** * Generate a presigned S3 PUT URL plus the org-scoped key the file will land * at. The key prefix is always `{organizationId}/uploads/{purpose}/` so files @@ -97,12 +111,39 @@ export class UploadsService { async readUploadAsBase64( organizationId: string, s3Key: string, + maxBytes: number = UploadsService.DEFAULT_MAX_UPLOAD_BYTES, ): Promise<string> { if (!BUCKET_NAME) { throw new BadRequestException('File storage is not configured'); } this.assertKeyBelongsToOrg(organizationId, s3Key); + // Reject oversized uploads via a HEAD request BEFORE downloading and + // base64-encoding the object. A presigned PUT can't cap upload size, so + // without this an authenticated client could PUT a multi-GB file and have + // the API load it fully into memory (buffer + ~1.33x base64) and OOM. + let contentLength: number | undefined; + try { + contentLength = await getObjectContentLength(BUCKET_NAME, s3Key); + } catch (error) { + this.logger.warn( + `Failed to stat uploaded file ${s3Key}: ${ + error instanceof Error ?
error.message : 'unknown error' + }`, + ); + throw new BadRequestException( + 'No file found at the given s3Key — upload it via the presigned URL first.', + ); + } + + if (contentLength !== undefined && contentLength > maxBytes) { + throw new BadRequestException( + `File exceeds the maximum allowed size of ${Math.floor( + maxBytes / (1024 * 1024), + )}MB`, + ); + } + try { const buffer = await getObjectAsBuffer(BUCKET_NAME, s3Key); return buffer.toString('base64'); From f07678999e0efe5b74531f6b925ddeb877427c3e Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Fri, 5 Jun 2026 11:10:07 -0400 Subject: [PATCH 3/3] fix(api): cap inline upload base64 at the true 100MB limit (UI-safe) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The base64 max-length on the inline fileData fields was 134_217_728, which is the encoded length of only 96 MiB of data — but the web UI dropzones and the feature services both allow 100 MiB. So a 96-100 MiB file a user can pick in the UI would be rejected with a 400 at validation. Adding that same cap to UploadDocumentDto (the cubic fix) would have introduced this break for knowledge-base uploads, whose dropzone allows 100 MiB. - Add upload-limits.ts as the single source of truth: MAX_UPLOAD_BYTES (100 MiB) and MAX_UPLOAD_BASE64_LENGTH (its base64 length, 139,810,136). - Point all three inline upload DTOs (attachment, knowledge-base document, offboarding evidence) and the presigned-read ceiling at it. Loosening a cap only accepts more (still <= the 100 MiB the services enforce on decode), so no currently-valid upload is newly rejected. - Verified the web UI sends raw, prefix-stripped base64, so the @IsBase64() check accepts existing UI uploads unchanged. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/attachments/upload-attachment.dto.ts | 3 ++- .../knowledge-base/dto/upload-document.dto.ts | 8 +++--- .../dto/complete-checklist-item.dto.ts | 3 ++- apps/api/src/uploads/upload-limits.spec.ts | 15 +++++++++++ apps/api/src/uploads/upload-limits.ts | 27 +++++++++++++++++++ apps/api/src/uploads/uploads.service.ts | 7 ++--- 6 files changed, 55 insertions(+), 8 deletions(-) create mode 100644 apps/api/src/uploads/upload-limits.spec.ts create mode 100644 apps/api/src/uploads/upload-limits.ts diff --git a/apps/api/src/attachments/upload-attachment.dto.ts b/apps/api/src/attachments/upload-attachment.dto.ts index 8f8db20a9e..3e38d4ec07 100644 --- a/apps/api/src/attachments/upload-attachment.dto.ts +++ b/apps/api/src/attachments/upload-attachment.dto.ts @@ -8,6 +8,7 @@ import { MaxLength, } from 'class-validator'; import { IsMimeTypeField } from '../utils/mime-type.validator'; +import { MAX_UPLOAD_BASE64_LENGTH } from '../uploads/upload-limits'; export class UploadAttachmentDto { @ApiProperty({ @@ -38,7 +39,7 @@ export class UploadAttachmentDto { @IsOptional() @IsString() @IsNotEmpty() - @MaxLength(134_217_728) + @MaxLength(MAX_UPLOAD_BASE64_LENGTH) @IsBase64() fileData?: string; diff --git a/apps/api/src/knowledge-base/dto/upload-document.dto.ts b/apps/api/src/knowledge-base/dto/upload-document.dto.ts index d4a042c633..08088a79ad 100644 --- a/apps/api/src/knowledge-base/dto/upload-document.dto.ts +++ b/apps/api/src/knowledge-base/dto/upload-document.dto.ts @@ -1,5 +1,6 @@ import { ApiProperty } from '@nestjs/swagger'; import { IsBase64, IsOptional, IsString, MaxLength } from 'class-validator'; +import { MAX_UPLOAD_BASE64_LENGTH } from '../../uploads/upload-limits'; export class UploadDocumentDto { @ApiProperty({ description: 'Organization ID that owns the document' }) @@ -25,9 +26,10 @@ export class UploadDocumentDto { }) @IsOptional() @IsString() - // ~128MB of base64 ≈ 100MB decoded — caps the inline payload at the 
validation - // layer (before it is decoded), matching the other migrated upload DTOs. - @MaxLength(134_217_728) + // Cap the inline payload at the validation layer (before it is decoded), + // matching the other migrated upload DTOs. The limit is the base64 length of + // the 100 MiB file ceiling — see upload-limits.ts. + @MaxLength(MAX_UPLOAD_BASE64_LENGTH) @IsBase64() fileData?: string; // base64 encoded diff --git a/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts b/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts index ced0b9f064..fa690233a7 100644 --- a/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts +++ b/apps/api/src/offboarding-checklist/dto/complete-checklist-item.dto.ts @@ -1,6 +1,7 @@ import { ApiProperty } from '@nestjs/swagger'; import { IsOptional, IsString, MaxLength, IsBase64 } from 'class-validator'; import { IsMimeTypeField } from '../../utils/mime-type.validator'; +import { MAX_UPLOAD_BASE64_LENGTH } from '../../uploads/upload-limits'; export class CompleteChecklistItemDto { @ApiProperty({ description: 'Optional notes', required: false }) @@ -25,7 +26,7 @@ export class CompleteChecklistItemDto { }) @IsOptional() @IsString() - @MaxLength(134_217_728) + @MaxLength(MAX_UPLOAD_BASE64_LENGTH) @IsBase64() fileData?: string; diff --git a/apps/api/src/uploads/upload-limits.spec.ts b/apps/api/src/uploads/upload-limits.spec.ts new file mode 100644 index 0000000000..83ffe1d4d0 --- /dev/null +++ b/apps/api/src/uploads/upload-limits.spec.ts @@ -0,0 +1,15 @@ +import { MAX_UPLOAD_BASE64_LENGTH, MAX_UPLOAD_BYTES } from './upload-limits'; + +describe('upload-limits', () => { + it('caps decoded uploads at 100 MiB', () => { + expect(MAX_UPLOAD_BYTES).toBe(100 * 1024 * 1024); + }); + + it('allows the base64 of a full 100 MiB file (no false 400 for UI uploads)', () => { + // Regression guard: the previous literal (134_217_728) was the base64 length + // of only 96 MiB, so a 96–100 MiB file the
UI/service accept was rejected. + expect(MAX_UPLOAD_BASE64_LENGTH).toBe(139_810_136); + expect(MAX_UPLOAD_BASE64_LENGTH).toBe(Math.ceil(MAX_UPLOAD_BYTES / 3) * 4); + expect(MAX_UPLOAD_BASE64_LENGTH).toBeGreaterThan(134_217_728); + }); +}); diff --git a/apps/api/src/uploads/upload-limits.ts b/apps/api/src/uploads/upload-limits.ts new file mode 100644 index 0000000000..e1372aefd2 --- /dev/null +++ b/apps/api/src/uploads/upload-limits.ts @@ -0,0 +1,27 @@ +/** + * Shared upload size limits for both upload paths: + * - inline base64 `fileData` (web UI / direct callers), capped on the DTO via + * MAX_UPLOAD_BASE64_LENGTH so an oversized payload is rejected at validation + * time (before it is decoded); + * - presigned `s3Key` (AI/MCP clients), capped in UploadsService via a HEAD + * request before the object is downloaded. + * + * Keep these as the single source of truth so the DTO caps, the service caps, + * and the web UI dropzone limits can't drift apart. + */ + +/** Maximum decoded file size accepted by any upload path (100 MiB). */ +export const MAX_UPLOAD_BYTES = 100 * 1024 * 1024; + +/** + * Maximum length of a base64-encoded inline `fileData` field. + * + * Base64 inflates bytes by 4/3 (4 chars per 3 bytes, padded), so this is the + * encoded length of a MAX_UPLOAD_BYTES file: `4 * ceil(bytes / 3)` = 139,810,136. + * + * IMPORTANT: it must be the base64 length of the FULL byte limit, not the byte + * limit itself. The previous literal (134_217_728 = 128 MiB of characters) only + * permitted ~96 MiB of decoded data, so a 96–100 MiB file the UI dropzone and + * the service both allow was wrongly rejected with a 400. 
+ */ +export const MAX_UPLOAD_BASE64_LENGTH = Math.ceil(MAX_UPLOAD_BYTES / 3) * 4; diff --git a/apps/api/src/uploads/uploads.service.ts b/apps/api/src/uploads/uploads.service.ts index 001346070d..1ee857e304 100644 --- a/apps/api/src/uploads/uploads.service.ts +++ b/apps/api/src/uploads/uploads.service.ts @@ -11,6 +11,7 @@ import { CreateUploadUrlDto, UploadUrlResponseDto, } from './dto/create-upload-url.dto'; +import { MAX_UPLOAD_BYTES } from './upload-limits'; /** * ============================================================================ @@ -56,10 +57,10 @@ export class UploadsService { /** * Default ceiling for files read back from S3 via the presigned flow. A plain * presigned PUT cannot enforce a size limit, so this is the backstop that - * stops an oversized upload from being loaded into memory. Matches the 100MB - * limit the feature services enforce on the decoded buffer. + * stops an oversized upload from being loaded into memory. Shares the 100MB + * limit the feature services / DTOs enforce (see upload-limits.ts). */ - static readonly DEFAULT_MAX_UPLOAD_BYTES = 100 * 1024 * 1024; + static readonly DEFAULT_MAX_UPLOAD_BYTES = MAX_UPLOAD_BYTES; /** * Generate a presigned S3 PUT URL plus the org-scoped key the file will land