From f6db436a6e8ac8142393f6b7e423478098c0058d Mon Sep 17 00:00:00 2001 From: Andres Moreno Date: Wed, 18 Feb 2026 12:08:56 -0700 Subject: [PATCH 1/4] apply claude code fixes --- docs/architecture-diagrams.md | 68 ++++++++++++++++++++++++++++ functions/agents/clip-detector.mjs | 16 ++++++- functions/clips/get-clip.mjs | 46 ++++++++++++++++++- functions/utils/transcripts.mjs | 2 +- functions/utils/video-processing.mjs | 8 ++-- 5 files changed, 132 insertions(+), 8 deletions(-) diff --git a/docs/architecture-diagrams.md b/docs/architecture-diagrams.md index 1714160..974f576 100644 --- a/docs/architecture-diagrams.md +++ b/docs/architecture-diagrams.md @@ -1,5 +1,73 @@ # Architecture Diagrams +## Simplified Architecture (Presentation View) + +```mermaid +graph LR + subgraph Client + React[React App
TypeScript + Vite] + end + + subgraph Identity [Auth] + Cognito[Cognito
User Pool] + end + + subgraph API + Gateway[API Gateway
REST API] + Auth[Lambda
Authorizer] + end + + subgraph Core + Lambda[Lambda Functions
Episodes, Clips, Teams] + end + + subgraph AI + Bedrock[AWS Bedrock
Nova Pro Agent] + end + + subgraph Processing + MediaConvert[MediaConvert
Video Chunks] + StepFn[Step Functions
Clip Workflow] + end + + subgraph Storage + DynamoDB[(DynamoDB
Single Table)] + S3[(S3
Videos/Transcripts)] + end + + subgraph Events + EventBridge[EventBridge
Event Bus] + Momento[Momento
Real-time] + end + + React -->|HTTPS| Gateway + React -.->|Auth| Cognito + Gateway --> Auth + Auth --> Lambda + Lambda --> DynamoDB + Lambda --> S3 + S3 --> EventBridge + EventBridge --> Bedrock + EventBridge --> MediaConvert + EventBridge --> StepFn + Bedrock --> DynamoDB + StepFn --> S3 + EventBridge --> Momento + Momento -.->|Subscribe| React + + style React fill:#61dafb,stroke:#333,stroke-width:2px + style Cognito fill:#ff9900,stroke:#333,stroke-width:2px + style Gateway fill:#ff9900,stroke:#333,stroke-width:2px + style Lambda fill:#ff9900,stroke:#333,stroke-width:2px + style Bedrock fill:#ff9900,stroke:#333,stroke-width:2px + style DynamoDB fill:#4053d6,stroke:#333,stroke-width:2px + style S3 fill:#569a31,stroke:#333,stroke-width:2px + style EventBridge fill:#ff4081,stroke:#333,stroke-width:2px + style Momento fill:#00d4ff,stroke:#333,stroke-width:2px + style MediaConvert fill:#ff9900,stroke:#333,stroke-width:2px + style StepFn fill:#ff9900,stroke:#333,stroke-width:2px +``` + ## 1. Backend Architecture Diagram ```mermaid diff --git a/functions/agents/clip-detector.mjs b/functions/agents/clip-detector.mjs index fbb56f0..4443cf6 100644 --- a/functions/agents/clip-detector.mjs +++ b/functions/agents/clip-detector.mjs @@ -197,8 +197,20 @@ Each clip you pass to **createClip** must contain the schema: { "segments": [ - { "startTime": "00:14:32", "endTime": "00:15:18", "speaker": "Allen", "order": 1, "transcript": "Did you know agents could do this?" } - { "startTime": "00:41:01", "endTime": "00:41:05", "speaker": "Andres": "order": 2, "transcript": "No I didn't, but now we can use it" } + { + "startTime": "00:14:32,000", + "endTime": "00:15:18,500", + "speaker": "Allen", + "order": 1, + "transcript": "Did you know agents could do this? I was blown away the first time I saw it work end-to-end. You basically hand it a tool and it figures out the rest — no scaffolding, no hand-holding. It just goes. And the crazy part is it gets it right most of the time." 
+ }, + { + "startTime": "00:41:01,000", + "endTime": "00:41:05,200", + "speaker": "Andres", + "order": 2, + "transcript": "No I didn't, but now we can use it in production." + } ], "title": "Why we let our AI agent go rogue (on purpose)", "summary": "Allen and Andres debate what happens when you remove safety guardrails from an agent and whether chaos teaches more than control.", diff --git a/functions/clips/get-clip.mjs b/functions/clips/get-clip.mjs index c306098..685d1b9 100644 --- a/functions/clips/get-clip.mjs +++ b/functions/clips/get-clip.mjs @@ -1,11 +1,14 @@ import { Logger } from '@aws-lambda-powertools/logger'; import { DynamoDBClient, GetItemCommand } from '@aws-sdk/client-dynamodb'; +import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'; import { marshall, unmarshall } from '@aws-sdk/util-dynamodb'; import { formatResponse } from '../utils/api.mjs'; import { getCurrentClipStatus } from '../utils/clips.mjs'; +import { parseSrtFile, timeToSeconds } from '../utils/transcripts.mjs'; const logger = new Logger({ serviceName: 'clips' }); const ddb = new DynamoDBClient(); +const s3 = new S3Client(); export const handler = async (event) => { try { @@ -46,12 +49,51 @@ export const handler = async (event) => { const segments = clip.segments || []; const segmentCount = segments.length; + // Attempt to extract accurate transcript text from the source SRT by matching + // each segment's time range. Falls back to the AI-stored text if the SRT is + // unavailable or yields no matching entries. 
+ let srtEntries = []; + try { + const transcriptKey = `${tenantId}/${episodeId}/transcript.srt`; + const s3Response = await s3.send(new GetObjectCommand({ + Bucket: process.env.BUCKET_NAME, + Key: transcriptKey + })); + const srtContent = await s3Response.Body.transformToString(); + srtEntries = parseSrtFile(srtContent); + } catch (err) { + logger.warn('Could not load SRT for transcript extraction, falling back to stored text', { + error: err.message, + episodeId, + tenantId + }); + } + const transcript = segments .sort((a, b) => (a.order || 0) - (b.order || 0)) .map(segment => { - const speaker = segment.speaker || 'unknown'; + const speakerLabel = segment.speaker ? `[${segment.speaker}]: ` : ''; + + if (srtEntries.length > 0) { + const segStart = timeToSeconds(segment.startTime); + const segEnd = timeToSeconds(segment.endTime); + + const relevantEntries = srtEntries.filter(entry => { + const entryStart = timeToSeconds(entry.startTime); + const entryEnd = timeToSeconds(entry.endTime); + return entryStart < segEnd && entryEnd > segStart; + }); + + if (relevantEntries.length > 0) { + // Use the full SRT text (preserves per-entry speaker labels where present) + const text = relevantEntries.map(e => e.text).join(' '); + return text; + } + } + + // Fallback: use what the AI stored const text = segment.transcript || ''; - return `[${speaker}]: ${text}`; + return `${speakerLabel}${text}`; }) .join('\n\n'); diff --git a/functions/utils/transcripts.mjs b/functions/utils/transcripts.mjs index 1c81064..0f7f975 100644 --- a/functions/utils/transcripts.mjs +++ b/functions/utils/transcripts.mjs @@ -70,7 +70,7 @@ export const extractSpeakerFromText = (text) => { export const timeToSeconds = (timeStr) => { const [time, ms] = timeStr.split(','); const [hours, minutes, seconds] = time.split(':').map(Number); - return hours * 3600 + minutes * 60 + seconds + parseInt(ms) / 1000; + return hours * 3600 + minutes * 60 + seconds + (ms ? 
parseInt(ms) / 1000 : 0); }; export const secondsToTime = (totalSeconds) => { diff --git a/functions/utils/video-processing.mjs b/functions/utils/video-processing.mjs index 3438ad6..2862052 100644 --- a/functions/utils/video-processing.mjs +++ b/functions/utils/video-processing.mjs @@ -9,14 +9,16 @@ export const timeToSeconds = (timeStr) => { throw new Error('Invalid time string'); } - const parts = timeStr.split(':').map(part => parseInt(part, 10)); + const [timePart, msPart] = timeStr.split(','); + const parts = timePart.split(':').map(Number); + const milliseconds = msPart ? parseInt(msPart) / 1000 : 0; if (parts.length === 2) { const [minutes, seconds] = parts; - return minutes * 60 + seconds; + return minutes * 60 + seconds + milliseconds; } else if (parts.length === 3) { const [hours, minutes, seconds] = parts; - return hours * 3600 + minutes * 60 + seconds; + return hours * 3600 + minutes * 60 + seconds + milliseconds; } else { throw new Error('Time string must be in HH:MM:SS or MM:SS format'); } From e1f6d762956f7378bdda0183ac0d0c1159d9e8ea Mon Sep 17 00:00:00 2001 From: Andres Moreno Date: Wed, 18 Feb 2026 12:20:25 -0700 Subject: [PATCH 2/4] let's see --- template.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/template.yaml b/template.yaml index a4dc086..88fd1db 100644 --- a/template.yaml +++ b/template.yaml @@ -929,9 +929,14 @@ Resources: Action: - dynamodb:GetItem Resource: !GetAtt StreamPostProcessingTable.Arn + - Effect: Allow + Action: + - s3:GetObject + Resource: !Sub arn:${AWS::Partition}:s3:::${TranscriptBucket}/* Environment: Variables: TABLE_NAME: !Ref StreamPostProcessingTable + BUCKET_NAME: !Ref TranscriptBucket Events: Get: Type: Api From 06f34c8b34129471e8989669e2d0825185f968cc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 19:44:58 +0000 Subject: [PATCH 3/4] Initial plan From c7c1405acf2b17dbd1fb0d2b508597e0847e131e Mon Sep 17 00:00:00 2001 From: 
"copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Feb 2026 19:47:56 +0000 Subject: [PATCH 4/4] Fix: Always prepend speaker label to SRT transcript text Co-authored-by: andmoredev <33256364+andmoredev@users.noreply.github.com> --- functions/clips/get-clip.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/functions/clips/get-clip.mjs b/functions/clips/get-clip.mjs index 685d1b9..6cd79f5 100644 --- a/functions/clips/get-clip.mjs +++ b/functions/clips/get-clip.mjs @@ -85,9 +85,9 @@ export const handler = async (event) => { }); if (relevantEntries.length > 0) { - // Use the full SRT text (preserves per-entry speaker labels where present) + // Use the full SRT text const text = relevantEntries.map(e => e.text).join(' '); - return text; + return `${speakerLabel}${text}`; } }