Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 72 additions & 7 deletions functions/clips/get-clip.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
import { marshall, unmarshall } from '@aws-sdk/util-dynamodb';
import { formatResponse } from '../utils/api.mjs';
import { getCurrentClipStatus } from '../utils/clips.mjs';
import { parseSrtFile, timeToSeconds } from '../utils/transcripts.mjs';
import { parseSrtFile, timeToSeconds, detectSpeaker } from '../utils/transcripts.mjs';

const logger = new Logger({ serviceName: 'clips' });
const ddb = new DynamoDBClient();
Expand Down Expand Up @@ -55,25 +55,27 @@ export const handler = async (event) => {
let srtEntries = [];
try {
const transcriptKey = `${tenantId}/${episodeId}/transcript.srt`;
logger.info('Loading SRT for transcript extraction', { transcriptKey, bucket: process.env.BUCKET_NAME });
const s3Response = await s3.send(new GetObjectCommand({
Bucket: process.env.BUCKET_NAME,
Key: transcriptKey
}));
const srtContent = await s3Response.Body.transformToString();
srtEntries = parseSrtFile(srtContent);
logger.info('SRT loaded and parsed', { entryCount: srtEntries.length, episodeId });
} catch (err) {
logger.warn('Could not load SRT for transcript extraction, falling back to stored text', {
error: err.message,
errorName: err.name,
episodeId,
tenantId
tenantId,
bucketNameSet: !!process.env.BUCKET_NAME
});
}

const transcript = segments
.sort((a, b) => (a.order || 0) - (b.order || 0))
.map(segment => {
const speakerLabel = segment.speaker ? `[${segment.speaker}]: ` : '';

if (srtEntries.length > 0) {
const segStart = timeToSeconds(segment.startTime);
const segEnd = timeToSeconds(segment.endTime);
Expand All @@ -84,14 +86,77 @@ export const handler = async (event) => {
return entryStart < segEnd && entryEnd > segStart;
});

logger.info('SRT entry match result', {
segmentOrder: segment.order,
startTime: segment.startTime,
endTime: segment.endTime,
matchedEntries: relevantEntries.length
});

if (relevantEntries.length > 0) {
// Use the full SRT text
const text = relevantEntries.map(e => e.text).join(' ');
return `${speakerLabel}${text}`;
// Strip the inline "Speaker: " prefix from each SRT entry, then
// group consecutive entries by speaker into clean labelled blocks.
//
// Speaker labels only appear on the FIRST line of each speaker's
// turn in the SRT — continuation lines have no prefix. To correctly
// attribute unlabelled entries at the start of a segment we walk
// forward through the entries that precede segStart and keep the
// last explicit speaker label seen. Fall back to segment.speaker if no
// earlier label exists.
let initialSpeaker = segment.speaker || null;
for (const entry of srtEntries) {
if (timeToSeconds(entry.startTime) >= segStart) break;
const { speaker } = detectSpeaker(entry.text);
if (speaker) initialSpeaker = speaker;
}

logger.info('Initial speaker for segment', {
segmentOrder: segment.order,
initialSpeaker,
segmentSpeaker: segment.speaker
});

const blocks = [];
let currentSpeaker = initialSpeaker;
let currentLines = [];

for (const entry of relevantEntries) {
const { speaker, dialogue } = detectSpeaker(entry.text);
const entrySpeaker = speaker || currentSpeaker;

if (entrySpeaker !== currentSpeaker && currentLines.length > 0) {
const label = currentSpeaker ? `[${currentSpeaker}]: ` : '';
blocks.push(`${label}${currentLines.join(' ')}`);
currentLines = [];
}

currentSpeaker = entrySpeaker;
if (dialogue.trim()) currentLines.push(dialogue);

Copilot AI Feb 18, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When an SRT entry contains only a speaker label with no dialogue (e.g., "Speaker:" with just whitespace after), the detectSpeaker function returns the entire text as dialogue with speaker set to null. This causes the speaker label itself to be included as dialogue text in the transcript output. Consider checking if the dialogue contains only the speaker pattern before adding it, or handle this edge case explicitly.

Suggested change
if (dialogue.trim()) currentLines.push(dialogue);
const trimmedDialogue = (dialogue || '').trim();
// Skip empty dialogue lines.
if (!trimmedDialogue) {
continue;
}
// If detectSpeaker did not find a speaker, but the dialogue itself
// looks like just a "Speaker:" label with no content, skip it so
// that the label is not included as transcript text.
const speakerLabelOnlyPattern = /^\s*[^:]+:\s*$/;
if (!speaker && speakerLabelOnlyPattern.test(trimmedDialogue)) {
continue;
}
currentLines.push(trimmedDialogue);

Copilot uses AI. Check for mistakes.

logger.info('Entry Speaker', entrySpeaker);
}

if (currentLines.length > 0) {
const label = currentSpeaker ? `[${currentSpeaker}]: ` : '';
blocks.push(`${label}${currentLines.join(' ')}`);
}

logger.info('Transcript built from SRT', {
segmentOrder: segment.order,
blockCount: blocks.length ,
speaker: currentSpeaker
});
return blocks.join('\n\n');
}
}

// Fallback: use what the AI stored
logger.info('Using AI-stored transcript fallback', {
segmentOrder: segment.order,
segmentSpeaker: segment.speaker ?? 'no speaker',
reason: srtEntries.length === 0 ? 'no-srt-entries' : 'no-matching-entries'
});
const speakerLabel = segment.speaker ? `[${segment.speaker}]: ` : '';
const text = segment.transcript || '';
return `${speakerLabel}${text}`;
})
Expand Down
Loading