-
Notifications
You must be signed in to change notification settings - Fork 0
apply claude code fixes #37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f6db436
e1f6d76
06f34c8
c7c1405
be4c1b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,11 +1,14 @@ | ||
| import { Logger } from '@aws-lambda-powertools/logger'; | ||
| import { DynamoDBClient, GetItemCommand } from '@aws-sdk/client-dynamodb'; | ||
| import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'; | ||
| import { marshall, unmarshall } from '@aws-sdk/util-dynamodb'; | ||
| import { formatResponse } from '../utils/api.mjs'; | ||
| import { getCurrentClipStatus } from '../utils/clips.mjs'; | ||
| import { parseSrtFile, timeToSeconds } from '../utils/transcripts.mjs'; | ||
|
|
||
| const logger = new Logger({ serviceName: 'clips' }); | ||
| const ddb = new DynamoDBClient(); | ||
| const s3 = new S3Client(); | ||
|
|
||
| export const handler = async (event) => { | ||
| try { | ||
|
|
@@ -46,12 +49,51 @@ export const handler = async (event) => { | |
| const segments = clip.segments || []; | ||
| const segmentCount = segments.length; | ||
|
|
||
| // Attempt to extract accurate transcript text from the source SRT by matching | ||
| // each segment's time range. Falls back to the AI-stored text if the SRT is | ||
| // unavailable or yields no matching entries. | ||
| let srtEntries = []; | ||
| try { | ||
| const transcriptKey = `${tenantId}/${episodeId}/transcript.srt`; | ||
| const s3Response = await s3.send(new GetObjectCommand({ | ||
| Bucket: process.env.BUCKET_NAME, | ||
| Key: transcriptKey | ||
| })); | ||
| const srtContent = await s3Response.Body.transformToString(); | ||
| srtEntries = parseSrtFile(srtContent); | ||
| } catch (err) { | ||
| logger.warn('Could not load SRT for transcript extraction, falling back to stored text', { | ||
| error: err.message, | ||
| episodeId, | ||
| tenantId | ||
| }); | ||
| } | ||
|
|
||
| const transcript = segments | ||
| .sort((a, b) => (a.order || 0) - (b.order || 0)) | ||
| .map(segment => { | ||
| const speaker = segment.speaker || 'unknown'; | ||
| const speakerLabel = segment.speaker ? `[${segment.speaker}]: ` : ''; | ||
|
|
||
| if (srtEntries.length > 0) { | ||
| const segStart = timeToSeconds(segment.startTime); | ||
| const segEnd = timeToSeconds(segment.endTime); | ||
|
|
||
| const relevantEntries = srtEntries.filter(entry => { | ||
| const entryStart = timeToSeconds(entry.startTime); | ||
| const entryEnd = timeToSeconds(entry.endTime); | ||
| return entryStart < segEnd && entryEnd > segStart; | ||
| }); | ||
|
Comment on lines
+77
to
+85
|
||
|
|
||
| if (relevantEntries.length > 0) { | ||
| // Use the full SRT text | ||
| const text = relevantEntries.map(e => e.text).join(' '); | ||
| return `${speakerLabel}${text}`; | ||
| } | ||
| } | ||
|
|
||
| // Fallback: use what the AI stored | ||
| const text = segment.transcript || ''; | ||
| return `[${speaker}]: ${text}`; | ||
| return `${speakerLabel}${text}`; | ||
|
andmoredev marked this conversation as resolved.
|
||
| }) | ||
| .join('\n\n'); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,7 +70,7 @@ export const extractSpeakerFromText = (text) => { | |
| export const timeToSeconds = (timeStr) => { | ||
| const [time, ms] = timeStr.split(','); | ||
| const [hours, minutes, seconds] = time.split(':').map(Number); | ||
| return hours * 3600 + minutes * 60 + seconds + parseInt(ms) / 1000; | ||
| return hours * 3600 + minutes * 60 + seconds + (ms ? parseInt(ms) / 1000 : 0); | ||
|
||
| }; | ||
|
|
||
| export const secondsToTime = (totalSeconds) => { | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -9,14 +9,16 @@ export const timeToSeconds = (timeStr) => { | |||||
| throw new Error('Invalid time string'); | ||||||
| } | ||||||
|
|
||||||
| const parts = timeStr.split(':').map(part => parseInt(part, 10)); | ||||||
| const [timePart, msPart] = timeStr.split(','); | ||||||
| const parts = timePart.split(':').map(Number); | ||||||
| const milliseconds = msPart ? parseInt(msPart) / 1000 : 0; | ||||||
|
||||||
| const milliseconds = msPart ? parseInt(msPart) / 1000 : 0; | |
| const milliseconds = msPart ? parseInt(msPart, 10) / 1000 : 0; |
Copilot
AI
Feb 18, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new millisecond support in timeToSeconds functions is not adequately tested. The existing tests in tests/unit/utils/video-processing.test.js only test the HH:MM:SS and MM:SS formats without milliseconds. Tests should be added to verify parsing of HH:MM:SS,mmm and MM:SS,mmm formats, including edge cases like '00:00:00,000', '01:30:45,500', and '10:00,100'.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot open a new pull request to apply changes based on this feedback
Copilot
AI
Feb 18, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The error message states 'Time string must be in HH:MM:SS or MM:SS format' but this is now outdated. The function now supports milliseconds with the format HH:MM:SS,mmm or MM:SS,mmm. The error message should be updated to reflect this: 'Time string must be in HH:MM:SS, MM:SS, HH:MM:SS,mmm, or MM:SS,mmm format'.
| throw new Error('Time string must be in HH:MM:SS or MM:SS format'); | |
| throw new Error('Time string must be in HH:MM:SS, MM:SS, HH:MM:SS,mmm, or MM:SS,mmm format'); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The example timestamps in the prompt now include milliseconds (e.g., "00:14:32,000"), but there's an inconsistency with the broader codebase. The global TimestampSchema in schemas/common.mjs still only accepts HH:MM:SS format without milliseconds (/^\d{2}:\d{2}:\d{2}$/), and tests explicitly verify this. While the create-clips tool schema was updated locally to accept milliseconds, this creates a potential validation mismatch. If segments are stored with millisecond timestamps, they may fail validation in other parts of the system that use the global schema. Consider updating the global TimestampSchema or documenting why some timestamps have milliseconds while others don't.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot open a new pull request to apply changes based on this feedback