Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions backend/app/api/routes/stt_evaluations/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from app.crud.file import get_files_by_ids
from app.crud.language import get_language_by_id
from app.crud.stt_evaluations import (
get_samples_by_dataset_id,
get_stt_dataset_by_id,
list_stt_datasets,
get_samples_by_dataset_id,
)
from app.models.stt_evaluation import (
STTDatasetCreate,
Expand Down Expand Up @@ -168,15 +168,10 @@ def get_dataset(
samples = []
for s in sample_records:
signed_url = None
if include_signed_url and storage and s.file_id in file_map:
try:
signed_url = storage.get_signed_url(
file_map.get(s.file_id).object_store_url
)
except Exception as e:
logger.warning(
f"[get_dataset] Failed to generate signed URL for file_id {s.file_id}: {e}"
)
if storage and s.file_id in file_map:
signed_url = storage.get_signed_url(
file_map[s.file_id].object_store_url
)
Comment on lines +171 to +174
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Missing error handling for storage.get_signed_url() will cause request failures.

Same issue as in backend/app/crud/stt_evaluations/result.py: the get_signed_url method raises CloudStorageError on AWS failures. Without try/except, any URL generation failure will return a 500 error to the client instead of gracefully setting signed_url = None for that sample.

🛡️ Proposed fix to restore defensive error handling
         samples = []
         for s in sample_records:
             signed_url = None
-            if storage and s.file_id in file_map:
-                signed_url = storage.get_signed_url(
-                    file_map[s.file_id].object_store_url
-                )
+            if storage and s.file_id in file_map:
+                try:
+                    signed_url = storage.get_signed_url(
+                        file_map[s.file_id].object_store_url
+                    )
+                except Exception:
+                    logger.warning(
+                        f"[get_dataset] Failed to generate signed URL for file {s.file_id}"
+                    )
+                    signed_url = None
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/api/routes/stt_evaluations/dataset.py` around lines 171 - 174,
Wrap the call to storage.get_signed_url(...) in a try/except that catches
CloudStorageError and falls back to signed_url = None so a failure to generate a
signed URL doesn't crash the request; update the block where storage and
s.file_id are checked (variables: storage, s, file_map, signed_url) to mirror
the defensive pattern used in backend/app/crud/stt_evaluations/result.py,
logging or swallowing the CloudStorageError as appropriate and ensuring
signed_url is set to None on error.


samples.append(
STTSamplePublic(
Expand Down
11 changes: 11 additions & 0 deletions backend/app/api/routes/tts_evaluations/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from app.api.deps import AuthContextDep, SessionDep
from app.api.permissions import Permission, require_permission
from app.celery.utils import start_low_priority_job
from app.core.cloud import get_cloud_storage
from app.crud.tts_evaluations import (
create_tts_run,
get_results_by_run_id,
Expand Down Expand Up @@ -169,6 +170,9 @@ def get_tts_evaluation_run(
auth_context: AuthContextDep,
run_id: int,
include_results: bool = Query(True, description="Include results in response"),
include_signed_url: bool = Query(
False, description="Include signed URLs for generated audio files"
),
) -> APIResponse[TTSEvaluationRunWithResults]:
"""Get a TTS evaluation run with results."""
run = get_tts_run_by_id(
Expand All @@ -185,11 +189,18 @@ def get_tts_evaluation_run(
results_total = 0

if include_results:
storage = None
if include_signed_url:
storage = get_cloud_storage(
session=session, project_id=auth_context.project_.id
)

results, results_total = get_results_by_run_id(
session=session,
run_id=run_id,
org_id=auth_context.organization_.id,
project_id=auth_context.project_.id,
storage=storage,
)

return APIResponse.success_response(
Expand Down
17 changes: 15 additions & 2 deletions backend/app/api/routes/tts_evaluations/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

import logging

from fastapi import APIRouter, Body, Depends, HTTPException
from fastapi import APIRouter, Body, Depends, HTTPException, Query

from app.api.deps import AuthContextDep, SessionDep
from app.api.permissions import Permission, require_permission
from app.core.cloud import get_cloud_storage
from app.crud.tts_evaluations import (
get_tts_result_by_id,
update_tts_human_feedback,
Expand Down Expand Up @@ -92,6 +93,9 @@ def get_result(
session: SessionDep,
auth_context: AuthContextDep,
result_id: int,
include_signed_url: bool = Query(
False, description="Include signed URL for generated audio file"
),
) -> APIResponse[TTSResultPublic]:
"""Get a TTS result by ID."""
result = get_tts_result_by_id(
Expand All @@ -104,4 +108,13 @@ def get_result(
if not result:
raise HTTPException(status_code=404, detail="Result not found")

return APIResponse.success_response(data=TTSResultPublic.from_model(result))
signed_url = None
if include_signed_url and result.object_store_url is not None:
storage = get_cloud_storage(
session=session, project_id=auth_context.project_.id
)
signed_url = storage.get_signed_url(result.object_store_url)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If result.object_store_url is None, this call will raise an error, so add a guard such as `if result.object_store_url is not None`. If that is not an issue, you can ignore this comment.


return APIResponse.success_response(
data=TTSResultPublic.from_model(result, signed_url=signed_url)
Comment on lines +111 to +119
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Missing error handling for cloud storage operations will cause 500 errors.

Multiple unhandled exception sources:

  1. get_cloud_storage() can raise ValueError if project not found, or re-raise initialization errors (see backend/app/core/cloud/storage.py:267-284).
  2. storage.get_signed_url() raises CloudStorageError on AWS failures.
  3. result.object_store_url may be None for pending/failed results.

All of these will return 500 errors instead of gracefully returning signed_url = None.

🛡️ Proposed fix with comprehensive error handling
     signed_url = None
     if include_signed_url:
-        storage = get_cloud_storage(
-            session=session, project_id=auth_context.project_.id
-        )
-        signed_url = storage.get_signed_url(result.object_store_url)
-
-    return APIResponse.success_response(
-        data=TTSResultPublic.from_model(result, signed_url=signed_url)
-    )
+        if result.object_store_url:
+            try:
+                storage = get_cloud_storage(
+                    session=session, project_id=auth_context.project_.id
+                )
+                signed_url = storage.get_signed_url(result.object_store_url)
+            except Exception:
+                logger.warning(
+                    f"[get_result] Failed to generate signed URL for result {result_id}"
+                )
+                signed_url = None
+
+    return APIResponse.success_response(
+        data=TTSResultPublic.from_model(result, signed_url=signed_url)
+    )
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/api/routes/tts_evaluations/result.py` around lines 111 - 119,
When include_signed_url is true, guard the cloud storage flow to avoid raising
unhandled exceptions: check result.object_store_url for truthiness before
calling get_cloud_storage or storage.get_signed_url; wrap calls to
get_cloud_storage(...) and storage.get_signed_url(...) in try/except that
catches ValueError and CloudStorageError (and any initialization errors), log
the error, and leave signed_url = None on failure; then pass signed_url
(possibly None) into TTSResultPublic.from_model so the endpoint returns a
graceful success response instead of a 500.

)
17 changes: 5 additions & 12 deletions backend/app/crud/stt_evaluations/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

import logging

from sqlmodel import Session, select, func
from sqlmodel import Session, func, select

from app.core.cloud.storage import CloudStorage
from app.core.exception_handlers import HTTPException
from app.core.util import now
from app.models.file import File
from app.core.cloud.storage import CloudStorage
from app.models.stt_evaluation import (
STTResult,
STTResultWithSample,
STTSample,
STTSamplePublic,
STTResultWithSample,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -103,14 +103,7 @@ def get_results_by_run_id(
# Convert to response models
results = []
for result, sample, file in rows:
signed_url = None
if storage:
try:
signed_url = storage.get_signed_url(file.object_store_url)
except Exception as e:
logger.warning(
f"[get_results_by_run_id] Failed to generate signed URL: {e}"
)
signed_url = storage.get_signed_url(file.object_store_url) if storage else None
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Missing error handling for storage.get_signed_url() will cause request failures.

The get_signed_url method raises CloudStorageError on AWS failures (see backend/app/core/cloud/storage.py:241-247). Without try/except, a single URL generation failure will crash the entire request with a 500 error instead of gracefully returning None for that sample's signed_url.

This contradicts the PR description's claim of "improved error handling...to gracefully handle failures without raising exceptions" and breaks the established codebase pattern (see backend/app/services/stt_evaluations/helpers.py:27-35 and backend/app/crud/stt_evaluations/batch.py:96-106).

🛡️ Proposed fix to restore defensive error handling
     results = []
     for result, sample, file in rows:
-        signed_url = storage.get_signed_url(file.object_store_url) if storage else None
+        signed_url = None
+        if storage:
+            try:
+                signed_url = storage.get_signed_url(file.object_store_url)
+            except Exception:
+                logger.warning(
+                    f"[get_results_by_run_id] Failed to generate signed URL for file {file.id}"
+                )
+                signed_url = None
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/crud/stt_evaluations/result.py` at line 106, the signed_url
assignment using storage.get_signed_url may raise CloudStorageError and must be
wrapped in a try/except: catch app.core.cloud.storage.CloudStorageError
(or the imported alias) around the call in result.py where signed_url =
storage.get_signed_url(file.object_store_url), log the error (at warning or
debug level) and set signed_url = None on failure so a single URL failure
doesn't raise a 500; mirror the defensive pattern used in
services/stt_evaluations/helpers.py and crud/stt_evaluations/batch.py.


sample_public = STTSamplePublic(
id=sample.id,
Expand Down Expand Up @@ -226,4 +219,4 @@ def count_results_by_status(

rows = session.exec(statement).all()

return {status: count for status, count in rows}
return dict(rows)
8 changes: 4 additions & 4 deletions backend/app/crud/tts_evaluations/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

from sqlmodel import Session

from app.models.batch_job import BatchJobType

from app.celery.utils import start_low_priority_job
from app.core.batch import GeminiBatchProvider
from app.crud.evaluations.cron_utils import (
Expand All @@ -29,7 +27,7 @@
)
from app.crud.tts_evaluations.run import update_tts_run
from app.models import EvaluationRun
from app.models.batch_job import BatchJob
from app.models.batch_job import BatchJob, BatchJobType
from app.models.job import JobStatus
from app.models.stt_evaluation import EvaluationType

Expand Down Expand Up @@ -151,7 +149,9 @@ async def _on_batch_succeeded(batch_job: BatchJob, provider_name: str) -> bool:
return True

async def _on_already_succeeded(batch_job: BatchJob, provider_name: str) -> bool:
pending = get_pending_results_for_run(session, run.id, provider_name)
pending = get_pending_results_for_run(
session=session, run_id=run.id, provider=provider_name
)
if pending:
logger.info(
f"{log_prefix} Dispatching reprocessing for "
Expand Down
12 changes: 10 additions & 2 deletions backend/app/crud/tts_evaluations/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from sqlmodel import Session, func, select

from app.core.cloud.storage import CloudStorage
from app.core.exception_handlers import HTTPException
from app.core.util import now
from app.models.job import JobStatus
Expand Down Expand Up @@ -104,6 +105,7 @@ def get_results_by_run_id(
run_id: int,
org_id: int,
project_id: int,
storage: CloudStorage | None = None,
) -> tuple[list[TTSResultPublic], int]:
"""Get all results for an evaluation run.

Expand All @@ -112,6 +114,7 @@ def get_results_by_run_id(
run_id: Run ID
org_id: Organization ID
project_id: Project ID
storage: Optional cloud storage instance for generating signed URLs

Returns:
tuple[list[TTSResultPublic], int]: Results and total count
Expand All @@ -127,7 +130,12 @@ def get_results_by_run_id(
rows = session.exec(statement).all()
total = len(rows)

results = [TTSResultPublic.from_model(result) for result in rows]
results = []
for result in rows:
signed_url = (
storage.get_signed_url(result.object_store_url) if storage else None
)
results.append(TTSResultPublic.from_model(result, signed_url=signed_url))
Comment on lines +133 to +138
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Missing error handling and null check for signed URL generation.

Two issues:

  1. storage.get_signed_url() raises CloudStorageError on AWS failures, which will crash the entire request.
  2. result.object_store_url may be None for pending/failed TTS results that haven't generated audio yet. Calling get_signed_url(None) will fail.
🛡️ Proposed fix with error handling and null check
     results = []
     for result in rows:
-        signed_url = (
-            storage.get_signed_url(result.object_store_url) if storage else None
-        )
+        signed_url = None
+        if storage and result.object_store_url:
+            try:
+                signed_url = storage.get_signed_url(result.object_store_url)
+            except Exception:
+                logger.warning(
+                    f"[get_results_by_run_id] Failed to generate signed URL for result {result.id}"
+                )
+                signed_url = None
         results.append(TTSResultPublic.from_model(result, signed_url=signed_url))
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/crud/tts_evaluations/result.py` around lines 133 - 138, In the
loop that builds results (where results = [] and you call
TTSResultPublic.from_model), guard against None and storage failures by only
calling storage.get_signed_url when result.object_store_url is not None, and
wrap that call in a try/except that catches CloudStorageError (or the storage
client's specific exception) to set signed_url = None on failure; also consider
logging the exception for debugging before continuing so a single failed
signed-URL generation won’t crash the whole request.


return results, total

Expand Down Expand Up @@ -294,4 +302,4 @@ def count_results_by_status(

rows = session.exec(statement).all()

return {status: count for status, count in rows}
return dict(rows)
9 changes: 8 additions & 1 deletion backend/app/models/tts_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ class TTSResultPublic(BaseModel):
id: int
sample_text: str
object_store_url: str | None
signed_url: str | None = None
duration_seconds: float | None = None
size_bytes: int | None = None
provider: str
Expand All @@ -224,12 +225,18 @@ class TTSResultPublic(BaseModel):
updated_at: datetime

@classmethod
def from_model(cls, result: TTSResult) -> TTSResultPublic:
def from_model(
cls,
result: TTSResult,
*,
signed_url: str | None = None,
) -> TTSResultPublic:
"""Create from a TTSResult model instance."""
return cls(
id=result.id,
sample_text=result.sample_text,
object_store_url=result.object_store_url,
signed_url=signed_url,
duration_seconds=(result.metadata_ or {}).get("duration_seconds"),
size_bytes=(result.metadata_ or {}).get("size_bytes"),
provider=result.provider,
Expand Down
22 changes: 9 additions & 13 deletions backend/app/tests/api/routes/test_stt_evaluation.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import pytest
from unittest.mock import MagicMock, patch

import pytest
from fastapi.testclient import TestClient
from sqlmodel import Session

from app.models import EvaluationDataset, EvaluationRun, File, FileType
from app.models.stt_evaluation import STTSample, STTResult, EvaluationType
from app.core.util import now
from app.crud.language import get_language_by_locale
from app.models import EvaluationDataset, EvaluationRun, File, FileType
from app.models.stt_evaluation import EvaluationType, STTResult, STTSample
Comment on lines +3 to +10
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Keep the failure-path mocks exception-based so these tests cover the real contract.

Line 744 and Line 1077 now return None, but get_signed_url() fails by raising CloudStorageError, not by returning None. This change stops both tests from exercising the actual graceful-error-handling path, so they will no longer catch the 500 regression in the route/CRUD code.

🧪 Suggested fix
+from app.core.cloud.storage import CloudStorageError
 from app.core.util import now
@@
-        mock_storage.get_signed_url.return_value = None
+        mock_storage.get_signed_url.side_effect = CloudStorageError("S3 error")
@@
-        mock_storage.get_signed_url.return_value = None
+        mock_storage.get_signed_url.side_effect = CloudStorageError("S3 error")

Also applies to: 744-745, 1077-1078

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/tests/api/routes/test_stt_evaluation.py` around lines 3 - 10, the
failure-path mocks in test_stt_evaluation.py return None, but the real
get_signed_url() raises CloudStorageError on failure. Update those mocks to
raise CloudStorageError instead of returning None so the tests exercise the
actual error-handling path: locate each mock/patch of get_signed_url() in the
failure tests and replace `return_value = None` with
`side_effect = CloudStorageError(...)` (importing CloudStorageError from
app.core.cloud.storage) so the route/CRUD code hits the same exception flow as
production.

from app.tests.utils.auth import TestAuthContext
from app.core.util import now


# Helper functions
Expand Down Expand Up @@ -741,9 +741,7 @@ def test_get_stt_dataset_signed_url_failure(
"""Test getting an STT dataset when signed URL generation fails."""
# Mock cloud storage to raise an exception
mock_storage = MagicMock()
mock_storage.get_signed_url.side_effect = Exception(
"Failed to generate signed URL"
)
mock_storage.get_signed_url.return_value = None
mock_get_cloud_storage.return_value = mock_storage

dataset = create_test_stt_dataset(
Expand Down Expand Up @@ -1076,9 +1074,7 @@ def test_get_stt_run_signed_url_failure(
"""Test getting an STT run when signed URL generation fails."""
# Mock cloud storage to raise an exception
mock_storage = MagicMock()
mock_storage.get_signed_url.side_effect = Exception(
"Failed to generate signed URL"
)
mock_storage.get_signed_url.return_value = None
mock_get_cloud_storage.return_value = mock_storage

# Create dataset, sample, run, and result
Expand Down Expand Up @@ -1278,7 +1274,7 @@ def test_list_audio_files_with_signed_urls(
mock_get_cloud_storage.return_value = mock_storage

# Create test file
file = create_test_file(
_file = create_test_file(
db=db,
organization_id=user_api_key.organization_id,
project_id=user_api_key.project_id,
Expand Down Expand Up @@ -1314,7 +1310,7 @@ def test_list_audio_files_without_signed_urls(
) -> None:
"""Test listing audio files without signed URLs."""
# Create test file
file = create_test_file(
_file = create_test_file(
db=db,
organization_id=user_api_key.organization_id,
project_id=user_api_key.project_id,
Expand Down Expand Up @@ -1345,7 +1341,7 @@ def test_list_audio_files_project_isolation(
) -> None:
"""Test that audio files are isolated by project."""
# Create file in user's project
user_file = create_test_file(
_user_file = create_test_file(
db=db,
organization_id=user_api_key.organization_id,
project_id=user_api_key.project_id,
Expand Down
Loading