Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""alter doc transform table for celery

Revision ID: eed36ae3c79a
Revises: ecda6b144627
Create Date: 2025-11-12 20:08:39.774862

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "eed36ae3c79a"
down_revision = "ecda6b144627"
branch_labels = None
depends_on = None


def upgrade():
    """Apply this revision to the ``doc_transformation_job`` table.

    Adds two nullable string columns, ``task_id`` and ``trace_id``, and
    then renames ``created_at`` to ``inserted_at``.
    """
    table = "doc_transformation_job"

    # Columns are added in the same order as before: task_id, then trace_id.
    for column_name in ("task_id", "trace_id"):
        op.add_column(
            table,
            sa.Column(
                column_name,
                sqlmodel.sql.sqltypes.AutoString(),
                nullable=True,
            ),
        )

    op.alter_column(table, "created_at", new_column_name="inserted_at")


def downgrade():
    """Revert this revision on the ``doc_transformation_job`` table.

    Renames ``inserted_at`` back to ``created_at`` and then drops the
    ``trace_id`` and ``task_id`` columns, mirroring ``upgrade`` in reverse.
    """
    table = "doc_transformation_job"

    op.alter_column(table, "inserted_at", new_column_name="created_at")

    # Drop in reverse order of creation: trace_id first, then task_id.
    for column_name in ("trace_id", "task_id"):
        op.drop_column(table, column_name)
2 changes: 1 addition & 1 deletion backend/app/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
assistants,
collections,
config,
documents,
doc_transformation_job,
documents,
login,
llm,
organization,
Expand Down
82 changes: 67 additions & 15 deletions backend/app/api/routes/doc_transformation_job.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,97 @@
from uuid import UUID
import logging

from fastapi import APIRouter, HTTPException, Query, Path as FastPath
from fastapi import APIRouter, HTTPException, Query, Path

from app.api.deps import CurrentUserOrgProject, SessionDep
from app.crud.doc_transformation_job import DocTransformationJobCrud
from app.models import DocTransformationJob, DocTransformationJobs
from app.crud import DocTransformationJobCrud, DocumentCrud
from app.models import (
DocTransformationJobPublic,
DocTransformationJobsPublic,
TransformedDocumentPublic,
)
from app.utils import APIResponse
from app.services.documents.helpers import build_job_schema, build_job_schemas
from app.core.cloud import get_cloud_storage


# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router for the document-transformation job endpoints. An earlier binding
# of `router` (prefix "/documents/transformations") was overwritten by this
# one before any route was registered, so only this definition is kept.
router = APIRouter(prefix="/documents/transformation", tags=["documents"])


@router.get(
    "/{job_id}",
    description="Get the status and details of a document transformation job.",
    response_model=APIResponse[DocTransformationJobPublic],
)
def get_transformation_job(
    session: SessionDep,
    current_user: CurrentUserOrgProject,
    job_id: UUID = Path(..., description="Transformation job ID"),
    include_url: bool = Query(
        False, description="Include a signed URL for the transformed document"
    ),
):
    """Return one document transformation job wrapped in an ``APIResponse``.

    Args:
        session: Database session dependency.
        current_user: Authenticated caller; its ``project_id`` scopes both
            CRUD helpers and the cloud storage lookup.
        job_id: ID of the transformation job to read.
        include_url: When true, a cloud storage handle is obtained and
            passed to ``build_job_schema`` together with this flag.

    Returns:
        ``APIResponse.success_response`` applied to the value produced by
        ``build_job_schema``.

    NOTE(review): what happens for an unknown ``job_id`` is decided by
    ``DocTransformationJobCrud.read_one``, which is not visible here.
    """
    job_crud = DocTransformationJobCrud(session, current_user.project_id)
    doc_crud = DocumentCrud(session, current_user.project_id)

    job = job_crud.read_one(job_id)

    # Only resolve cloud storage when the caller asked for a URL.
    storage = (
        get_cloud_storage(session=session, project_id=current_user.project_id)
        if include_url
        else None
    )

    job_schema = build_job_schema(
        job=job,
        doc_crud=doc_crud,
        include_url=include_url,
        storage=storage,
    )
    return APIResponse.success_response(job_schema)


@router.get(
    "/",
    description="Get the status and details of multiple document transformation jobs by IDs.",
    response_model=APIResponse[DocTransformationJobsPublic],
)
def get_multiple_transformation_jobs(
    session: SessionDep,
    current_user: CurrentUserOrgProject,
    # `min_length` / `max_length` are the named `Query` constraints.
    # NOTE(review): assumes Pydantic v2, where FastAPI passes unknown keywords
    # such as `min_items` / `max_items` only to the JSON schema instead of
    # enforcing them -- confirm against the project's pinned versions.
    job_ids: list[UUID] = Query(
        ...,
        description="List of transformation job IDs",
        min_length=1,
        max_length=100,
    ),
    include_url: bool = Query(
        False, description="Include a signed URL for each transformed document"
    ),
):
    """Return several document transformation jobs in one ``APIResponse``.

    Args:
        session: Database session dependency.
        current_user: Authenticated caller; its ``project_id`` scopes both
            CRUD helpers and the cloud storage lookup.
        job_ids: IDs of the jobs to read; duplicates are collapsed before
            the lookup.
        include_url: When true, a cloud storage handle is obtained and
            passed to ``build_job_schemas`` together with this flag.

    Returns:
        ``APIResponse.success_response`` applied to a
        ``DocTransformationJobsPublic`` holding the built job schemas and
        ``jobs_not_found``, the requested IDs that were absent from the
        jobs returned by ``read_each``.
    """
    job_crud = DocTransformationJobCrud(session, project_id=current_user.project_id)
    doc_crud = DocumentCrud(session, project_id=current_user.project_id)

    # Build the de-duplicated ID set once; it drives both the lookup and
    # the "not found" difference below.
    requested_ids = set(job_ids)
    jobs = job_crud.read_each(requested_ids)
    jobs_found_ids = {job.id for job in jobs}
    jobs_not_found = requested_ids - jobs_found_ids

    # Only resolve cloud storage when the caller asked for URLs.
    storage = (
        get_cloud_storage(session=session, project_id=current_user.project_id)
        if include_url
        else None
    )

    job_schemas = build_job_schemas(
        jobs=jobs,
        doc_crud=doc_crud,
        include_url=include_url,
        storage=storage,
    )

    return APIResponse.success_response(
        DocTransformationJobsPublic(
            jobs=job_schemas,
            jobs_not_found=list(jobs_not_found),
        )
    )
Loading