Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
8123e26
fix process_response argument
avirajsingh7 Sep 12, 2025
073a61c
Refactor process_response to use get_openai_client for OpenAI API cli…
avirajsingh7 Sep 12, 2025
a757f29
Refactor response api:
avirajsingh7 Sep 17, 2025
824652e
move response code to service
avirajsingh7 Sep 17, 2025
b471c3e
Implement job management and integrate it with response processing
avirajsingh7 Sep 18, 2025
580c3f5
pass trace id to job table
avirajsingh7 Sep 18, 2025
0de7cd0
pre commit and pass trace id to celery
avirajsingh7 Sep 18, 2025
19928ab
add task id to response log
avirajsingh7 Sep 18, 2025
b1e8fb4
Refactor response handling: split response logic into separate modules
avirajsingh7 Sep 22, 2025
3d9b2fa
Refactor callback handling: move send_callback function to utils and …
avirajsingh7 Sep 22, 2025
b962760
Add ResponseJobStatus model and update responses endpoint to return s…
avirajsingh7 Sep 22, 2025
7764568
fix init
avirajsingh7 Sep 22, 2025
a569485
Add tests for JobCrud and response job handling
avirajsingh7 Sep 22, 2025
87ecdb6
update Job model to use string for task_id
avirajsingh7 Sep 22, 2025
96160c6
Add tests for response generation and processing, including success a…
avirajsingh7 Sep 22, 2025
f150372
pre commit
avirajsingh7 Sep 23, 2025
e7fe68d
rename test_jobs
avirajsingh7 Sep 23, 2025
e62fa53
move tenant from user to project, add collections in services folder …
nishika26 Sep 23, 2025
bc40a18
Add flower dependency to pyproject.toml and uv.lock
avirajsingh7 Sep 24, 2025
f3f1565
Refactor job table migration and enhance error handling in job schedu…
avirajsingh7 Sep 24, 2025
7504ef7
Add CALLBACK_TIMEOUT setting and update send_callback to use it
avirajsingh7 Sep 24, 2025
1268dfb
Add callback timeout settings and update send_callback function to us…
avirajsingh7 Sep 24, 2025
6bc2a47
adding batching document in helper function, updating test cases for …
nishika26 Sep 24, 2025
65380f8
Remove unused response_chunks in CallbackResponse and update related …
avirajsingh7 Sep 25, 2025
db0c276
Fix update_job test to assert failure status and correct error message
avirajsingh7 Sep 25, 2025
6fe72d6
Refactor get_additional_data function to simplify exclusion logic for…
avirajsingh7 Sep 25, 2025
eaf8202
Update job_type field description for clarity and consistency
avirajsingh7 Sep 25, 2025
fcbd764
Remove unused imports and add conditional previous_response_id in gen…
avirajsingh7 Sep 25, 2025
4083b62
services test cases
nishika26 Sep 25, 2025
37e0b7a
Merge branch 'feature/response_api_to_celery' into feature/collection…
nishika26 Sep 25, 2025
2a69b55
Merge branch 'main' into feature/collection_to_celery
nishika26 Sep 26, 2025
7f948fb
fixing alembic head
nishika26 Sep 26, 2025
e274c45
final push for collection jobs
nishika26 Oct 1, 2025
4397434
collection job info test fix
nishika26 Oct 1, 2025
f5a5871
PR Reviews fixes
nishika26 Oct 6, 2025
d741ef6
removing unused imports
nishika26 Oct 6, 2025
81e0890
Merge branch 'main' into feature/collection_to_celery
nishika26 Oct 6, 2025
8e8fe7d
migration for removing failed collection columns
nishika26 Oct 6, 2025
5b6f2f2
migration for removing failed collection columns
nishika26 Oct 6, 2025
4a1f5ff
coderabbit and pr review fixes
nishika26 Oct 7, 2025
14d50a8
minor changes
nishika26 Oct 7, 2025
1dcab67
minor fixes
nishika26 Oct 7, 2025
7b09e3f
pr reviews
nishika26 Oct 8, 2025
95ef105
minimal fixes
nishika26 Oct 8, 2025
4de8c1d
changing router name
nishika26 Oct 8, 2025
ebf0cc8
Merge branch 'main' into feature/collection_to_celery
nishika26 Oct 8, 2025
2c0a11d
last pr review changes
nishika26 Oct 9, 2025
94234b1
minimal fixes
nishika26 Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""delete processing and failed columns from collection table

Revision ID: 7ab577d3af26
Revises: c6fb6d0b5897
Create Date: 2025-10-06 13:59:28.561706

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes


# revision identifiers, used by Alembic.
revision = "7ab577d3af26"
down_revision = "c6fb6d0b5897"
branch_labels = None
depends_on = None


def upgrade():
    """Purge collection rows left in a non-successful or inconsistent state.

    The old model tracked job status directly on the collection table; rows
    stuck in 'processing' or 'failed' are transient artifacts that must be
    removed before the follow-up migration drops the status column and makes
    llm_service_id NOT NULL.
    """
    # Drop collections that never completed successfully.
    op.execute(
        """
        DELETE FROM collection
        WHERE status IN ('processing', 'failed')
        """
    )
    # Defensive cleanup: a successful collection should always carry an
    # llm_service_id (status and id are committed in the same transaction),
    # but delete any stragglers so the later NOT NULL constraint cannot fail.
    op.execute(
        """
        DELETE FROM collection
        WHERE llm_service_id IS NULL
        """
    )


def downgrade():
    # Intentionally a no-op: the rows deleted by upgrade() cannot be
    # reconstructed, so this data migration is irreversible.
    pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""adding collection job table and altering collections table

Revision ID: b30727137e65
Revises: 7ab577d3af26
Create Date: 2025-10-05 14:19:14.213933

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "b30727137e65"
down_revision = "7ab577d3af26"
branch_labels = None
depends_on = None

# Postgres ENUM types backing the collection_jobs columns. create_type=False
# stops SQLAlchemy from auto-creating them during create_table(); upgrade()
# creates them explicitly with checkfirst=True instead.
collection_job_status_enum = postgresql.ENUM(
    "PENDING",
    "PROCESSING",
    "SUCCESSFUL",
    "FAILED",
    name="collectionjobstatus",
    create_type=False,
)

# Which operation a collection job represents.
collection_action_type = postgresql.ENUM(
    "CREATE",
    "DELETE",
    name="collectionactiontype",
    create_type=False,
)


def upgrade():
    """Create the collection_jobs table and move job state off collection.

    Job-lifecycle columns (status, error_message) and ownership (owner_id)
    leave the collection table; the collection table now only holds
    successfully created resources, so llm_service_id/name become NOT NULL.
    """
    # checkfirst=True keeps this idempotent if the enum types already exist
    # (e.g. created by a partially applied earlier run).
    collection_job_status_enum.create(op.get_bind(), checkfirst=True)
    collection_action_type.create(op.get_bind(), checkfirst=True)

    op.create_table(
        "collection_jobs",
        sa.Column("action_type", collection_action_type, nullable=False),
        # Nullable: a CREATE job has no collection until it succeeds.
        sa.Column("collection_id", sa.Uuid(), nullable=True),
        sa.Column("project_id", sa.Integer(), nullable=False),
        sa.Column("id", sa.Uuid(), nullable=False),
        sa.Column("status", collection_job_status_enum, nullable=False),
        sa.Column("task_id", sa.String(), nullable=True),
        sa.Column("trace_id", sa.String(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column("inserted_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(
            ["collection_id"], ["collection.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )

    # Align collection with the new naming/constraints. Safe because the
    # preceding migration removed rows with NULL llm_service_id.
    op.alter_column("collection", "created_at", new_column_name="inserted_at")
    op.alter_column(
        "collection", "llm_service_id", existing_type=sa.VARCHAR(), nullable=False
    )
    op.alter_column(
        "collection", "llm_service_name", existing_type=sa.VARCHAR(), nullable=False
    )
    op.drop_constraint("collection_owner_id_fkey", "collection", type_="foreignkey")
    op.drop_column("collection", "owner_id")
    op.drop_column("collection", "status")
    op.drop_column("collection", "error_message")


def downgrade():
    """Restore the pre-migration collection schema and drop collection_jobs.

    Order matters: columns are added nullable, backfilled, then tightened to
    NOT NULL, because existing rows cannot satisfy the constraints up front.
    NOTE(review): the enum types created in upgrade() are not dropped here.
    """
    op.add_column(
        "collection",
        sa.Column("error_message", sa.VARCHAR(), autoincrement=False, nullable=True),
    )
    # Recreate the old status enum for the restored column.
    collectionstatus = postgresql.ENUM(
        "processing", "successful", "failed", name="collectionstatus"
    )

    op.add_column(
        "collection",
        sa.Column(
            "status",
            collectionstatus,
            server_default=sa.text("'processing'::collectionstatus"),
            nullable=True,
        ),
    )
    op.add_column(
        "collection",
        sa.Column("owner_id", sa.Integer(), nullable=True),
    )

    # Backfill so the NOT NULL alters below succeed.
    op.execute("UPDATE collection SET status = 'processing' WHERE status IS NULL")
    # NOTE(review): assumes a user row with id=1 exists, otherwise the FK
    # created next will fail -- confirm before relying on this downgrade.
    op.execute("UPDATE collection SET owner_id = 1 WHERE owner_id IS NULL")
    op.create_foreign_key(
        "collection_owner_id_fkey",
        "collection",
        "user",
        ["owner_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.alter_column("collection", "status", nullable=False)
    op.alter_column("collection", "owner_id", nullable=False)
    op.alter_column("collection", "inserted_at", new_column_name="created_at")
    # Relax the NOT NULL constraints introduced by upgrade().
    op.alter_column(
        "collection", "llm_service_name", existing_type=sa.VARCHAR(), nullable=True
    )
    op.alter_column(
        "collection", "llm_service_id", existing_type=sa.VARCHAR(), nullable=True
    )
    op.drop_table("collection_jobs")
11 changes: 6 additions & 5 deletions backend/app/api/docs/collections/create.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ OpenAI. Failure can occur from OpenAI being down, or some parameter
value being invalid. It can also fail due to document types not be
accepted. This is especially true for PDFs that may not be parseable.

The immediate response from the endpoint is a packet containing a
`key`. Once the collection has been created, information about the
collection will be returned to the user via the callback URL. If a
callback URL is not provided, clients can poll the `info` endpoint
with the `key` to retrieve the same information.
The immediate response from the endpoint is a `collection_job` object
containing the collection job ID, its status, and the action type
("CREATE"). Once the collection has been created, information about the
collection will be returned to the user via the callback URL. If a callback
URL is not provided, clients can poll the `collection job info` endpoint
with the `id` from the `collection_job` object to retrieve the same information.
6 changes: 5 additions & 1 deletion backend/app/api/docs/collections/delete.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@ Remove a collection from the platform. This is a two step process:

No action is taken on the documents themselves: the contents of the
documents that were a part of the collection remain unchanged, those
documents can still be accessed via the documents endpoints.
documents can still be accessed via the documents endpoints. The response from this
endpoint is a `collection_job` object containing the collection `job ID`, its status,
and the action type ("DELETE"). When the returned ID is used with the collection job
info endpoint and the job has succeeded, the status is reported as successful and no
collection object is returned, since the collection has been deleted.
7 changes: 3 additions & 4 deletions backend/app/api/docs/collections/info.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Retrieve all AI-platform information about a collection given its
ID. This route is very helpful for:
Retrieve detailed information about a specific collection by its ID. Note that this endpoint CANNOT be used to poll for collection creation: a collection record exists only after the resource has been successfully created and associated.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One suggestion for all the md/docs files: these are user-facing documents, so they should focus on describing the behavior of the endpoints, not their internal implementation details.
For example:

  • If an endpoint triggers a webhook, simply mention that it does, no need to explain how it’s handled internally.
  • If an endpoint deletes a resource, just state that it deletes it, don’t specify whether it’s a soft or hard delete.

Similarly, avoid mentioning internal mechanisms like can be use for polling or background tasks unless they directly affect how the user interacts with the API.

Copy link
Copy Markdown
Collaborator Author

@nishika26 nishika26 Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

noted, but since for now the main users of this endpoint had been dalgo, I think keeping that in mind I wrote the docs in that way . Stating that you need to poll on the job id returned is something that the user needs to know in this case. The fact that the deletion is soft or hard is sometimes needed to be known considering that our current user of the platform is dalgo, glific and us(the internal team) . So I think we can keep the docs which go into the behaviour of endpoints , anyway we will discuss this with other people as well, and according to their opinion we will apply those changes in v2


* Understanding whether a `create` request has finished
* Obtaining the OpenAI assistant ID (`llm_service_id`)
This endpoint returns metadata for the collection, including its project, organization,
timestamps, and associated LLM service details (`llm_service_id`).
12 changes: 12 additions & 0 deletions backend/app/api/docs/collections/job_info.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Retrieve information about a collection job by its ID. This is the polling endpoint for collection jobs: it provides detailed status and metadata for a specific collection job in the AI platform. It is especially useful for:

* Fetching the collection job object, which contains the job ID, the collection ID, the job status, and any error message.

* If a CREATE job has finished successfully, this endpoint also returns the associated collection details, including:
  - `llm_service_id`: The OpenAI assistant or model used for the collection.
  - Collection metadata such as ID, project, organization, and timestamps.

* If a DELETE job was successful, the status is reported as successful and no collection object is returned, since the collection has been deleted.

* Providing a simplified error message in the collection job object when a job has failed.
2 changes: 2 additions & 0 deletions backend/app/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
credentials,
fine_tuning,
model_evaluation,
collection_job,
)
from app.core.config import settings

api_router = APIRouter()
api_router.include_router(api_keys.router)
api_router.include_router(assistants.router)
api_router.include_router(collections.router)
api_router.include_router(collection_job.router)
api_router.include_router(credentials.router)
api_router.include_router(documents.router)
api_router.include_router(doc_transformation_job.router)
Expand Down
50 changes: 50 additions & 0 deletions backend/app/api/routes/collection_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import logging
from uuid import UUID

from fastapi import APIRouter
from fastapi import Path as FastPath


from app.api.deps import SessionDep, CurrentUserOrgProject
from app.crud import (
CollectionCrud,
CollectionJobCrud,
)
from app.models import CollectionJobStatus, CollectionJobPublic, CollectionActionType
from app.models.collection import CollectionPublic
from app.utils import APIResponse, load_description
from app.services.collections.helpers import extract_error_message


logger = logging.getLogger(__name__)
router = APIRouter(prefix="/collections", tags=["collections"])


@router.get(
    "/info/jobs/{job_id}",
    description=load_description("collections/job_info.md"),
    response_model=APIResponse[CollectionJobPublic],
)
def collection_job_info(
    session: SessionDep,
    current_user: CurrentUserOrgProject,
    job_id: UUID = FastPath(description="Collection job to retrieve"),
):
    """Return status/metadata for a collection job; for a successfully
    completed CREATE job, also attach the resulting collection's details."""
    project_id = current_user.project_id
    job = CollectionJobCrud(session, project_id).read_one(job_id)
    result = CollectionJobPublic.model_validate(job)

    finished_create = (
        job.status == CollectionJobStatus.SUCCESSFUL
        and job.action_type == CollectionActionType.CREATE
        and job.collection_id
    )
    if finished_create:
        collection = CollectionCrud(session, project_id).read_one(job.collection_id)
        result.collection = CollectionPublic.model_validate(collection)

    if job.status == CollectionJobStatus.FAILED and result.error_message:
        # Surface a simplified, user-facing message instead of the raw error.
        result.error_message = extract_error_message(result.error_message)

    return APIResponse.success_response(data=result)
Loading