Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
8123e26
fix process_response argument
avirajsingh7 Sep 12, 2025
073a61c
Refactor process_response to use get_openai_client for OpenAI API cli…
avirajsingh7 Sep 12, 2025
a757f29
Refactor response api:
avirajsingh7 Sep 17, 2025
824652e
move response code to service
avirajsingh7 Sep 17, 2025
b471c3e
Implement job management and integrate it with response processing
avirajsingh7 Sep 18, 2025
580c3f5
pass trace id to job table
avirajsingh7 Sep 18, 2025
0de7cd0
pre commit and pass trace id to celery
avirajsingh7 Sep 18, 2025
19928ab
add task id to response log
avirajsingh7 Sep 18, 2025
b1e8fb4
Refactor response handling: split response logic into separate modules
avirajsingh7 Sep 22, 2025
3d9b2fa
Refactor callback handling: move send_callback function to utils and …
avirajsingh7 Sep 22, 2025
b962760
Add ResponseJobStatus model and update responses endpoint to return s…
avirajsingh7 Sep 22, 2025
7764568
fix init
avirajsingh7 Sep 22, 2025
a569485
Add tests for JobCrud and response job handling
avirajsingh7 Sep 22, 2025
87ecdb6
update Job model to use string for task_id
avirajsingh7 Sep 22, 2025
96160c6
Add tests for response generation and processing, including success a…
avirajsingh7 Sep 22, 2025
f150372
pre commit
avirajsingh7 Sep 23, 2025
e7fe68d
rename test_jobs
avirajsingh7 Sep 23, 2025
e62fa53
move tenant from user to project, add collections in services folder …
nishika26 Sep 23, 2025
bc40a18
Add flower dependency to pyproject.toml and uv.lock
avirajsingh7 Sep 24, 2025
f3f1565
Refactor job table migration and enhance error handling in job schedu…
avirajsingh7 Sep 24, 2025
7504ef7
Add CALLBACK_TIMEOUT setting and update send_callback to use it
avirajsingh7 Sep 24, 2025
1268dfb
Add callback timeout settings and update send_callback function to us…
avirajsingh7 Sep 24, 2025
6bc2a47
adding batching document in helper function, updating test cases for …
nishika26 Sep 24, 2025
65380f8
Remove unused response_chunks in CallbackResponse and update related …
avirajsingh7 Sep 25, 2025
db0c276
Fix update_job test to assert failure status and correct error message
avirajsingh7 Sep 25, 2025
6fe72d6
Refactor get_additional_data function to simplify exclusion logic for…
avirajsingh7 Sep 25, 2025
eaf8202
Update job_type field description for clarity and consistency
avirajsingh7 Sep 25, 2025
fcbd764
Remove unused imports and add conditional previous_response_id in gen…
avirajsingh7 Sep 25, 2025
4083b62
services test cases
nishika26 Sep 25, 2025
37e0b7a
Merge branch 'feature/response_api_to_celery' into feature/collection…
nishika26 Sep 25, 2025
2a69b55
Merge branch 'main' into feature/collection_to_celery
nishika26 Sep 26, 2025
7f948fb
fixing alembic head
nishika26 Sep 26, 2025
e274c45
final push for collection jobs
nishika26 Oct 1, 2025
4397434
collection job info test fix
nishika26 Oct 1, 2025
f5a5871
PR Reviews fixes
nishika26 Oct 6, 2025
d741ef6
removing unused imports
nishika26 Oct 6, 2025
81e0890
Merge branch 'main' into feature/collection_to_celery
nishika26 Oct 6, 2025
8e8fe7d
migration for removing failed collection columns
nishika26 Oct 6, 2025
5b6f2f2
migration for removing failed collection columns
nishika26 Oct 6, 2025
4a1f5ff
coderabbit and pr review fixes
nishika26 Oct 7, 2025
14d50a8
minor changes
nishika26 Oct 7, 2025
1dcab67
minor fixes
nishika26 Oct 7, 2025
7b09e3f
pr reviews
nishika26 Oct 8, 2025
95ef105
minimal fixes
nishika26 Oct 8, 2025
4de8c1d
changing router name
nishika26 Oct 8, 2025
ebf0cc8
Merge branch 'main' into feature/collection_to_celery
nishika26 Oct 8, 2025
2c0a11d
last pr review changes
nishika26 Oct 9, 2025
94234b1
minimal fixes
nishika26 Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""delete processing and failed columns from collection table

Revision ID: 7ab577d3af26
Revises: c6fb6d0b5897
Create Date: 2025-10-06 13:59:28.561706

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes


# revision identifiers, used by Alembic.
revision = "7ab577d3af26"
down_revision = "c6fb6d0b5897"
branch_labels = None
depends_on = None


def upgrade():
    """Purge collection rows left in a non-successful or inconsistent state.

    The old model tracked job status directly on the collection table; rows
    stuck in 'processing' or 'failed' are transient artifacts that must be
    removed before the follow-up migration drops the status column and makes
    llm_service_id NOT NULL.
    """
    # Drop collections that never completed successfully.
    op.execute(
        """
        DELETE FROM collection
        WHERE status IN ('processing', 'failed')
        """
    )
    # Defensive cleanup: a successful collection should always carry an
    # llm_service_id (status and id are committed in the same transaction),
    # but delete any stragglers so the later NOT NULL constraint cannot fail.
    op.execute(
        """
        DELETE FROM collection
        WHERE llm_service_id IS NULL
        """
    )


def downgrade():
    # Intentionally a no-op: the rows deleted by upgrade() cannot be
    # reconstructed, so this data migration is irreversible.
    pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""adding collection job table and altering collections table

Revision ID: b30727137e65
Revises: 7ab577d3af26
Create Date: 2025-10-05 14:19:14.213933

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "b30727137e65"
down_revision = "7ab577d3af26"
branch_labels = None
depends_on = None

# Postgres ENUM types backing the collection_jobs columns. create_type=False
# stops SQLAlchemy from auto-creating them during create_table(); upgrade()
# creates them explicitly with checkfirst=True instead.
collection_job_status_enum = postgresql.ENUM(
    "PENDING",
    "PROCESSING",
    "SUCCESSFUL",
    "FAILED",
    name="collectionjobstatus",
    create_type=False,
)

# Which operation a collection job represents.
collection_action_type = postgresql.ENUM(
    "CREATE",
    "DELETE",
    name="collectionactiontype",
    create_type=False,
)


def upgrade():
    """Create the collection_jobs table and move job state off collection.

    Job-lifecycle columns (status, error_message) and ownership (owner_id)
    leave the collection table; the collection table now only holds
    successfully created resources, so llm_service_id/name become NOT NULL.
    """
    # checkfirst=True keeps this idempotent if the enum types already exist
    # (e.g. created by a partially applied earlier run).
    collection_job_status_enum.create(op.get_bind(), checkfirst=True)
    collection_action_type.create(op.get_bind(), checkfirst=True)

    op.create_table(
        "collection_jobs",
        sa.Column("action_type", collection_action_type, nullable=False),
        # Nullable: a CREATE job has no collection until it succeeds.
        sa.Column("collection_id", sa.Uuid(), nullable=True),
        sa.Column("project_id", sa.Integer(), nullable=False),
        sa.Column("id", sa.Uuid(), nullable=False),
        sa.Column("status", collection_job_status_enum, nullable=False),
        sa.Column("task_id", sa.String(), nullable=True),
        sa.Column("trace_id", sa.String(), nullable=True),
        sa.Column("error_message", sa.Text(), nullable=True),
        sa.Column("inserted_at", sa.DateTime(), nullable=False),
        sa.Column("updated_at", sa.DateTime(), nullable=False),
        sa.ForeignKeyConstraint(
            ["collection_id"], ["collection.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )

    # Align collection with the new naming/constraints. Safe because the
    # preceding migration removed rows with NULL llm_service_id.
    op.alter_column("collection", "created_at", new_column_name="inserted_at")
    op.alter_column(
        "collection", "llm_service_id", existing_type=sa.VARCHAR(), nullable=False
    )
    op.alter_column(
        "collection", "llm_service_name", existing_type=sa.VARCHAR(), nullable=False
    )
    op.drop_constraint("collection_owner_id_fkey", "collection", type_="foreignkey")
    op.drop_column("collection", "owner_id")
    op.drop_column("collection", "status")
    op.drop_column("collection", "error_message")


def downgrade():
    """Restore the pre-migration collection schema and drop collection_jobs.

    Order matters: columns are added nullable, backfilled, then tightened to
    NOT NULL, because existing rows cannot satisfy the constraints up front.
    NOTE(review): the enum types created in upgrade() are not dropped here.
    """
    op.add_column(
        "collection",
        sa.Column("error_message", sa.VARCHAR(), autoincrement=False, nullable=True),
    )
    # Recreate the old status enum for the restored column.
    collectionstatus = postgresql.ENUM(
        "processing", "successful", "failed", name="collectionstatus"
    )

    op.add_column(
        "collection",
        sa.Column(
            "status",
            collectionstatus,
            server_default=sa.text("'processing'::collectionstatus"),
            nullable=True,
        ),
    )
    op.add_column(
        "collection",
        sa.Column("owner_id", sa.Integer(), nullable=True),
    )

    # Backfill so the NOT NULL alters below succeed.
    op.execute("UPDATE collection SET status = 'processing' WHERE status IS NULL")
    # NOTE(review): assumes a user row with id=1 exists, otherwise the FK
    # created next will fail -- confirm before relying on this downgrade.
    op.execute("UPDATE collection SET owner_id = 1 WHERE owner_id IS NULL")
    op.create_foreign_key(
        "collection_owner_id_fkey",
        "collection",
        "user",
        ["owner_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.alter_column("collection", "status", nullable=False)
    op.alter_column("collection", "owner_id", nullable=False)
    op.alter_column("collection", "inserted_at", new_column_name="created_at")
    # Relax the NOT NULL constraints introduced by upgrade().
    op.alter_column(
        "collection", "llm_service_name", existing_type=sa.VARCHAR(), nullable=True
    )
    op.alter_column(
        "collection", "llm_service_id", existing_type=sa.VARCHAR(), nullable=True
    )
    op.drop_table("collection_jobs")
11 changes: 6 additions & 5 deletions backend/app/api/docs/collections/create.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ OpenAI. Failure can occur from OpenAI being down, or some parameter
value being invalid. It can also fail due to document types not be
accepted. This is especially true for PDFs that may not be parseable.

The immediate response from the endpoint is a packet containing a
`key`. Once the collection has been created, information about the
collection will be returned to the user via the callback URL. If a
callback URL is not provided, clients can poll the `info` endpoint
with the `key` to retrieve the same information.
The immediate response from the endpoint is a `collection_job` object
containing the collection job ID, its status, and the action type
("CREATE"). Once the collection has been created, information about the
collection will be returned to the user via the callback URL. If a callback
URL is not provided, clients can poll the `collection job info` endpoint
with the `id` from the `collection_job` object to retrieve the same information.
6 changes: 5 additions & 1 deletion backend/app/api/docs/collections/delete.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@ Remove a collection from the platform. This is a two step process:

No action is taken on the documents themselves: the contents of the
documents that were a part of the collection remain unchanged, those
documents can still be accessed via the documents endpoints.
documents can still be accessed via the documents endpoints. The response from this
endpoint is a `collection_job` object containing the collection `job ID`, its status,
and the action type ("DELETE"). When the returned ID is used with the collection job
info endpoint and the job has succeeded, the status is reported as successful and no
collection object is returned, since the collection has been deleted.
7 changes: 3 additions & 4 deletions backend/app/api/docs/collections/info.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Retrieve all AI-platform information about a collection given its
ID. This route is very helpful for:
Retrieve detailed information about a specific collection by its ID. Note that this endpoint CANNOT be used to poll for collection creation: a collection record exists only after the resource has been successfully created and associated.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One suggestion for all the md/docs files: these are user-facing documents, so they should focus on describing the behavior of the endpoints, not their internal implementation details.
For example:

  • If an endpoint triggers a webhook, simply mention that it does, no need to explain how it’s handled internally.
  • If an endpoint deletes a resource, just state that it deletes it, don’t specify whether it’s a soft or hard delete.

Similarly, avoid mentioning internal mechanisms like can be use for polling or background tasks unless they directly affect how the user interacts with the API.

Copy link
Copy Markdown
Collaborator Author

@nishika26 nishika26 Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

noted, but since for now the main users of this endpoint had been dalgo, I think keeping that in mind I wrote the docs in that way . Stating that you need to poll on the job id returned is something that the user needs to know in this case. The fact that the deletion is soft or hard is sometimes needed to be known considering that our current user of the platform is dalgo, glific and us(the internal team) . So I think we can keep the docs which go into the behaviour of endpoints , anyway we will discuss this with other people as well, and according to their opinion we will apply those changes in v2


* Understanding whether a `create` request has finished
* Obtaining the OpenAI assistant ID (`llm_service_id`)
This endpoint returns metadata for the collection, including its project, organization,
timestamps, and associated LLM service details (`llm_service_id`).
12 changes: 12 additions & 0 deletions backend/app/api/docs/collections/job_info.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Retrieve information about a collection job by its ID. This is the polling endpoint for collection jobs: it provides detailed status and metadata for a specific collection job in the AI platform. It is especially useful for:

* Fetching the collection job object, which contains the job ID, the collection ID, the job status, and any error message.

* If a CREATE job has finished successfully, this endpoint also returns the associated collection details, including:
  - `llm_service_id`: The OpenAI assistant or model used for the collection.
  - Collection metadata such as ID, project, organization, and timestamps.

* If a DELETE job was successful, the status is reported as successful and no collection object is returned, since the collection has been deleted.

* Providing a simplified error message in the collection job object when a job has failed.
2 changes: 2 additions & 0 deletions backend/app/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
credentials,
fine_tuning,
model_evaluation,
collection_job,
)
from app.core.config import settings

api_router = APIRouter()
api_router.include_router(api_keys.router)
api_router.include_router(assistants.router)
api_router.include_router(collections.router)
api_router.include_router(collection_job.router)
api_router.include_router(credentials.router)
api_router.include_router(documents.router)
api_router.include_router(doc_transformation_job.router)
Expand Down
50 changes: 50 additions & 0 deletions backend/app/api/routes/collection_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import logging
from uuid import UUID

from fastapi import APIRouter
from fastapi import Path as FastPath


from app.api.deps import SessionDep, CurrentUserOrgProject
from app.crud import (
CollectionCrud,
CollectionJobCrud,
)
from app.models import CollectionJobStatus, CollectionJobPublic, CollectionActionType
from app.models.collection import CollectionPublic
from app.utils import APIResponse, load_description
from app.services.collections.helpers import extract_error_message


logger = logging.getLogger(__name__)
router = APIRouter(prefix="/collections", tags=["collections"])


@router.get(
    "/info/jobs/{job_id}",
    description=load_description("collections/job_info.md"),
    response_model=APIResponse[CollectionJobPublic],
)
def collection_job_info(
    session: SessionDep,
    current_user: CurrentUserOrgProject,
    job_id: UUID = FastPath(description="Collection job to retrieve"),
):
    """Return status/metadata for a collection job; for a successfully
    completed CREATE job, also attach the resulting collection's details."""
    project_id = current_user.project_id
    job = CollectionJobCrud(session, project_id).read_one(job_id)
    result = CollectionJobPublic.model_validate(job)

    finished_create = (
        job.status == CollectionJobStatus.SUCCESSFUL
        and job.action_type == CollectionActionType.CREATE
        and job.collection_id
    )
    if finished_create:
        collection = CollectionCrud(session, project_id).read_one(job.collection_id)
        result.collection = CollectionPublic.model_validate(collection)

    if job.status == CollectionJobStatus.FAILED and result.error_message:
        # Surface a simplified, user-facing message instead of the raw error.
        result.error_message = extract_error_message(result.error_message)

    return APIResponse.success_response(data=result)
Loading