Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions backend/app/api/docs/llm/llm_call.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@ for processing, and results are delivered via the callback URL when complete.
- **Note**: When using stored configuration, do not include the `blob` field in the request body

- **Mode 2: Ad-hoc Configuration**
- `blob` (object): Complete configuration object (see Create Config endpoint documentation for examples)
- `completion` (required):
- `provider` (required, string): Currently only "openai"
- `params` (required, object): Provider-specific parameters (flexible JSON)
- **Note**: When using ad-hoc configuration, do not include `id` and `version` fields
- `blob` (object): Complete configuration object
- `completion` (required, object): Completion configuration
- `provider` (required, string): Provider type - either `"openai"` (Kaapi abstraction) or `"openai-native"` (pass-through)
- `params` (required, object): Parameters structure depends on provider type (see schema for detailed structure)
- **Note**
- When using ad-hoc configuration, do not include `id` and `version` fields
- When using the Kaapi abstraction, parameters that are not supported by the selected provider or model are automatically suppressed. If any parameters are suppressed, a list of warnings is included in `metadata.warnings` in the response. For example, the GPT-5 model does not support the `temperature` parameter, so Kaapi will neither raise an error nor pass this parameter to the model; instead, it will add a warning to `metadata.warnings`.
- **Recommendation**: Use stored configs (Mode 1) for production; use ad-hoc configs only for testing/validation
- **Schema**: Check the API schema or examples below for the complete parameter structure for each provider type

**`callback_url`** (optional, HTTPS URL):
- Webhook endpoint to receive the response
Expand All @@ -39,4 +43,7 @@ for processing, and results are delivered via the callback URL when complete.
- Custom JSON metadata
- Passed through unchanged in the response

### Note
- `warnings` list is automatically added in response metadata when using Kaapi configs if any parameters are suppressed or adjusted (e.g., temperature on reasoning models)

---
6 changes: 4 additions & 2 deletions backend/app/core/langfuse/langfuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from asgi_correlation_id import correlation_id
from langfuse import Langfuse
from langfuse.client import StatefulGenerationClient, StatefulTraceClient
from app.models.llm import CompletionConfig, QueryParams, LLMCallResponse
from app.models.llm import NativeCompletionConfig, QueryParams, LLMCallResponse

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -130,7 +130,9 @@ def observe_llm_execution(

def decorator(func: Callable) -> Callable:
@wraps(func)
def wrapper(completion_config: CompletionConfig, query: QueryParams, **kwargs):
def wrapper(
completion_config: NativeCompletionConfig, query: QueryParams, **kwargs
):
# Skip observability if no credentials provided
if not credentials:
logger.info("[Langfuse] No credentials - skipping observability")
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@
CompletionConfig,
QueryParams,
ConfigBlob,
KaapiLLMParams,
KaapiCompletionConfig,
NativeCompletionConfig,
)
from app.models.llm.response import LLMCallResponse, LLMResponse, LLMOutput, Usage
74 changes: 68 additions & 6 deletions backend/app/models/llm/request.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,44 @@
from typing import Any, Literal
from typing import Annotated, Any, Literal, Union

from uuid import UUID
from sqlmodel import Field, SQLModel
from pydantic import model_validator, HttpUrl
from pydantic import Discriminator, model_validator, HttpUrl


class KaapiLLMParams(SQLModel):
    """
    Kaapi-abstracted parameters for LLM providers.

    These parameters form a unified, provider-agnostic contract across all LLM
    providers (OpenAI, Claude, Gemini, etc.). They are mapped internally to
    provider-specific API parameters at the mapper level; parameters the
    selected provider/model does not support are suppressed with a warning
    rather than raising an error.
    """

    # Required: which model to call. Forwarded to the provider as-is.
    model: str = Field(
        description="Model identifier to use for completion (e.g., 'gpt-4o', 'gpt-5')",
    )
    # Optional system prompt; mapped to the provider's instructions field.
    instructions: str | None = Field(
        default=None,
        description="System instructions to guide the model's behavior",
    )
    # Optional retrieval sources; mapped to the provider's file-search tool
    # (e.g., OpenAI tools[file_search].vector_store_ids).
    knowledge_base_ids: list[str] | None = Field(
        default=None,
        description="List of vector store IDs to use for knowledge retrieval",
    )
    # Reasoning effort level; only applied when the model supports reasoning,
    # otherwise suppressed with a warning.
    reasoning: Literal["low", "medium", "high"] | None = Field(
        default=None,
        description="Reasoning configuration or instructions",
    )
    # Sampling temperature; suppressed (with a warning) on reasoning models,
    # which ignore it.
    temperature: float | None = Field(
        default=None,
        ge=0.0,
        le=2.0,
        description="Sampling temperature between 0 and 2",
    )
    # Cap on retrieval results; only meaningful together with
    # knowledge_base_ids (defaults applied at the mapper level).
    max_num_results: int | None = Field(
        default=None,
        ge=1,
        description="Maximum number of results to return",
    )


class ConversationConfig(SQLModel):
Expand Down Expand Up @@ -46,18 +82,44 @@ class QueryParams(SQLModel):
)


class CompletionConfig(SQLModel):
"""Completion configuration with provider and parameters."""
class NativeCompletionConfig(SQLModel):
    """
    Native provider configuration (pass-through).

    All parameters are forwarded as-is to the provider's API without
    transformation, so `params` must exactly match the target provider's
    endpoint schema. Supports any LLM provider's native API format.
    """

    # Discriminator value for the CompletionConfig union; "-native" suffix
    # marks this as a pass-through (non-Kaapi) configuration.
    provider: Literal["openai-native"] = Field(
        default="openai-native",
        description="Native provider type (e.g., openai-native)",
    )
    # Free-form payload; intentionally untyped because the schema varies by
    # provider and is validated by the provider's API, not by Kaapi.
    params: dict[str, Any] = Field(
        ...,
        description="Provider-specific parameters (schema varies by provider), should exactly match the provider's endpoint params structure",
    )


class KaapiCompletionConfig(SQLModel):
    """
    Kaapi abstraction for LLM completion providers.

    Uses standardized Kaapi parameters (KaapiLLMParams) that are mapped to
    provider-specific APIs internally before execution. Intended to support
    multiple providers: OpenAI, Claude, Gemini, etc.
    """

    # Discriminator value for the CompletionConfig union; the bare provider
    # name (no "-native" suffix) selects the Kaapi-abstracted path.
    provider: Literal["openai"] = Field(..., description="LLM provider (openai)")
    # Typed, provider-agnostic parameters; translated to native params by the
    # mapper layer.
    params: KaapiLLMParams = Field(
        ...,
        description="Kaapi-standardized parameters mapped to provider-specific API",
    )


# Discriminated union of completion configs. Pydantic dispatches on the
# `provider` literal: "openai-native" -> NativeCompletionConfig (pass-through),
# "openai" -> KaapiCompletionConfig (abstracted parameters).
CompletionConfig = Annotated[
    NativeCompletionConfig | KaapiCompletionConfig,
    Field(discriminator="provider"),
]


class ConfigBlob(SQLModel):
"""Raw JSON blob of config."""

Expand Down
24 changes: 21 additions & 3 deletions backend/app/services/llm/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
from app.crud.credentials import get_provider_credential
from app.crud.jobs import JobCrud
from app.models import JobStatus, JobType, JobUpdate, LLMCallRequest
from app.models.llm.request import ConfigBlob, LLMCallConfig
from app.models.llm.request import ConfigBlob, LLMCallConfig, KaapiCompletionConfig
from app.utils import APIResponse, send_callback
from app.celery.utils import start_high_priority_job
from app.core.langfuse.langfuse import observe_llm_execution
from app.services.llm.providers.registry import get_llm_provider
from app.services.llm.mappers import transform_kaapi_config_to_native


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -170,10 +171,27 @@ def execute_job(
else:
config_blob = config.blob

try:
# Transform Kaapi config to native config if needed (before getting provider)
completion_config = config_blob.completion
if isinstance(completion_config, KaapiCompletionConfig):
completion_config, warnings = transform_kaapi_config_to_native(
completion_config
)
if request.request_metadata is None:
request.request_metadata = {}
request.request_metadata.setdefault("warnings", []).extend(warnings)
except Exception as e:
callback_response = APIResponse.failure_response(
error=f"Error processing configuration: {str(e)}",
metadata=request.request_metadata,
)
return handle_job_error(job_id, request.callback_url, callback_response)

try:
provider_instance = get_llm_provider(
session=session,
provider_type=config_blob.completion.provider,
provider_type=completion_config.provider, # Now always native provider type
project_id=project_id,
organization_id=organization_id,
)
Expand Down Expand Up @@ -203,7 +221,7 @@ def execute_job(
)(provider_instance.execute)

response, error = decorated_execute(
completion_config=config_blob.completion,
completion_config=completion_config,
query=request.query,
include_provider_raw_response=request.include_provider_raw_response,
)
Expand Down
94 changes: 94 additions & 0 deletions backend/app/services/llm/mappers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Parameter mappers for converting Kaapi-abstracted parameters to provider-specific formats."""

import litellm
from app.models.llm import KaapiLLMParams, KaapiCompletionConfig, NativeCompletionConfig


def map_kaapi_to_openai_params(kaapi_params: KaapiLLMParams) -> tuple[dict, list[str]]:
    """Map Kaapi-abstracted parameters to OpenAI API parameters.

    This mapper transforms standardized Kaapi parameters into OpenAI-specific
    parameter format, enabling provider-agnostic interface design.

    Args:
        kaapi_params: KaapiLLMParams instance with standardized parameters

    Supported Mapping:
        - model → model
        - instructions → instructions
        - knowledge_base_ids → tools[file_search].vector_store_ids
        - max_num_results → tools[file_search].max_num_results (fallback default)
        - reasoning → reasoning.effort (if reasoning supported by model else suppressed)
        - temperature → temperature (if reasoning not supported by model else suppressed)

    Returns:
        Tuple of:
        - Dictionary of OpenAI API parameters ready to be passed to the API
        - List of warnings describing suppressed or ignored parameters
    """
    openai_params: dict = {}
    warnings: list[str] = []

    # litellm's capability registry expects a "<provider>/<model>" identifier.
    support_reasoning = litellm.supports_reasoning(model=f"openai/{kaapi_params.model}")

    # Reasoning and temperature are mutually exclusive: reasoning models
    # ignore temperature, and non-reasoning models cannot take reasoning.
    # The unsupported parameter is suppressed and reported as a warning.
    if support_reasoning:
        if kaapi_params.reasoning is not None:
            openai_params["reasoning"] = {"effort": kaapi_params.reasoning}

        if kaapi_params.temperature is not None:
            warnings.append(
                "Parameter 'temperature' was suppressed because the selected model "
                "supports reasoning, and temperature is ignored when reasoning is enabled."
            )
    else:
        if kaapi_params.reasoning is not None:
            warnings.append(
                "Parameter 'reasoning' was suppressed because the selected model "
                "does not support reasoning."
            )

        if kaapi_params.temperature is not None:
            openai_params["temperature"] = kaapi_params.temperature

    if kaapi_params.model:
        openai_params["model"] = kaapi_params.model

    if kaapi_params.instructions:
        openai_params["instructions"] = kaapi_params.instructions

    if kaapi_params.knowledge_base_ids:
        openai_params["tools"] = [
            {
                "type": "file_search",
                "vector_store_ids": kaapi_params.knowledge_base_ids,
                # Default result cap when the caller does not specify one.
                "max_num_results": kaapi_params.max_num_results or 20,
            }
        ]
    elif kaapi_params.max_num_results is not None:
        # Previously this was dropped silently; surface it like every other
        # suppressed parameter so callers see it in metadata.warnings.
        warnings.append(
            "Parameter 'max_num_results' was suppressed because no "
            "'knowledge_base_ids' were provided; it only applies to knowledge retrieval."
        )

    return openai_params, warnings


def transform_kaapi_config_to_native(
    kaapi_config: KaapiCompletionConfig,
) -> tuple[NativeCompletionConfig, list[str]]:
    """Transform Kaapi completion config to native provider config with mapped parameters.

    Currently supports OpenAI. Future: Claude, Gemini mappers.

    Args:
        kaapi_config: KaapiCompletionConfig with abstracted parameters

    Returns:
        NativeCompletionConfig with provider-native parameters ready for API

    Raises:
        ValueError: If the config's provider has no registered mapper.
    """
    # Guard clause: fail fast on providers without a mapper.
    if kaapi_config.provider != "openai":
        raise ValueError(f"Unsupported provider: {kaapi_config.provider}")

    native_params, warnings = map_kaapi_to_openai_params(kaapi_config.params)
    native_config = NativeCompletionConfig(
        provider="openai-native",
        params=native_params,
    )
    return native_config, warnings
6 changes: 3 additions & 3 deletions backend/app/services/llm/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from abc import ABC, abstractmethod
from typing import Any

from app.models.llm import CompletionConfig, LLMCallResponse, QueryParams
from app.models.llm import NativeCompletionConfig, LLMCallResponse, QueryParams


class BaseProvider(ABC):
Expand All @@ -34,7 +34,7 @@ def __init__(self, client: Any):
@abstractmethod
def execute(
self,
completion_config: CompletionConfig,
completion_config: NativeCompletionConfig,
query: QueryParams,
include_provider_raw_response: bool = False,
) -> tuple[LLMCallResponse | None, str | None]:
Expand All @@ -43,7 +43,7 @@ def execute(
Directly passes the user's config params to provider API along with input.

Args:
completion_config: LLM completion configuration
completion_config: LLM completion configuration, pass params as-is to provider API
query: Query parameters including input and conversation_id
include_provider_raw_response: Whether to include the raw LLM provider response in the output

Expand Down
4 changes: 2 additions & 2 deletions backend/app/services/llm/providers/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from openai.types.responses.response import Response

from app.models.llm import (
CompletionConfig,
NativeCompletionConfig,
LLMCallResponse,
QueryParams,
LLMOutput,
Expand All @@ -30,7 +30,7 @@ def __init__(self, client: OpenAI):

def execute(
self,
completion_config: CompletionConfig,
completion_config: NativeCompletionConfig,
query: QueryParams,
include_provider_raw_response: bool = False,
) -> tuple[LLMCallResponse | None, str | None]:
Expand Down
24 changes: 14 additions & 10 deletions backend/app/services/llm/providers/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,16 @@


class LLMProvider:
OPENAI = "openai"
# Future constants:
# ANTHROPIC = "anthropic"
# GOOGLE = "google"
OPENAI_NATIVE = "openai-native"
# Future constants for native providers:
# CLAUDE_NATIVE = "claude-native"
# GEMINI_NATIVE = "gemini-native"

_registry: dict[str, type[BaseProvider]] = {
OPENAI: OpenAIProvider,
# ANTHROPIC: AnthropicProvider,
# GOOGLE: GoogleProvider,
OPENAI_NATIVE: OpenAIProvider,
# Future native providers:
# CLAUDE_NATIVE: ClaudeProvider,
# GEMINI_NATIVE: GeminiProvider,
}

@classmethod
Expand All @@ -45,19 +46,22 @@ def get_llm_provider(
) -> BaseProvider:
provider_class = LLMProvider.get(provider_type)

# e.g., "openai-native" → "openai", "claude-native" → "claude"
credential_provider = provider_type.replace("-native", "")

credentials = get_provider_credential(
session=session,
provider=provider_type,
provider=credential_provider,
project_id=project_id,
org_id=organization_id,
)

if not credentials:
raise ValueError(
f"Credentials for provider '{provider_type}' not configured for this project."
f"Credentials for provider '{credential_provider}' not configured for this project."
)

if provider_type == LLMProvider.OPENAI:
if provider_type == LLMProvider.OPENAI_NATIVE:
if "api_key" not in credentials:
raise ValueError("OpenAI credentials not configured for this project.")
client = OpenAI(api_key=credentials["api_key"])
Expand Down
Loading