diff --git a/src/opengradient/__init__.py b/src/opengradient/__init__.py index 89d5ff0..a198203 100644 --- a/src/opengradient/__init__.py +++ b/src/opengradient/__init__.py @@ -88,6 +88,7 @@ async def stream_example(): InferenceResult, ModelOutput, ModelRepository, + ResponseFormat, SchedulerParams, TextGenerationOutput, TextGenerationStream, @@ -105,6 +106,7 @@ async def stream_example(): "SchedulerParams", "CandleType", "CandleOrder", + "ResponseFormat", "TextGenerationOutput", "TextGenerationStream", "x402SettlementMode", diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py index ed54fd9..326a4ba 100644 --- a/src/opengradient/client/llm.py +++ b/src/opengradient/client/llm.py @@ -14,7 +14,7 @@ from x402.mechanisms.evm.exact.register import register_exact_evm_client from x402.mechanisms.evm.upto.register import register_upto_evm_client -from ..types import TEE_LLM, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode +from ..types import TEE_LLM, ResponseFormat, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode from .opg_token import Permit2ApprovalResult, ensure_opg_approval from .tee_connection import RegistryTEEConnection, StaticTEEConnection, TEEConnectionInterface from .tee_registry import TEERegistry @@ -44,6 +44,7 @@ class _ChatParams: stop_sequence: Optional[List[str]] tools: Optional[List[Dict]] tool_choice: Optional[str] + response_format: Optional[ResponseFormat] x402_settlement_mode: x402SettlementMode @@ -152,6 +153,8 @@ def _chat_payload(self, params: _ChatParams, messages: List[Dict], stream: bool if params.tools: payload["tools"] = params.tools payload["tool_choice"] = params.tool_choice or "auto" + if params.response_format: + payload["response_format"] = params.response_format.to_dict() return payload async def _call_with_tee_retry( @@ -297,6 +300,7 @@ async def chat( temperature: float = 0.0, tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, 
+ response_format: Optional[ResponseFormat] = None, x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, stream: bool = False, ) -> Union[TextGenerationOutput, AsyncGenerator[StreamChunk, None]]: @@ -311,6 +315,11 @@ async def chat( temperature (float): Temperature for LLM inference, between 0 and 1. tools (List[dict], optional): Set of tools for function calling. tool_choice (str, optional): Sets a specific tool to choose. + response_format (ResponseFormat, optional): Enforce a specific output format. + Use ``ResponseFormat(type="json_object")`` for any valid JSON object (not supported + by Anthropic models). Use ``ResponseFormat(type="json_schema", json_schema={...})`` + to enforce a strict schema (supported by all providers including Anthropic). + Defaults to None (plain text). x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving). - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient). @@ -324,8 +333,17 @@ async def chat( - If stream=True: Async generator yielding StreamChunk objects Raises: + ValueError: If ``response_format`` with type ``"json_object"`` is used with an Anthropic model. RuntimeError: If the inference fails. """ + if response_format is not None and response_format.type == "json_object": provider = model.split("/")[0] if provider == "anthropic": raise ValueError( "Anthropic models do not support response_format type 'json_object'. " "Use ResponseFormat(type='json_schema', json_schema={...}) with an explicit schema instead." 
+ ) + params = _ChatParams( model=model.split("/")[1], max_tokens=max_tokens, @@ -333,6 +351,7 @@ async def chat( stop_sequence=stop_sequence, tools=tools, tool_choice=tool_choice, + response_format=response_format, x402_settlement_mode=x402_settlement_mode, ) @@ -379,6 +398,7 @@ async def _request() -> TextGenerationOutput: transaction_hash="external", finish_reason=choices[0].get("finish_reason"), chat_output=message, + usage=result.get("usage"), tee_signature=result.get("tee_signature"), tee_timestamp=result.get("tee_timestamp"), **tee.metadata(), diff --git a/src/opengradient/types.py b/src/opengradient/types.py index a59293f..569f7d8 100644 --- a/src/opengradient/types.py +++ b/src/opengradient/types.py @@ -428,6 +428,9 @@ class TextGenerationOutput: completion_output: Optional[str] = None """Raw text returned by a completion request.""" + usage: Optional[Dict] = None + """Token usage for the request. Contains ``prompt_tokens``, ``completion_tokens``, and ``total_tokens`` when reported by the server.""" + payment_hash: Optional[str] = None """Payment hash for the x402 transaction.""" @@ -513,10 +516,12 @@ class TEE_LLM(str, Enum): CLAUDE_OPUS_4_6 = "anthropic/claude-opus-4-6" # Google models via TEE + # Note: gemini-2.5-flash, gemini-2.5-pro, and gemini-2.5-flash-lite are scheduled + # for deprecation on June 17, 2026 (flash-lite: July 22, 2026). Replacements will be + # gemini-3-flash-preview, gemini-3.1-pro-preview, and gemini-3.1-flash-lite-preview. GEMINI_2_5_FLASH = "google/gemini-2.5-flash" GEMINI_2_5_PRO = "google/gemini-2.5-pro" GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite" - GEMINI_3_PRO = "google/gemini-3-pro-preview" GEMINI_3_FLASH = "google/gemini-3-flash-preview" # xAI Grok models via TEE @@ -526,6 +531,71 @@ class TEE_LLM(str, Enum): GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning" +@dataclass +class ResponseFormat: + """Controls the output format enforced by the TEE gateway. 
+ + Use ``type="json_object"`` to receive any valid JSON object (supported by + OpenAI, Gemini, and Grok). Use ``type="json_schema"`` with a ``json_schema`` + definition to enforce a specific schema (supported by all providers, + including Anthropic). + + Attributes: + type: One of ``"text"``, ``"json_object"``, or ``"json_schema"``. + json_schema: Schema definition (required when ``type="json_schema"``). + Must contain ``name`` (str) and ``schema`` (dict). + ``strict`` (bool) is optional. + + Raises: + ValueError: If ``type`` is not a recognised value, or if + ``type="json_schema"`` is used without providing ``json_schema``. + + Examples:: + + # Any valid JSON object — OpenAI, Gemini, Grok only + ResponseFormat(type="json_object") + + # Strict schema — all providers including Anthropic + ResponseFormat( + type="json_schema", + json_schema={ + "name": "person", + "strict": True, + "schema": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + "required": ["name", "age"], + "additionalProperties": False, + }, + }, + ) + """ + + type: str + json_schema: Optional[Dict] = None + + def __post_init__(self) -> None: + valid_types = ("text", "json_object", "json_schema") + if self.type not in valid_types: + raise ValueError( + f"ResponseFormat.type must be one of {valid_types}, got '{self.type}'" + ) + if self.type == "json_schema" and not self.json_schema: + raise ValueError( + "ResponseFormat.json_schema is required when type='json_schema'" + ) + + def to_dict(self) -> Dict: + """Serialise to a JSON-compatible dict for the TEE gateway request payload.""" + d: Dict = {"type": self.type} + if self.json_schema is not None: + d["json_schema"] = self.json_schema + return d + + @dataclass class SchedulerParams: frequency: int diff --git a/uv.lock b/uv.lock index e43ed2b..3ab6b1b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.11" resolution-markers = [ 
"python_full_version >= '3.14'",