From a9d16bc5c770bbbace58f9fdb340a81e63ad23ed Mon Sep 17 00:00:00 2001
From: "U-DESKTOP-877VI7G\\kylex"
Date: Fri, 3 Apr 2026 03:13:19 -0700
Subject: [PATCH 1/2] Structured Output support

Add structured output support for chat, tested across all model
providers and with streaming. Usage is now also included as part of
TextGenerationOutput, which led to finding a bug in usage reporting for
Gemini when streaming. Unit tests are included as well.
---
 pyproject.toml                 |  2 +-
 src/opengradient/__init__.py   |  2 +
 src/opengradient/client/llm.py | 22 ++++++++++-
 src/opengradient/types.py      | 68 ++++++++++++++++++++++++++++++++++
 uv.lock                        |  4 +-
 5 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 17512b8a..4fd346c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "opengradient"
-version = "0.9.4"
+version = "0.9.5"
 description = "Python SDK for OpenGradient decentralized model management & inference services"
 authors = [{name = "OpenGradient", email = "adam@vannalabs.ai"}]
 readme = "README.md"
diff --git a/src/opengradient/__init__.py b/src/opengradient/__init__.py
index 89d5ff08..a198203d 100644
--- a/src/opengradient/__init__.py
+++ b/src/opengradient/__init__.py
@@ -88,6 +88,7 @@ async def stream_example():
     InferenceResult,
     ModelOutput,
     ModelRepository,
+    ResponseFormat,
     SchedulerParams,
     TextGenerationOutput,
     TextGenerationStream,
@@ -105,6 +106,7 @@ async def stream_example():
     "SchedulerParams",
     "CandleType",
     "CandleOrder",
+    "ResponseFormat",
     "TextGenerationOutput",
     "TextGenerationStream",
     "x402SettlementMode",
diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py
index ed54fd99..326a4ba9 100644
--- a/src/opengradient/client/llm.py
+++ b/src/opengradient/client/llm.py
@@ -14,7 +14,7 @@
 from x402.mechanisms.evm.exact.register import register_exact_evm_client
 from x402.mechanisms.evm.upto.register import register_upto_evm_client
 
-from ..types import TEE_LLM, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
+from ..types import TEE_LLM, ResponseFormat, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
 from .opg_token import Permit2ApprovalResult, ensure_opg_approval
 from .tee_connection import RegistryTEEConnection, StaticTEEConnection, TEEConnectionInterface
 from .tee_registry import TEERegistry
@@ -44,6 +44,7 @@ class _ChatParams:
     stop_sequence: Optional[List[str]]
     tools: Optional[List[Dict]]
     tool_choice: Optional[str]
+    response_format: Optional[ResponseFormat]
     x402_settlement_mode: x402SettlementMode
 
 
@@ -152,6 +153,8 @@ def _chat_payload(self, params: _ChatParams, messages: List[Dict], stream: bool
         if params.tools:
            payload["tools"] = params.tools
            payload["tool_choice"] = params.tool_choice or "auto"
+        if params.response_format:
+            payload["response_format"] = params.response_format.to_dict()
         return payload
     async def _call_with_tee_retry(
         self,
@@ -297,6 +300,7 @@ async def chat(
         temperature: float = 0.0,
         tools: Optional[List[Dict]] = None,
         tool_choice: Optional[str] = None,
+        response_format: Optional[ResponseFormat] = None,
         x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED,
         stream: bool = False,
     ) -> Union[TextGenerationOutput, AsyncGenerator[StreamChunk, None]]:
@@ -311,6 +315,11 @@ async def chat(
             temperature (float): Temperature for LLM inference, between 0 and 1.
             tools (List[dict], optional): Set of tools for function calling.
             tool_choice (str, optional): Sets a specific tool to choose.
+            response_format (ResponseFormat, optional): Enforce a specific output format.
+                Use ``ResponseFormat(type="json_object")`` for any valid JSON (not supported
+                by Anthropic models). Use ``ResponseFormat(type="json_schema", json_schema={...})``
+                to enforce a strict schema (supported by all providers, including Anthropic).
+                Defaults to None (plain text).
             x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments.
                 - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving).
                 - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient).
@@ -324,8 +333,17 @@ async def chat(
             - If stream=True: Async generator yielding StreamChunk objects
 
         Raises:
+            ValueError: If ``ResponseFormat(type="json_object")`` is used with an Anthropic model.
             RuntimeError: If the inference fails.
         """
+        if response_format is not None and response_format.type == "json_object":
+            provider = model.split("/")[0]
+            if provider == "anthropic":
+                raise ValueError(
+                    "Anthropic models do not support response_format type 'json_object'. "
+                    "Use ResponseFormat(type='json_schema', json_schema={...}) with an explicit schema instead."
+                )
+
         params = _ChatParams(
             model=model.split("/")[1],
             max_tokens=max_tokens,
@@ -333,6 +351,7 @@ async def chat(
             stop_sequence=stop_sequence,
             tools=tools,
             tool_choice=tool_choice,
+            response_format=response_format,
             x402_settlement_mode=x402_settlement_mode,
         )
 
@@ -379,6 +398,7 @@ async def _request() -> TextGenerationOutput:
                 transaction_hash="external",
                 finish_reason=choices[0].get("finish_reason"),
                 chat_output=message,
+                usage=result.get("usage"),
                 tee_signature=result.get("tee_signature"),
                 tee_timestamp=result.get("tee_timestamp"),
                 **tee.metadata(),
diff --git a/src/opengradient/types.py b/src/opengradient/types.py
index a59293fa..035c67db 100644
--- a/src/opengradient/types.py
+++ b/src/opengradient/types.py
@@ -428,6 +428,9 @@ class TextGenerationOutput:
     completion_output: Optional[str] = None
     """Raw text returned by a completion request."""
 
+    usage: Optional[Dict] = None
+    """Token usage for the request. Contains ``prompt_tokens``, ``completion_tokens``, and ``total_tokens`` when reported by the server."""
+
     payment_hash: Optional[str] = None
     """Payment hash for the x402 transaction."""
 
@@ -526,6 +529,71 @@ class TEE_LLM(str, Enum):
     GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning"
 
 
+@dataclass
+class ResponseFormat:
+    """Controls the output format enforced by the TEE gateway.
+
+    Use ``type="json_object"`` to receive any valid JSON object (supported by
+    OpenAI, Gemini, and Grok). Use ``type="json_schema"`` with a ``json_schema``
+    definition to enforce a specific schema (supported by all providers,
+    including Anthropic).
+
+    Attributes:
+        type: One of ``"text"``, ``"json_object"``, or ``"json_schema"``.
+        json_schema: Schema definition (required when ``type="json_schema"``).
+            Must contain ``name`` (str) and ``schema`` (dict).
+            ``strict`` (bool) is optional.
+
+    Raises:
+        ValueError: If ``type`` is not a recognised value, or if
+            ``type="json_schema"`` is used without providing ``json_schema``.
+
+    Examples::
+
+        # Any valid JSON object — OpenAI, Gemini, Grok only
+        ResponseFormat(type="json_object")
+
+        # Strict schema — all providers including Anthropic
+        ResponseFormat(
+            type="json_schema",
+            json_schema={
+                "name": "person",
+                "strict": True,
+                "schema": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                    "required": ["name", "age"],
+                    "additionalProperties": False,
+                },
+            },
+        )
+    """
+
+    type: str
+    json_schema: Optional[Dict] = None
+
+    def __post_init__(self) -> None:
+        valid_types = ("text", "json_object", "json_schema")
+        if self.type not in valid_types:
+            raise ValueError(
+                f"ResponseFormat.type must be one of {valid_types}, got '{self.type}'"
+            )
+        if self.type == "json_schema" and not self.json_schema:
+            raise ValueError(
+                "ResponseFormat.json_schema is required when type='json_schema'"
+            )
+
+    def to_dict(self) -> Dict:
+        """Serialise to a JSON-compatible dict for the TEE gateway request payload."""
+        d: Dict = {"type": self.type}
+        if self.json_schema is not None:
+            d["json_schema"] = self.json_schema
+        return d
+
+
 @dataclass
 class SchedulerParams:
     frequency: int
diff --git a/uv.lock b/uv.lock
index 6d0327a6..3ab6b1b6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.11"
 resolution-markers = [
     "python_full_version >= '3.14'",
@@ -1867,7 +1867,7 @@ wheels = [
 
 [[package]]
 name = "opengradient"
-version = "0.9.3"
+version = "0.9.5"
 source = { editable = "." }
 dependencies = [
     { name = "click" },

From f81b2c0e9a7da4c4536769b7a70d14b18a611ecb Mon Sep 17 00:00:00 2001
From: kylexqian
Date: Wed, 8 Apr 2026 17:10:28 -0700
Subject: [PATCH 2/2] Remove deprecated gemini-3-pro-preview from TEE_LLM, note Gemini 2.5 deprecations

gemini-3-pro-preview was shut down on March 9, 2026 and was already
absent from the TEE gateway model registry. Remove it from the SDK enum
to keep the two in sync.

Also add comments on the Gemini 2.5 models (flash, pro, flash-lite)
noting their scheduled June/July 2026 deprecation dates and their
respective replacements.

Co-Authored-By: Claude Sonnet 4.6
---
 src/opengradient/types.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/opengradient/types.py b/src/opengradient/types.py
index 035c67db..569f7d8c 100644
--- a/src/opengradient/types.py
+++ b/src/opengradient/types.py
@@ -516,10 +516,12 @@ class TEE_LLM(str, Enum):
     CLAUDE_OPUS_4_6 = "anthropic/claude-opus-4-6"
 
     # Google models via TEE
+    # Note: gemini-2.5-flash, gemini-2.5-pro, and gemini-2.5-flash-lite are scheduled
+    # for deprecation on June 17, 2026 (flash-lite: July 22, 2026). Replacements will be
+    # gemini-3-flash-preview, gemini-3.1-pro-preview, and gemini-3.1-flash-lite-preview.
     GEMINI_2_5_FLASH = "google/gemini-2.5-flash"
     GEMINI_2_5_PRO = "google/gemini-2.5-pro"
     GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite"
-    GEMINI_3_PRO = "google/gemini-3-pro-preview"
     GEMINI_3_FLASH = "google/gemini-3-flash-preview"
 
     # xAI Grok models via TEE
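
A minimal usage sketch of the ResponseFormat API introduced in PATCH 1/2 (illustrative only, not part of either patch; it exercises only code added above)::

    from opengradient import ResponseFormat

    # Strict-schema mode; per the docstring above, this is supported by all
    # providers, including Anthropic.
    fmt = ResponseFormat(
        type="json_schema",
        json_schema={
            "name": "person",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
                "required": ["name", "age"],
                "additionalProperties": False,
            },
        },
    )

    # to_dict() yields the "response_format" fragment that _chat_payload
    # attaches to the TEE gateway request payload.
    print(fmt.to_dict())

    # __post_init__ validation: json_schema is mandatory when type="json_schema".
    try:
        ResponseFormat(type="json_schema")
    except ValueError as err:
        print(err)

Passing the object via chat(..., response_format=fmt) forwards it to the gateway. ResponseFormat(type="json_object") is sent the same way for OpenAI, Gemini, and Grok models; for Anthropic models, chat() rejects it with a ValueError before any request is made.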