From c7df2a33a8742e5544bb210020d6547094e6fd8a Mon Sep 17 00:00:00 2001
From: Season <season.saw@gmail.com>
Date: Thu, 11 Jun 2026 22:56:41 +0800
Subject: [PATCH] feat(ai): emit full usage on otel spans (cost, totals,
 cache/reasoning details)

otelMiddleware only emitted gen_ai.usage.input_tokens/output_tokens even
though TokenUsage already carries provider-reported cost, total tokens,
cache/reasoning breakdowns, and duration-based billing. Backends like
PostHog had to re-derive cost from their own price tables, losing cache
discounts and gateway markup (OpenRouter), and duration-billed activities
had no cost signal at all.

A shared usageAttributes() helper now builds the full guarded attribute
set at all three emission sites (RUN_FINISHED chunk, onUsage, onFinish
rollup):

- gen_ai.usage.total_tokens / gen_ai.usage.cost (de-facto extensions
  consumed directly by PostHog and LiteLLM-style backends)
- gen_ai.usage.cache_read.input_tokens, cache_creation.input_tokens,
  reasoning.output_tokens (official GenAI semconv names)
- tanstack.ai.usage.duration_seconds and the upstream cost split
  (no semconv equivalent exists)

E2E: new /api/otel-usage route drives the existing openai-usage-details
and openrouter-cost aimock mounts through otelMiddleware with a local
capture tracer; middleware.spec.ts asserts the attributes land on
iteration and root spans.

Fixes #721
---
 .changeset/otel-full-usage-emission.md     |   5 +
 docs/advanced/otel.md                      |  11 ++
 docs/config.json                           |   3 +-
 packages/ai/src/middlewares/otel.ts        |  69 +++++++--
 packages/ai/tests/middlewares/otel.test.ts | 145 ++++++++++++++++++
 testing/e2e/src/routeTree.gen.ts           |  21 +++
 testing/e2e/src/routes/api.otel-usage.ts   | 167 +++++++++++++++++++++
 testing/e2e/tests/middleware.spec.ts       |  71 +++++++++
 8 files changed, 479 insertions(+), 13 deletions(-)
 create mode 100644 .changeset/otel-full-usage-emission.md
 create mode 100644 testing/e2e/src/routes/api.otel-usage.ts

diff --git a/.changeset/otel-full-usage-emission.md b/.changeset/otel-full-usage-emission.md
new file mode 100644
index 000000000..bda815e31
--- /dev/null
+++ b/.changeset/otel-full-usage-emission.md
@@ -0,0 +1,5 @@
+---
+'@tanstack/ai': minor
+---
+
+`otelMiddleware` now emits most of the remaining reported `TokenUsage` fields on spans instead of only input/output tokens (#721). When the provider reports them, spans carry `gen_ai.usage.total_tokens`, `gen_ai.usage.cost` (provider-reported cost — cache discounts and gateway markup included, so backends like PostHog no longer re-derive cost from price tables), the official semconv cache/reasoning breakdowns (`gen_ai.usage.cache_read.input_tokens`, `gen_ai.usage.cache_creation.input_tokens`, `gen_ai.usage.reasoning.output_tokens`), and TanStack-namespaced attributes for duration-based billing (`tanstack.ai.usage.duration_seconds`) and the upstream cost split (`tanstack.ai.usage.upstream_cost` / `upstream_input_cost` / `upstream_output_cost`). All attributes are guarded — spans stay unchanged when a provider doesn't report a field. Media-oriented fields (`unitsBilled`, per-modality token breakdowns) and the provider-shaped `providerUsageDetails` bag are intentionally not emitted; media-activity observability is tracked in #720.
diff --git a/docs/advanced/otel.md b/docs/advanced/otel.md
index cf5f3ac2c..03af9daa8 100644
--- a/docs/advanced/otel.md
+++ b/docs/advanced/otel.md
@@ -72,6 +72,15 @@ Iteration spans are numbered (`#0`, `#1`, ...) so distinct iterations of the sam
 | iteration | `gen_ai.request.max_tokens` | from config |
 | iteration | `gen_ai.usage.input_tokens` | per iteration |
 | iteration | `gen_ai.usage.output_tokens` | per iteration |
+| root / iteration | `gen_ai.usage.total_tokens` | provider-reported total |
+| root / iteration | `gen_ai.usage.cost` | provider-reported cost, when available |
+| root / iteration | `gen_ai.usage.cache_read.input_tokens` | cached prompt tokens, when reported |
+| root / iteration | `gen_ai.usage.cache_creation.input_tokens` | cache-write prompt tokens, when reported |
+| root / iteration | `gen_ai.usage.reasoning.output_tokens` | reasoning/thinking tokens, when reported |
+| root / iteration | `tanstack.ai.usage.duration_seconds` | duration in seconds for duration-billed usage (e.g. transcription), when reported |
+| root / iteration | `tanstack.ai.usage.upstream_cost` | gateway upstream cost (e.g. OpenRouter), when reported |
+| root / iteration | `tanstack.ai.usage.upstream_input_cost` | upstream input cost split, when reported |
+| root / iteration | `tanstack.ai.usage.upstream_output_cost` | upstream output cost split, when reported |
 | iteration | `gen_ai.response.finish_reasons` | `[stop]`, `[tool_calls]`, ... |
 | root | `gen_ai.usage.input_tokens` | rolled up |
 | root | `gen_ai.usage.output_tokens` | rolled up |
@@ -81,6 +90,8 @@ Iteration spans are numbered (`#0`, `#1`, ...) so distinct iterations of the sam
 | tool | `gen_ai.tool.type` | `function` |
 | tool | `tanstack.ai.tool.outcome` | `success` / `error` |
 
+Usage attributes beyond input/output tokens are emitted only when the provider reports them, so spans stay clean otherwise. Cache and reasoning breakdowns use the official GenAI semconv names; `gen_ai.usage.cost` and `gen_ai.usage.total_tokens` are de-facto extensions consumed directly by backends like PostHog — without them, backends re-derive cost from their own price tables and lose cache discounts and gateway markup. Fields with no established convention (duration-based billing, the upstream cost split) are TanStack-namespaced.
+
 ### Metrics
 
 Two GenAI-standard histograms:
diff --git a/docs/config.json b/docs/config.json
index e3fc3b712..180fa7905 100644
--- a/docs/config.json
+++ b/docs/config.json
@@ -280,7 +280,8 @@
         {
           "label": "OpenTelemetry",
           "to": "advanced/otel",
-          "addedAt": "2026-05-08"
+          "addedAt": "2026-05-08",
+          "updatedAt": "2026-06-11"
         }
       ]
     },
diff --git a/packages/ai/src/middlewares/otel.ts b/packages/ai/src/middlewares/otel.ts
index c953e3f2f..5d789bb40 100644
--- a/packages/ai/src/middlewares/otel.ts
+++ b/packages/ai/src/middlewares/otel.ts
@@ -20,6 +20,7 @@ import type {
   ChatMiddleware,
   ChatMiddlewareContext,
 } from '../activities/chat/middleware/types'
+import type { TokenUsage } from '../types'
 
 /**
  * Scope (role) of an OTel span emitted by this middleware.
@@ -179,6 +180,59 @@ function firstNumber(...candidates: Array<unknown>): number | undefined {
   return undefined
 }
 
+/**
+ * Build the usage span attributes — `gen_ai.usage.*` plus the
+ * TanStack-namespaced `tanstack.ai.usage.*` — from a `TokenUsage`.
+ *
+ * Input/output tokens are always set. Cost, total tokens, cache/reasoning
+ * breakdowns, duration, and the upstream cost split are each added only when
+ * `firstNumber` returns a number for the field, so spans stay clean when a
+ * provider doesn't report it. Cache and reasoning use the official GenAI
+ * semconv names; `gen_ai.usage.cost` and `gen_ai.usage.total_tokens` are
+ * de-facto extensions consumed by backends like PostHog. Fields with no
+ * semconv or de-facto convention (`costDetails`, `durationSeconds`) are
+ * TanStack-namespaced. Deliberately not emitted: `unitsBilled`,
+ * `providerUsageDetails`, and the per-modality token breakdowns — those are
+ * media-oriented; media-activity observability is tracked in #720.
+ */
+function usageAttributes(usage: TokenUsage): Record<string, AttributeValue> {
+  const attrs: Record<string, AttributeValue> = {
+    'gen_ai.usage.input_tokens': usage.promptTokens,
+    'gen_ai.usage.output_tokens': usage.completionTokens,
+  }
+  const optional: Array<[key: string, value: unknown]> = [
+    ['gen_ai.usage.total_tokens', usage.totalTokens],
+    ['gen_ai.usage.cost', usage.cost],
+    [
+      'gen_ai.usage.cache_read.input_tokens',
+      usage.promptTokensDetails?.cachedTokens,
+    ],
+    [
+      'gen_ai.usage.cache_creation.input_tokens',
+      usage.promptTokensDetails?.cacheWriteTokens,
+    ],
+    [
+      'gen_ai.usage.reasoning.output_tokens',
+      usage.completionTokensDetails?.reasoningTokens,
+    ],
+    ['tanstack.ai.usage.duration_seconds', usage.durationSeconds],
+    ['tanstack.ai.usage.upstream_cost', usage.costDetails?.upstreamCost],
+    [
+      'tanstack.ai.usage.upstream_input_cost',
+      usage.costDetails?.upstreamInputCost,
+    ],
+    [
+      'tanstack.ai.usage.upstream_output_cost',
+      usage.costDetails?.upstreamOutputCost,
+    ],
+  ]
+  for (const [key, value] of optional) {
+    const num = firstNumber(value)
+    if (num !== undefined) attrs[key] = num
+  }
+  return attrs
+}
+
 function errorMessage(err: unknown): string | undefined {
   if (err instanceof Error) return err.message
   if (typeof err === 'string') return err
@@ -524,10 +578,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware {
         // `runOnUsage` when `chunk.usage` is present, and `onUsage` is the
         // canonical place for the metric. Recording in both would double-count.
         if (chunk.usage) {
-          span.setAttributes({
-            'gen_ai.usage.input_tokens': chunk.usage.promptTokens,
-            'gen_ai.usage.output_tokens': chunk.usage.completionTokens,
-          })
+          span.setAttributes(usageAttributes(chunk.usage))
         }
 
         if (captureContent && state.assistantTextBuffer.length > 0) {
@@ -584,10 +635,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware {
         }
 
         const span = state.currentIterationSpan ?? state.rootSpan
-        span.setAttributes({
-          'gen_ai.usage.input_tokens': usage.promptTokens,
-          'gen_ai.usage.output_tokens': usage.completionTokens,
-        })
+        span.setAttributes(usageAttributes(usage))
       })
     },
 
@@ -905,10 +953,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware {
         }
 
         if (info.usage) {
-          state.rootSpan.setAttributes({
-            'gen_ai.usage.input_tokens': info.usage.promptTokens,
-            'gen_ai.usage.output_tokens': info.usage.completionTokens,
-          })
+          state.rootSpan.setAttributes(usageAttributes(info.usage))
         }
         if (info.finishReason) {
           state.rootSpan.setAttribute('gen_ai.response.finish_reasons', [
diff --git a/packages/ai/tests/middlewares/otel.test.ts b/packages/ai/tests/middlewares/otel.test.ts
index 95bc3d6bf..68f5ec56c 100644
--- a/packages/ai/tests/middlewares/otel.test.ts
+++ b/packages/ai/tests/middlewares/otel.test.ts
@@ -307,6 +307,151 @@ describe('otelMiddleware — duration histogram and rollup', () => {
   })
 })
 
+describe('otelMiddleware — full usage emission', () => {
+  // Fixture covering every `TokenUsage` field emitted beyond input/output
+  // tokens; all values are distinct, so a mis-mapped attribute key fails.
+  // Backends like PostHog read `gen_ai.usage.cost` directly; without it they
+  // re-derive cost from price tables, losing cache discounts/gateway markup.
+  // NOTE(review): totalTokens 165 != 100 + 50 — presumably deliberate; confirm.
+  const fullUsage = {
+    promptTokens: 100,
+    completionTokens: 50,
+    totalTokens: 165,
+    promptTokensDetails: { cachedTokens: 80, cacheWriteTokens: 10 },
+    completionTokensDetails: { reasoningTokens: 15 },
+    durationSeconds: 2.5,
+    cost: 0.0123,
+    costDetails: {
+      upstreamCost: 0.01,
+      upstreamInputCost: 0.004,
+      upstreamOutputCost: 0.006,
+    },
+  }
+
+  const expectFullUsageAttrs = (span: FakeSpan) => {
+    expect(span.attributes['gen_ai.usage.input_tokens']).toBe(100)
+    expect(span.attributes['gen_ai.usage.output_tokens']).toBe(50)
+    expect(span.attributes['gen_ai.usage.total_tokens']).toBe(165)
+    expect(span.attributes['gen_ai.usage.cost']).toBe(0.0123)
+    expect(span.attributes['gen_ai.usage.cache_read.input_tokens']).toBe(80)
+    expect(span.attributes['gen_ai.usage.cache_creation.input_tokens']).toBe(
+      10,
+    )
+    expect(span.attributes['gen_ai.usage.reasoning.output_tokens']).toBe(15)
+    expect(span.attributes['tanstack.ai.usage.duration_seconds']).toBe(2.5)
+    expect(span.attributes['tanstack.ai.usage.upstream_cost']).toBe(0.01)
+    expect(span.attributes['tanstack.ai.usage.upstream_input_cost']).toBe(
+      0.004,
+    )
+    expect(span.attributes['tanstack.ai.usage.upstream_output_cost']).toBe(
+      0.006,
+    )
+  }
+
+  it('emits cost, totals, and detail breakdowns from RUN_FINISHED chunk.usage', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onChunk?.(ctx, {
+      ...ev.runFinished('stop'),
+      model: 'gpt-4o',
+      usage: fullUsage,
+    })
+
+    expectFullUsageAttrs(spans[1]!)
+  })
+
+  it('emits cost, totals, and detail breakdowns from onUsage', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onUsage?.(ctx, fullUsage)
+
+    expectFullUsageAttrs(spans[1]!)
+  })
+
+  it('rolls up cost, totals, and detail breakdowns onto the root span on onFinish', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' })
+    await mw.onFinish?.(ctx, {
+      finishReason: 'stop',
+      duration: 1250,
+      content: '',
+      usage: fullUsage,
+    })
+
+    expectFullUsageAttrs(spans[0]!)
+  })
+
+  it('omits optional usage attributes when the provider does not report them', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onUsage?.(ctx, {
+      promptTokens: 100,
+      completionTokens: 50,
+      totalTokens: 150,
+    })
+
+    const span = spans[1]!
+    expect(span.attributes['gen_ai.usage.input_tokens']).toBe(100)
+    expect(span.attributes['gen_ai.usage.output_tokens']).toBe(50)
+    expect(span.attributes['gen_ai.usage.total_tokens']).toBe(150)
+    expect(span.attributes['gen_ai.usage.cost']).toBeUndefined()
+    expect(
+      span.attributes['gen_ai.usage.cache_read.input_tokens'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['gen_ai.usage.cache_creation.input_tokens'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['gen_ai.usage.reasoning.output_tokens'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['tanstack.ai.usage.duration_seconds'],
+    ).toBeUndefined()
+    expect(span.attributes['tanstack.ai.usage.upstream_cost']).toBeUndefined()
+    expect(
+      span.attributes['tanstack.ai.usage.upstream_input_cost'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['tanstack.ai.usage.upstream_output_cost'],
+    ).toBeUndefined()
+  })
+
+  it('emits zero-valued usage fields instead of dropping them', async () => {
+    // cost 0 is a real report (OpenRouter free models), and the OpenRouter
+    // extractor deliberately preserves it. Pin that the presence guard is
+    // `!== undefined`, not truthiness — a truthy guard would drop zeros.
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onUsage?.(ctx, {
+      promptTokens: 100,
+      completionTokens: 50,
+      totalTokens: 150,
+      cost: 0,
+      promptTokensDetails: { cachedTokens: 0 },
+    })
+
+    const span = spans[1]!
+    expect(span.attributes['gen_ai.usage.cost']).toBe(0)
+    expect(span.attributes['gen_ai.usage.cache_read.input_tokens']).toBe(0)
+  })
+})
+
 describe('otelMiddleware — tool spans', () => {
   it('creates a tool span as child of the iteration span (including after RUN_FINISHED)', async () => {
     const { tracer, spans } = createFakeTracer()
diff --git a/testing/e2e/src/routeTree.gen.ts b/testing/e2e/src/routeTree.gen.ts
index 23f0cc4ff..8b3dd03fe 100644
--- a/testing/e2e/src/routeTree.gen.ts
+++ b/testing/e2e/src/routeTree.gen.ts
@@ -26,6 +26,7 @@ import { Route as ApiTranscriptionRouteImport } from './routes/api.transcription
 import { Route as ApiToolsTestRouteImport } from './routes/api.tools-test'
 import { Route as ApiToolCallLifecycleWireRouteImport } from './routes/api.tool-call-lifecycle-wire'
 import { Route as ApiSummarizeRouteImport } from './routes/api.summarize'
+import { Route as ApiOtelUsageRouteImport } from './routes/api.otel-usage'
 import { Route as ApiOpenrouterWebToolsWireRouteImport } from './routes/api.openrouter-web-tools-wire'
 import { Route as ApiOpenrouterCostRouteImport } from './routes/api.openrouter-cost'
 import { Route as ApiOpenaiUsageDetailsRouteImport } from './routes/api.openai-usage-details'
@@ -136,6 +137,11 @@ const ApiSummarizeRoute = ApiSummarizeRouteImport.update({
   path: '/api/summarize',
   getParentRoute: () => rootRouteImport,
 } as any)
+const ApiOtelUsageRoute = ApiOtelUsageRouteImport.update({
+  id: '/api/otel-usage',
+  path: '/api/otel-usage',
+  getParentRoute: () => rootRouteImport,
+} as any)
 const ApiOpenrouterWebToolsWireRoute =
   ApiOpenrouterWebToolsWireRouteImport.update({
     id: '/api/openrouter-web-tools-wire',
@@ -284,6 +290,7 @@ export interface FileRoutesByFullPath {
   '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute
   '/api/openrouter-cost': typeof ApiOpenrouterCostRoute
   '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute
+  '/api/otel-usage': typeof ApiOtelUsageRoute
   '/api/summarize': typeof ApiSummarizeRoute
   '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute
   '/api/tools-test': typeof ApiToolsTestRoute
@@ -326,6 +333,7 @@ export interface FileRoutesByTo {
   '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute
   '/api/openrouter-cost': typeof ApiOpenrouterCostRoute
   '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute
+  '/api/otel-usage': typeof ApiOtelUsageRoute
   '/api/summarize': typeof ApiSummarizeRoute
   '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute
   '/api/tools-test': typeof ApiToolsTestRoute
@@ -369,6 +377,7 @@ export interface FileRoutesById {
   '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute
   '/api/openrouter-cost': typeof ApiOpenrouterCostRoute
   '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute
+  '/api/otel-usage': typeof ApiOtelUsageRoute
   '/api/summarize': typeof ApiSummarizeRoute
   '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute
   '/api/tools-test': typeof ApiToolsTestRoute
@@ -413,6 +422,7 @@ export interface FileRouteTypes {
     | '/api/openai-usage-details'
     | '/api/openrouter-cost'
     | '/api/openrouter-web-tools-wire'
+    | '/api/otel-usage'
     | '/api/summarize'
     | '/api/tool-call-lifecycle-wire'
     | '/api/tools-test'
@@ -455,6 +465,7 @@ export interface FileRouteTypes {
     | '/api/openai-usage-details'
     | '/api/openrouter-cost'
     | '/api/openrouter-web-tools-wire'
+    | '/api/otel-usage'
     | '/api/summarize'
     | '/api/tool-call-lifecycle-wire'
     | '/api/tools-test'
@@ -497,6 +508,7 @@ export interface FileRouteTypes {
     | '/api/openai-usage-details'
     | '/api/openrouter-cost'
     | '/api/openrouter-web-tools-wire'
+    | '/api/otel-usage'
     | '/api/summarize'
     | '/api/tool-call-lifecycle-wire'
     | '/api/tools-test'
@@ -540,6 +552,7 @@ export interface RootRouteChildren {
   ApiOpenaiUsageDetailsRoute: typeof ApiOpenaiUsageDetailsRoute
   ApiOpenrouterCostRoute: typeof ApiOpenrouterCostRoute
   ApiOpenrouterWebToolsWireRoute: typeof ApiOpenrouterWebToolsWireRoute
+  ApiOtelUsageRoute: typeof ApiOtelUsageRoute
   ApiSummarizeRoute: typeof ApiSummarizeRoute
   ApiToolCallLifecycleWireRoute: typeof ApiToolCallLifecycleWireRoute
   ApiToolsTestRoute: typeof ApiToolsTestRoute
@@ -670,6 +683,13 @@ declare module '@tanstack/react-router' {
       preLoaderRoute: typeof ApiSummarizeRouteImport
       parentRoute: typeof rootRouteImport
     }
+    '/api/otel-usage': {
+      id: '/api/otel-usage'
+      path: '/api/otel-usage'
+      fullPath: '/api/otel-usage'
+      preLoaderRoute: typeof ApiOtelUsageRouteImport
+      parentRoute: typeof rootRouteImport
+    }
     '/api/openrouter-web-tools-wire': {
       id: '/api/openrouter-web-tools-wire'
       path: '/api/openrouter-web-tools-wire'
@@ -921,6 +941,7 @@ const rootRouteChildren: RootRouteChildren = {
   ApiOpenaiUsageDetailsRoute: ApiOpenaiUsageDetailsRoute,
   ApiOpenrouterCostRoute: ApiOpenrouterCostRoute,
   ApiOpenrouterWebToolsWireRoute: ApiOpenrouterWebToolsWireRoute,
+  ApiOtelUsageRoute: ApiOtelUsageRoute,
   ApiSummarizeRoute: ApiSummarizeRoute,
   ApiToolCallLifecycleWireRoute: ApiToolCallLifecycleWireRoute,
   ApiToolsTestRoute: ApiToolsTestRoute,
diff --git a/testing/e2e/src/routes/api.otel-usage.ts b/testing/e2e/src/routes/api.otel-usage.ts
new file mode 100644
index 000000000..0171fdd2b
--- /dev/null
+++ b/testing/e2e/src/routes/api.otel-usage.ts
@@ -0,0 +1,167 @@
+import { createFileRoute } from '@tanstack/react-router'
+import { chat, createChatOptions } from '@tanstack/ai'
+import { otelMiddleware } from '@tanstack/ai/middlewares/otel'
+import { createOpenaiChatCompletions } from '@tanstack/ai-openai'
+import { createOpenRouterText } from '@tanstack/ai-openrouter'
+import type {
+  AttributeValue,
+  Context,
+  Span,
+  SpanContext,
+  Tracer,
+} from '@opentelemetry/api'
+
+const LLMOCK_DEFAULT_BASE = process.env.LLMOCK_URL || 'http://127.0.0.1:4010'
+const DUMMY_KEY = 'sk-e2e-test-dummy-key'
+
+interface CapturedSpan {
+  name: string
+  kind?: number
+  attributes: Record<string, AttributeValue>
+  ended: boolean
+}
+
+/**
+ * In-memory tracer capturing each span's name, kind, attributes, and ended
+ * flag. Unlike the per-testId capture in `api.middleware-test.ts`, spans go
+ * into a local array that the calling POST returns in its response body.
+ */
+function createLocalCaptureTracer(): {
+  tracer: Tracer
+  spans: Array<CapturedSpan>
+} {
+  const spans: Array<CapturedSpan> = []
+  let spanSeq = 0
+  const tracer: Tracer = {
+    startSpan(name, options = {}, _ctx?: Context): Span {
+      const id = `span-${spanSeq++}`
+      const attributes: Record<string, AttributeValue> = {}
+      for (const [k, v] of Object.entries(options.attributes ?? {})) {
+        if (v !== undefined) attributes[k] = v as AttributeValue
+      }
+      const captured: CapturedSpan = {
+        name,
+        kind: options.kind,
+        attributes,
+        ended: false,
+      }
+      spans.push(captured)
+      const span: Span = {
+        spanContext(): SpanContext {
+          return { traceId: 'otel-usage-trace', spanId: id, traceFlags: 1 }
+        },
+        setAttribute(key, value) {
+          captured.attributes[key] = value as AttributeValue
+          return span
+        },
+        setAttributes(next) {
+          for (const [k, v] of Object.entries(next)) {
+            captured.attributes[k] = v as AttributeValue
+          }
+          return span
+        },
+        addEvent() {
+          return span
+        },
+        addLink() {
+          return span
+        },
+        addLinks() {
+          return span
+        },
+        setStatus() {
+          return span
+        },
+        updateName(next) {
+          captured.name = next
+          return span
+        },
+        end() {
+          captured.ended = true
+        },
+        isRecording() {
+          return !captured.ended
+        },
+        recordException() {},
+      }
+      return span
+    },
+    // Minimal sync-only stub — otelMiddleware never calls startActiveSpan.
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    startActiveSpan(...args: Array<any>) {
+      const fn = args[args.length - 1] as (span: Span) => unknown
+      const name = args[0] as string
+      const span = tracer.startSpan(name, {})
+      try {
+        return fn(span)
+      } finally {
+        span.end()
+      }
+    },
+  }
+  return { tracer, spans }
+}
+
+/**
+ * Drives a chat adapter with `otelMiddleware` against the existing
+ * hand-crafted aimock mounts that report rich usage, and returns the captured
+ * spans. Companion E2E proof for full-usage span emission (#721):
+ *
+ * - `provider: 'openai'` → `/openai-usage-details` mount, whose trailing usage
+ *   chunk carries `total_tokens`, `prompt_tokens_details.cached_tokens`, and
+ *   `completion_tokens_details.reasoning_tokens`.
+ * - `provider: 'openrouter'` → `/openrouter-cost` mount, whose trailing usage
+ *   chunk carries `cost` / `cost_details`.
+ *
+ * Responds `{ ok: true, spans }`, or `{ ok: false, error }` (still HTTP 200)
+ * when the chat run throws; the spec asserts usage on iteration/root spans.
+ */
+export const Route = createFileRoute('/api/otel-usage')({
+  server: {
+    handlers: {
+      POST: async ({ request }) => {
+        let provider = 'openai'
+        try {
+          const body = (await request.json()) as { provider?: string }
+          if (typeof body.provider === 'string') provider = body.provider
+        } catch {
+          // Missing/invalid JSON body — keep the 'openai' default.
+        }
+
+        const adapter =
+          provider === 'openrouter'
+            ? createOpenRouterText('openai/gpt-4o' as never, DUMMY_KEY, {
+                serverURL: `${LLMOCK_DEFAULT_BASE}/openrouter-cost/v1`,
+              })
+            : createOpenaiChatCompletions('gpt-4o', DUMMY_KEY, {
+                baseURL: `${LLMOCK_DEFAULT_BASE}/openai-usage-details/v1`,
+              })
+
+        const { tracer, spans } = createLocalCaptureTracer()
+
+        try {
+          for await (const _chunk of chat({
+            ...createChatOptions({ adapter }),
+            messages: [{ role: 'user', content: 'hi' }],
+            middleware: [otelMiddleware({ tracer })],
+          })) {
+            // Drain — the assertions live on the captured spans.
+          }
+        } catch (error) {
+          return new Response(
+            JSON.stringify({
+              ok: false,
+              error: error instanceof Error ? error.message : String(error),
+            }),
+            { status: 200, headers: { 'Content-Type': 'application/json' } },
+          )
+        }
+
+        return new Response(JSON.stringify({ ok: true, spans }), {
+          status: 200,
+          headers: { 'Content-Type': 'application/json' },
+        })
+      },
+    },
+  },
+})
diff --git a/testing/e2e/tests/middleware.spec.ts b/testing/e2e/tests/middleware.spec.ts
index e039d78ab..ccfbc19a5 100644
--- a/testing/e2e/tests/middleware.spec.ts
+++ b/testing/e2e/tests/middleware.spec.ts
@@ -193,6 +193,77 @@ test.describe('Middleware Lifecycle', () => {
     }
   })
 
+  test('otel middleware emits total/cache/reasoning usage details on spans', async ({
+    request,
+  }) => {
+    // `/api/otel-usage` drives the OpenAI adapter against the
+    // `/openai-usage-details` aimock mount (total_tokens + cached_tokens +
+    // reasoning_tokens) with otelMiddleware attached, and returns the
+    // captured spans. End-to-end proof for #721: the full TokenUsage reaches
+    // span attributes, not just input/output tokens.
+    const res = await request.post('/api/otel-usage', {
+      data: { provider: 'openai' },
+    })
+    expect(res.ok()).toBe(true)
+    const { ok, error, spans } = await res.json()
+    expect(error ?? null).toBeNull()
+    expect(ok).toBe(true)
+
+    const iterationSpans = spans.filter(
+      (s: any) => s.kind === SpanKind.CLIENT && s.ended,
+    )
+    expect(iterationSpans.length).toBeGreaterThanOrEqual(1)
+    expect(iterationSpans[0].attributes).toMatchObject({
+      'gen_ai.usage.input_tokens': 100,
+      'gen_ai.usage.output_tokens': 50,
+      'gen_ai.usage.total_tokens': 150,
+      'gen_ai.usage.cache_read.input_tokens': 80,
+      'gen_ai.usage.reasoning.output_tokens': 30,
+    })
+
+    // The root span rolls up the final usage on onFinish.
+    const rootSpans = spans.filter((s: any) => s.kind === SpanKind.INTERNAL)
+    expect(rootSpans).toHaveLength(1)
+    expect(rootSpans[0].attributes).toMatchObject({
+      'gen_ai.usage.total_tokens': 150,
+      'gen_ai.usage.cache_read.input_tokens': 80,
+      'gen_ai.usage.reasoning.output_tokens': 30,
+    })
+  })
+
+  test('otel middleware emits provider-reported cost on spans', async ({
+    request,
+  }) => {
+    // OpenRouter adapter against the `/openrouter-cost` mount, whose trailing
+    // usage chunk carries `cost` / `cost_details`. Backends like PostHog read
+    // `gen_ai.usage.cost` directly instead of re-deriving from price tables.
+    const res = await request.post('/api/otel-usage', {
+      data: { provider: 'openrouter' },
+    })
+    expect(res.ok()).toBe(true)
+    const { ok, error, spans } = await res.json()
+    expect(error ?? null).toBeNull()
+    expect(ok).toBe(true)
+
+    const iterationSpans = spans.filter(
+      (s: any) => s.kind === SpanKind.CLIENT && s.ended,
+    )
+    expect(iterationSpans.length).toBeGreaterThanOrEqual(1)
+    expect(iterationSpans[0].attributes).toMatchObject({
+      'gen_ai.usage.input_tokens': 11,
+      'gen_ai.usage.output_tokens': 3,
+      'gen_ai.usage.total_tokens': 14,
+      'gen_ai.usage.cost': 0.0042,
+      'tanstack.ai.usage.upstream_cost': 0.0038,
+      'tanstack.ai.usage.upstream_input_cost': 0.0012,
+      'tanstack.ai.usage.upstream_output_cost': 0.0026,
+    })
+
+    const rootSpans = spans.filter((s: any) => s.kind === SpanKind.INTERNAL)
+    expect(rootSpans).toHaveLength(1)
+    expect(rootSpans[0].attributes['gen_ai.usage.cost']).toBe(0.0042)
+  })
+
   test('no middleware passes content through unchanged', async ({
     page,
     testId,