TanStack · season179 · Jun 11, 2026
diff --git a/.changeset/otel-full-usage-emission.md b/.changeset/otel-full-usage-emission.md
@@ -0,0 +1,5 @@
+---
+'@tanstack/ai': minor
+---
+
+`otelMiddleware` now emits the rest of the reported `TokenUsage` on spans instead of only input/output tokens (#721). When the provider reports them, spans carry `gen_ai.usage.total_tokens`, `gen_ai.usage.cost` (provider-reported cost — cache discounts and gateway markup included, so backends like PostHog no longer re-derive cost from price tables), the official semconv cache/reasoning breakdowns (`gen_ai.usage.cache_read.input_tokens`, `gen_ai.usage.cache_creation.input_tokens`, `gen_ai.usage.reasoning.output_tokens`), and TanStack-namespaced attributes for duration-based billing (`tanstack.ai.usage.duration_seconds`) and the upstream cost split (`tanstack.ai.usage.upstream_cost` / `tanstack.ai.usage.upstream_input_cost` / `tanstack.ai.usage.upstream_output_cost`). Every new attribute is guarded — it is omitted when the provider doesn't report that field (a reported `0` is still emitted), so existing spans stay unchanged. Media-oriented fields (`unitsBilled`, per-modality token breakdowns) and the provider-shaped `providerUsageDetails` bag are intentionally not emitted; media-activity observability is tracked in #720.
diff --git a/docs/advanced/otel.md b/docs/advanced/otel.md
@@ -72,6 +72,15 @@ Iteration spans are numbered (`#0`, `#1`, ...) so distinct iterations of the sam
 | iteration | `gen_ai.request.max_tokens` | from config |
 | iteration | `gen_ai.usage.input_tokens` | per iteration |
 | iteration | `gen_ai.usage.output_tokens` | per iteration |
+| root / iteration | `gen_ai.usage.total_tokens` | provider-reported total |
+| root / iteration | `gen_ai.usage.cost` | provider-reported cost, when available |
+| root / iteration | `gen_ai.usage.cache_read.input_tokens` | cached prompt tokens, when reported |
+| root / iteration | `gen_ai.usage.cache_creation.input_tokens` | cache-write prompt tokens, when reported |
+| root / iteration | `gen_ai.usage.reasoning.output_tokens` | reasoning/thinking tokens, when reported |
+| root / iteration | `tanstack.ai.usage.duration_seconds` | duration-based billing (e.g. transcription), when reported |
+| root / iteration | `tanstack.ai.usage.upstream_cost` | gateway upstream cost (e.g. OpenRouter), when reported |
+| root / iteration | `tanstack.ai.usage.upstream_input_cost` | upstream input cost split, when reported |
+| root / iteration | `tanstack.ai.usage.upstream_output_cost` | upstream output cost split, when reported |
 | iteration | `gen_ai.response.finish_reasons` | `[stop]`, `[tool_calls]`, ... |
 | root | `gen_ai.usage.input_tokens` | rolled up |
 | root | `gen_ai.usage.output_tokens` | rolled up |
@@ -81,6 +90,8 @@ Iteration spans are numbered (`#0`, `#1`, ...) so distinct iterations of the sam
 | tool | `gen_ai.tool.type` | `function` |
 | tool | `tanstack.ai.tool.outcome` | `success` / `error` |
 
+Usage attributes beyond input/output tokens are emitted only when the provider reports them, so spans stay clean otherwise. Cache and reasoning breakdowns use the official GenAI semconv names; `gen_ai.usage.cost` and `gen_ai.usage.total_tokens` are de-facto extensions consumed directly by backends like PostHog — without them, backends re-derive cost from their own price tables and lose cache discounts and gateway markup. Fields with no established convention (duration-based billing, the upstream cost split) are TanStack-namespaced.
+
 ### Metrics
 
 Two GenAI-standard histograms:

diff --git a/docs/config.json b/docs/config.json
@@ -280,7 +280,8 @@
         {
           "label": "OpenTelemetry",
           "to": "advanced/otel",
-          "addedAt": "2026-05-08"
+          "addedAt": "2026-05-08",
+          "updatedAt": "2026-06-11"
         }
       ]
     },

diff --git a/packages/ai/src/middlewares/otel.ts b/packages/ai/src/middlewares/otel.ts
@@ -20,6 +20,7 @@ import type {
   ChatMiddleware,
   ChatMiddlewareContext,
 } from '../activities/chat/middleware/types'
+import type { TokenUsage } from '../types'
 
 /**
  * Scope (role) of an OTel span emitted by this middleware.
@@ -179,6 +180,59 @@ function firstNumber(...candidates: Array<unknown>): number | undefined {
   return undefined
 }
 
+/**
+ * Build span usage attributes (`gen_ai.usage.*` and `tanstack.ai.usage.*`)
+ * from a `TokenUsage`.
+ *
+ * Input/output tokens are always set. Cost, total tokens, cache/reasoning
+ * breakdowns, duration, and the upstream cost split are optional: each goes
+ * through `firstNumber` and is added only when the result `!== undefined`
+ * (not a truthiness check), so spans stay clean when a field isn't reported.
+ * Cache and reasoning use the official GenAI semconv names;
+ * `gen_ai.usage.cost` and `gen_ai.usage.total_tokens` are de-facto extensions
+ * consumed by backends like PostHog (which otherwise re-derive cost from
+ * their own price tables, losing cache discounts and gateway markup).
+ * `costDetails` / `durationSeconds` have no convention, so they are TanStack-
+ * namespaced. Not emitted (media-oriented, see #720): `unitsBilled`,
+ * `providerUsageDetails`, and the per-modality token breakdowns.
+ */
+function usageAttributes(usage: TokenUsage): Record<string, AttributeValue> {
+  const attrs: Record<string, AttributeValue> = {
+    'gen_ai.usage.input_tokens': usage.promptTokens,
+    'gen_ai.usage.output_tokens': usage.completionTokens,
+  }
+  const optional: Array<[key: string, value: unknown]> = [
+    ['gen_ai.usage.total_tokens', usage.totalTokens],
+    ['gen_ai.usage.cost', usage.cost],
+    [
+      'gen_ai.usage.cache_read.input_tokens',
+      usage.promptTokensDetails?.cachedTokens,
+    ],
+    [
+      'gen_ai.usage.cache_creation.input_tokens',
+      usage.promptTokensDetails?.cacheWriteTokens,
+    ],
+    [
+      'gen_ai.usage.reasoning.output_tokens',
+      usage.completionTokensDetails?.reasoningTokens,
+    ],
+    ['tanstack.ai.usage.duration_seconds', usage.durationSeconds],
+    ['tanstack.ai.usage.upstream_cost', usage.costDetails?.upstreamCost],
+    [
+      'tanstack.ai.usage.upstream_input_cost',
+      usage.costDetails?.upstreamInputCost,
+    ],
+    [
+      'tanstack.ai.usage.upstream_output_cost',
+      usage.costDetails?.upstreamOutputCost,
+    ],
+  ]
+  for (const [key, value] of optional) {
+    const num = firstNumber(value)
+    if (num !== undefined) attrs[key] = num
+  }
+  return attrs
+}
+
 function errorMessage(err: unknown): string | undefined {
   if (err instanceof Error) return err.message
   if (typeof err === 'string') return err
@@ -524,10 +578,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware {
         // `runOnUsage` when `chunk.usage` is present, and `onUsage` is the
         // canonical place for the metric. Recording in both would double-count.
         if (chunk.usage) {
-          span.setAttributes({
-            'gen_ai.usage.input_tokens': chunk.usage.promptTokens,
-            'gen_ai.usage.output_tokens': chunk.usage.completionTokens,
-          })
+          span.setAttributes(usageAttributes(chunk.usage))
         }
 
         if (captureContent && state.assistantTextBuffer.length > 0) {
@@ -584,10 +635,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware {
         }
 
         const span = state.currentIterationSpan ?? state.rootSpan
-        span.setAttributes({
-          'gen_ai.usage.input_tokens': usage.promptTokens,
-          'gen_ai.usage.output_tokens': usage.completionTokens,
-        })
+        span.setAttributes(usageAttributes(usage))
       })
     },
 
@@ -905,10 +953,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware {
         }
 
         if (info.usage) {
-          state.rootSpan.setAttributes({
-            'gen_ai.usage.input_tokens': info.usage.promptTokens,
-            'gen_ai.usage.output_tokens': info.usage.completionTokens,
-          })
+          state.rootSpan.setAttributes(usageAttributes(info.usage))
         }
         if (info.finishReason) {
           state.rootSpan.setAttribute('gen_ai.response.finish_reasons', [

diff --git a/packages/ai/tests/middlewares/otel.test.ts b/packages/ai/tests/middlewares/otel.test.ts
@@ -307,6 +307,151 @@ describe('otelMiddleware — duration histogram and rollup', () => {
   })
 })
 
+describe('otelMiddleware — full usage emission', () => {
+  // Fixture with everything `TokenUsage` carries beyond input/output tokens:
+  // cost, totals, cache/reasoning breakdowns, duration-based billing, and the
+  // upstream cost split (backends like PostHog read `gen_ai.usage.cost`
+  // directly instead of re-deriving it from price tables). `totalTokens` is
+  // 165, not 100 + 50, so the tests pin the reported total, not a recomputed sum.
+  const fullUsage = {
+    promptTokens: 100,
+    completionTokens: 50,
+    totalTokens: 165,
+    promptTokensDetails: { cachedTokens: 80, cacheWriteTokens: 10 },
+    completionTokensDetails: { reasoningTokens: 15 },
+    durationSeconds: 2.5,
+    cost: 0.0123,
+    costDetails: {
+      upstreamCost: 0.01,
+      upstreamInputCost: 0.004,
+      upstreamOutputCost: 0.006,
+    },
+  }
+
+  const expectFullUsageAttrs = (span: FakeSpan) => {
+    expect(span.attributes['gen_ai.usage.input_tokens']).toBe(100)
+    expect(span.attributes['gen_ai.usage.output_tokens']).toBe(50)
+    expect(span.attributes['gen_ai.usage.total_tokens']).toBe(165)
+    expect(span.attributes['gen_ai.usage.cost']).toBe(0.0123)
+    expect(span.attributes['gen_ai.usage.cache_read.input_tokens']).toBe(80)
+    expect(span.attributes['gen_ai.usage.cache_creation.input_tokens']).toBe(
+      10,
+    )
+    expect(span.attributes['gen_ai.usage.reasoning.output_tokens']).toBe(15)
+    expect(span.attributes['tanstack.ai.usage.duration_seconds']).toBe(2.5)
+    expect(span.attributes['tanstack.ai.usage.upstream_cost']).toBe(0.01)
+    expect(span.attributes['tanstack.ai.usage.upstream_input_cost']).toBe(
+      0.004,
+    )
+    expect(span.attributes['tanstack.ai.usage.upstream_output_cost']).toBe(
+      0.006,
+    )
+  }
+
+  it('emits cost, totals, and detail breakdowns from RUN_FINISHED chunk.usage', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onChunk?.(ctx, {
+      ...ev.runFinished('stop'),
+      model: 'gpt-4o',
+      usage: fullUsage,
+    })
+
+    expectFullUsageAttrs(spans[1]!) // NOTE(review): presumably spans[1] is the iteration span and spans[0] the root — confirm against createFakeTracer
+  })
+
+  it('emits cost, totals, and detail breakdowns from onUsage', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onUsage?.(ctx, fullUsage)
+
+    expectFullUsageAttrs(spans[1]!)
+  })
+
+  it('rolls up cost, totals, and detail breakdowns onto the root span on onFinish', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' })
+    await mw.onFinish?.(ctx, {
+      finishReason: 'stop',
+      duration: 1250,
+      content: '',
+      usage: fullUsage,
+    })
+
+    expectFullUsageAttrs(spans[0]!)
+  })
+
+  it('omits optional usage attributes when the provider does not report them', async () => {
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onUsage?.(ctx, {
+      promptTokens: 100,
+      completionTokens: 50,
+      totalTokens: 150,
+    })
+
+    const span = spans[1]!
+    expect(span.attributes['gen_ai.usage.input_tokens']).toBe(100)
+    expect(span.attributes['gen_ai.usage.output_tokens']).toBe(50)
+    expect(span.attributes['gen_ai.usage.total_tokens']).toBe(150)
+    expect(span.attributes['gen_ai.usage.cost']).toBeUndefined()
+    expect(
+      span.attributes['gen_ai.usage.cache_read.input_tokens'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['gen_ai.usage.cache_creation.input_tokens'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['gen_ai.usage.reasoning.output_tokens'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['tanstack.ai.usage.duration_seconds'],
+    ).toBeUndefined()
+    expect(span.attributes['tanstack.ai.usage.upstream_cost']).toBeUndefined()
+    expect(
+      span.attributes['tanstack.ai.usage.upstream_input_cost'],
+    ).toBeUndefined()
+    expect(
+      span.attributes['tanstack.ai.usage.upstream_output_cost'],
+    ).toBeUndefined()
+  })
+
+  it('emits zero-valued usage fields instead of dropping them', async () => {
+    // cost 0 is a real report (OpenRouter free models), and the OpenRouter
+    // extractor deliberately preserves it. Pin that the presence guard is
+    // `!== undefined`, not truthiness — a truthy guard would drop zeros.
+    const { tracer, spans } = createFakeTracer()
+    const mw = otelMiddleware({ tracer })
+    const ctx = makeCtx()
+
+    await runToIterationStart(mw, ctx)
+    await mw.onUsage?.(ctx, {
+      promptTokens: 100,
+      completionTokens: 50,
+      totalTokens: 150,
+      cost: 0,
+      promptTokensDetails: { cachedTokens: 0 },
+    })
+
+    const span = spans[1]!
+    expect(span.attributes['gen_ai.usage.cost']).toBe(0)
+    expect(span.attributes['gen_ai.usage.cache_read.input_tokens']).toBe(0)
+  })
+})
+
 describe('otelMiddleware — tool spans', () => {
   it('creates a tool span as child of the iteration span (including after RUN_FINISHED)', async () => {
     const { tracer, spans } = createFakeTracer()

diff --git a/testing/e2e/src/routeTree.gen.ts b/testing/e2e/src/routeTree.gen.ts
@@ -26,6 +26,7 @@ import { Route as ApiTranscriptionRouteImport } from './routes/api.transcription
 import { Route as ApiToolsTestRouteImport } from './routes/api.tools-test'
 import { Route as ApiToolCallLifecycleWireRouteImport } from './routes/api.tool-call-lifecycle-wire'
 import { Route as ApiSummarizeRouteImport } from './routes/api.summarize'
+import { Route as ApiOtelUsageRouteImport } from './routes/api.otel-usage'
 import { Route as ApiOpenrouterWebToolsWireRouteImport } from './routes/api.openrouter-web-tools-wire'
 import { Route as ApiOpenrouterCostRouteImport } from './routes/api.openrouter-cost'
 import { Route as ApiOpenaiUsageDetailsRouteImport } from './routes/api.openai-usage-details'
@@ -136,6 +137,11 @@ const ApiSummarizeRoute = ApiSummarizeRouteImport.update({
   path: '/api/summarize',
   getParentRoute: () => rootRouteImport,
 } as any)
+const ApiOtelUsageRoute = ApiOtelUsageRouteImport.update({
+  id: '/api/otel-usage',
+  path: '/api/otel-usage',
+  getParentRoute: () => rootRouteImport,
+} as any)
 const ApiOpenrouterWebToolsWireRoute =
   ApiOpenrouterWebToolsWireRouteImport.update({
     id: '/api/openrouter-web-tools-wire',
@@ -284,6 +290,7 @@ export interface FileRoutesByFullPath {
   '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute
   '/api/openrouter-cost': typeof ApiOpenrouterCostRoute
   '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute
+  '/api/otel-usage': typeof ApiOtelUsageRoute
   '/api/summarize': typeof ApiSummarizeRoute
   '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute
   '/api/tools-test': typeof ApiToolsTestRoute
@@ -326,6 +333,7 @@ export interface FileRoutesByTo {
   '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute
   '/api/openrouter-cost': typeof ApiOpenrouterCostRoute
   '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute
+  '/api/otel-usage': typeof ApiOtelUsageRoute
   '/api/summarize': typeof ApiSummarizeRoute
   '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute
   '/api/tools-test': typeof ApiToolsTestRoute
@@ -369,6 +377,7 @@ export interface FileRoutesById {
   '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute
   '/api/openrouter-cost': typeof ApiOpenrouterCostRoute
   '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute
+  '/api/otel-usage': typeof ApiOtelUsageRoute
   '/api/summarize': typeof ApiSummarizeRoute
   '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute
   '/api/tools-test': typeof ApiToolsTestRoute
@@ -413,6 +422,7 @@ export interface FileRouteTypes {
     | '/api/openai-usage-details'
     | '/api/openrouter-cost'
     | '/api/openrouter-web-tools-wire'
+    | '/api/otel-usage'
     | '/api/summarize'
     | '/api/tool-call-lifecycle-wire'
     | '/api/tools-test'
@@ -455,6 +465,7 @@ export interface FileRouteTypes {
     | '/api/openai-usage-details'
     | '/api/openrouter-cost'
     | '/api/openrouter-web-tools-wire'
+    | '/api/otel-usage'
     | '/api/summarize'
     | '/api/tool-call-lifecycle-wire'
     | '/api/tools-test'
@@ -497,6 +508,7 @@ export interface FileRouteTypes {
     | '/api/openai-usage-details'
     | '/api/openrouter-cost'
     | '/api/openrouter-web-tools-wire'
+    | '/api/otel-usage'
     | '/api/summarize'
     | '/api/tool-call-lifecycle-wire'
     | '/api/tools-test'
@@ -540,6 +552,7 @@ export interface RootRouteChildren {
   ApiOpenaiUsageDetailsRoute: typeof ApiOpenaiUsageDetailsRoute
   ApiOpenrouterCostRoute: typeof ApiOpenrouterCostRoute
   ApiOpenrouterWebToolsWireRoute: typeof ApiOpenrouterWebToolsWireRoute
+  ApiOtelUsageRoute: typeof ApiOtelUsageRoute
   ApiSummarizeRoute: typeof ApiSummarizeRoute
   ApiToolCallLifecycleWireRoute: typeof ApiToolCallLifecycleWireRoute
   ApiToolsTestRoute: typeof ApiToolsTestRoute
@@ -670,6 +683,13 @@ declare module '@tanstack/react-router' {
       preLoaderRoute: typeof ApiSummarizeRouteImport
       parentRoute: typeof rootRouteImport
     }
+    '/api/otel-usage': {
+      id: '/api/otel-usage'
+      path: '/api/otel-usage'
+      fullPath: '/api/otel-usage'
+      preLoaderRoute: typeof ApiOtelUsageRouteImport
+      parentRoute: typeof rootRouteImport
+    }
     '/api/openrouter-web-tools-wire': {
       id: '/api/openrouter-web-tools-wire'
       path: '/api/openrouter-web-tools-wire'
@@ -921,6 +941,7 @@ const rootRouteChildren: RootRouteChildren = {
   ApiOpenaiUsageDetailsRoute: ApiOpenaiUsageDetailsRoute,
   ApiOpenrouterCostRoute: ApiOpenrouterCostRoute,
   ApiOpenrouterWebToolsWireRoute: ApiOpenrouterWebToolsWireRoute,
+  ApiOtelUsageRoute: ApiOtelUsageRoute,
   ApiSummarizeRoute: ApiSummarizeRoute,
   ApiToolCallLifecycleWireRoute: ApiToolCallLifecycleWireRoute,
   ApiToolsTestRoute: ApiToolsTestRoute,