From c7df2a33a8742e5544bb210020d6547094e6fd8a Mon Sep 17 00:00:00 2001 From: Season Date: Thu, 11 Jun 2026 22:56:41 +0800 Subject: [PATCH] feat(ai): emit full usage on otel spans (cost, totals, cache/reasoning details) otelMiddleware only emitted gen_ai.usage.input_tokens/output_tokens even though TokenUsage already carries provider-reported cost, total tokens, cache/reasoning breakdowns, and duration-based billing. Backends like PostHog had to re-derive cost from their own price tables, losing cache discounts and gateway markup (OpenRouter), and duration-billed activities had no cost signal at all. A shared usageAttributes() helper now builds the full guarded attribute set at all three emission sites (RUN_FINISHED chunk, onUsage, onFinish rollup): - gen_ai.usage.total_tokens / gen_ai.usage.cost (de-facto extensions consumed directly by PostHog and LiteLLM-style backends) - gen_ai.usage.cache_read.input_tokens, cache_creation.input_tokens, reasoning.output_tokens (official GenAI semconv names) - tanstack.ai.usage.duration_seconds and the upstream cost split (no semconv equivalent exists) E2E: new /api/otel-usage route drives the existing openai-usage-details and openrouter-cost aimock mounts through otelMiddleware with a local capture tracer; middleware.spec.ts asserts the attributes land on iteration and root spans. 
Fixes #721 --- .changeset/otel-full-usage-emission.md | 5 + docs/advanced/otel.md | 11 ++ docs/config.json | 3 +- packages/ai/src/middlewares/otel.ts | 69 +++++++-- packages/ai/tests/middlewares/otel.test.ts | 145 ++++++++++++++++++ testing/e2e/src/routeTree.gen.ts | 21 +++ testing/e2e/src/routes/api.otel-usage.ts | 167 +++++++++++++++++++++ testing/e2e/tests/middleware.spec.ts | 71 +++++++++ 8 files changed, 479 insertions(+), 13 deletions(-) create mode 100644 .changeset/otel-full-usage-emission.md create mode 100644 testing/e2e/src/routes/api.otel-usage.ts diff --git a/.changeset/otel-full-usage-emission.md b/.changeset/otel-full-usage-emission.md new file mode 100644 index 000000000..bda815e31 --- /dev/null +++ b/.changeset/otel-full-usage-emission.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai': minor +--- + +`otelMiddleware` now emits the rest of the reported `TokenUsage` on spans instead of only input/output tokens (#721). When the provider reports them, spans carry `gen_ai.usage.total_tokens`, `gen_ai.usage.cost` (provider-reported cost — cache discounts and gateway markup included, so backends like PostHog no longer re-derive cost from price tables), the official semconv cache/reasoning breakdowns (`gen_ai.usage.cache_read.input_tokens`, `gen_ai.usage.cache_creation.input_tokens`, `gen_ai.usage.reasoning.output_tokens`), and TanStack-namespaced attributes for duration-based billing (`tanstack.ai.usage.duration_seconds`) and the upstream cost split (`tanstack.ai.usage.upstream_cost` / `upstream_input_cost` / `upstream_output_cost`). All attributes are guarded — spans stay unchanged when a provider doesn't report a field. Media-oriented fields (`unitsBilled`, per-modality token breakdowns) and the provider-shaped `providerUsageDetails` bag are intentionally not emitted; media-activity observability is tracked in #720. 
diff --git a/docs/advanced/otel.md b/docs/advanced/otel.md index cf5f3ac2c..03af9daa8 100644 --- a/docs/advanced/otel.md +++ b/docs/advanced/otel.md @@ -72,6 +72,15 @@ Iteration spans are numbered (`#0`, `#1`, ...) so distinct iterations of the sam | iteration | `gen_ai.request.max_tokens` | from config | | iteration | `gen_ai.usage.input_tokens` | per iteration | | iteration | `gen_ai.usage.output_tokens` | per iteration | +| root / iteration | `gen_ai.usage.total_tokens` | provider-reported total | +| root / iteration | `gen_ai.usage.cost` | provider-reported cost, when available | +| root / iteration | `gen_ai.usage.cache_read.input_tokens` | cached prompt tokens, when reported | +| root / iteration | `gen_ai.usage.cache_creation.input_tokens` | cache-write prompt tokens, when reported | +| root / iteration | `gen_ai.usage.reasoning.output_tokens` | reasoning/thinking tokens, when reported | +| root / iteration | `tanstack.ai.usage.duration_seconds` | duration-based billing (e.g. transcription), when reported | +| root / iteration | `tanstack.ai.usage.upstream_cost` | gateway upstream cost (e.g. OpenRouter), when reported | +| root / iteration | `tanstack.ai.usage.upstream_input_cost` | upstream input cost split, when reported | +| root / iteration | `tanstack.ai.usage.upstream_output_cost` | upstream output cost split, when reported | | iteration | `gen_ai.response.finish_reasons` | `[stop]`, `[tool_calls]`, ... | | root | `gen_ai.usage.input_tokens` | rolled up | | root | `gen_ai.usage.output_tokens` | rolled up | @@ -81,6 +90,8 @@ Iteration spans are numbered (`#0`, `#1`, ...) so distinct iterations of the sam | tool | `gen_ai.tool.type` | `function` | | tool | `tanstack.ai.tool.outcome` | `success` / `error` | +Usage attributes beyond input/output tokens are emitted only when the provider reports them, so spans stay clean otherwise. 
Cache and reasoning breakdowns use the official GenAI semconv names; `gen_ai.usage.cost` and `gen_ai.usage.total_tokens` are de-facto extensions consumed directly by backends like PostHog — without them, backends re-derive cost from their own price tables and lose cache discounts and gateway markup. Fields with no established convention (duration-based billing, the upstream cost split) are TanStack-namespaced. + ### Metrics Two GenAI-standard histograms: diff --git a/docs/config.json b/docs/config.json index e3fc3b712..180fa7905 100644 --- a/docs/config.json +++ b/docs/config.json @@ -280,7 +280,8 @@ { "label": "OpenTelemetry", "to": "advanced/otel", - "addedAt": "2026-05-08" + "addedAt": "2026-05-08", + "updatedAt": "2026-06-11" } ] }, diff --git a/packages/ai/src/middlewares/otel.ts b/packages/ai/src/middlewares/otel.ts index c953e3f2f..5d789bb40 100644 --- a/packages/ai/src/middlewares/otel.ts +++ b/packages/ai/src/middlewares/otel.ts @@ -20,6 +20,7 @@ import type { ChatMiddleware, ChatMiddlewareContext, } from '../activities/chat/middleware/types' +import type { TokenUsage } from '../types' /** * Scope (role) of an OTel span emitted by this middleware. @@ -179,6 +180,59 @@ function firstNumber(...candidates: Array): number | undefined { return undefined } +/** + * Build the full set of `gen_ai.usage.*` span attributes from a `TokenUsage`. + * + * Beyond input/output tokens, this emits provider-reported cost, total tokens, + * cache and reasoning breakdowns, and duration-based billing — every field is + * guarded so spans stay clean when a provider doesn't report it. Cache and + * reasoning use the official GenAI semconv names; `gen_ai.usage.cost` and + * `gen_ai.usage.total_tokens` are de-facto extensions consumed by backends + * like PostHog (which otherwise re-derive cost from their own price tables, + * losing cache discounts and gateway markup). Fields with no semconv or + * de-facto convention (`costDetails`, `durationSeconds`) are + * TanStack-namespaced. 
Deliberately not emitted: `unitsBilled`, + * `providerUsageDetails`, and the per-modality token breakdowns — those are + * media-oriented; media-activity observability is tracked in #720. + */ +function usageAttributes(usage: TokenUsage): Record<string, number> { + const attrs: Record<string, number> = { + 'gen_ai.usage.input_tokens': usage.promptTokens, + 'gen_ai.usage.output_tokens': usage.completionTokens, + } + const optional: Array<[key: string, value: unknown]> = [ + ['gen_ai.usage.total_tokens', usage.totalTokens], + ['gen_ai.usage.cost', usage.cost], + [ + 'gen_ai.usage.cache_read.input_tokens', + usage.promptTokensDetails?.cachedTokens, + ], + [ + 'gen_ai.usage.cache_creation.input_tokens', + usage.promptTokensDetails?.cacheWriteTokens, + ], + [ + 'gen_ai.usage.reasoning.output_tokens', + usage.completionTokensDetails?.reasoningTokens, + ], + ['tanstack.ai.usage.duration_seconds', usage.durationSeconds], + ['tanstack.ai.usage.upstream_cost', usage.costDetails?.upstreamCost], + [ + 'tanstack.ai.usage.upstream_input_cost', + usage.costDetails?.upstreamInputCost, + ], + [ + 'tanstack.ai.usage.upstream_output_cost', + usage.costDetails?.upstreamOutputCost, + ], + ] + for (const [key, value] of optional) { + const num = firstNumber(value) + if (num !== undefined) attrs[key] = num + } + return attrs +} + function errorMessage(err: unknown): string | undefined { if (err instanceof Error) return err.message if (typeof err === 'string') return err @@ -524,10 +578,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware { // `runOnUsage` when `chunk.usage` is present, and `onUsage` is the // canonical place for the metric. Recording in both would double-count. 
if (chunk.usage) { - span.setAttributes({ - 'gen_ai.usage.input_tokens': chunk.usage.promptTokens, - 'gen_ai.usage.output_tokens': chunk.usage.completionTokens, - }) + span.setAttributes(usageAttributes(chunk.usage)) } if (captureContent && state.assistantTextBuffer.length > 0) { @@ -584,10 +635,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware { } const span = state.currentIterationSpan ?? state.rootSpan - span.setAttributes({ - 'gen_ai.usage.input_tokens': usage.promptTokens, - 'gen_ai.usage.output_tokens': usage.completionTokens, - }) + span.setAttributes(usageAttributes(usage)) }) }, @@ -905,10 +953,7 @@ export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware { } if (info.usage) { - state.rootSpan.setAttributes({ - 'gen_ai.usage.input_tokens': info.usage.promptTokens, - 'gen_ai.usage.output_tokens': info.usage.completionTokens, - }) + state.rootSpan.setAttributes(usageAttributes(info.usage)) } if (info.finishReason) { state.rootSpan.setAttribute('gen_ai.response.finish_reasons', [ diff --git a/packages/ai/tests/middlewares/otel.test.ts b/packages/ai/tests/middlewares/otel.test.ts index 95bc3d6bf..68f5ec56c 100644 --- a/packages/ai/tests/middlewares/otel.test.ts +++ b/packages/ai/tests/middlewares/otel.test.ts @@ -307,6 +307,151 @@ describe('otelMiddleware — duration histogram and rollup', () => { }) }) +describe('otelMiddleware — full usage emission', () => { + // Everything `TokenUsage` carries beyond input/output tokens: cost, + // totals, cache/reasoning breakdowns, duration-based billing, and the + // upstream cost split. Backends like PostHog consume `gen_ai.usage.cost` + // directly; without it they re-derive cost from their own price tables + // and lose cache discounts / gateway markup (OpenRouter). 
+ const fullUsage = { + promptTokens: 100, + completionTokens: 50, + totalTokens: 165, + promptTokensDetails: { cachedTokens: 80, cacheWriteTokens: 10 }, + completionTokensDetails: { reasoningTokens: 15 }, + durationSeconds: 2.5, + cost: 0.0123, + costDetails: { + upstreamCost: 0.01, + upstreamInputCost: 0.004, + upstreamOutputCost: 0.006, + }, + } + + const expectFullUsageAttrs = (span: FakeSpan) => { + expect(span.attributes['gen_ai.usage.input_tokens']).toBe(100) + expect(span.attributes['gen_ai.usage.output_tokens']).toBe(50) + expect(span.attributes['gen_ai.usage.total_tokens']).toBe(165) + expect(span.attributes['gen_ai.usage.cost']).toBe(0.0123) + expect(span.attributes['gen_ai.usage.cache_read.input_tokens']).toBe(80) + expect(span.attributes['gen_ai.usage.cache_creation.input_tokens']).toBe( + 10, + ) + expect(span.attributes['gen_ai.usage.reasoning.output_tokens']).toBe(15) + expect(span.attributes['tanstack.ai.usage.duration_seconds']).toBe(2.5) + expect(span.attributes['tanstack.ai.usage.upstream_cost']).toBe(0.01) + expect(span.attributes['tanstack.ai.usage.upstream_input_cost']).toBe( + 0.004, + ) + expect(span.attributes['tanstack.ai.usage.upstream_output_cost']).toBe( + 0.006, + ) + } + + it('emits cost, totals, and detail breakdowns from RUN_FINISHED chunk.usage', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, { + ...ev.runFinished('stop'), + model: 'gpt-4o', + usage: fullUsage, + }) + + expectFullUsageAttrs(spans[1]!) + }) + + it('emits cost, totals, and detail breakdowns from onUsage', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onUsage?.(ctx, fullUsage) + + expectFullUsageAttrs(spans[1]!) 
+ }) + + it('rolls up cost, totals, and detail breakdowns onto the root span on onFinish', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' }) + await mw.onFinish?.(ctx, { + finishReason: 'stop', + duration: 1250, + content: '', + usage: fullUsage, + }) + + expectFullUsageAttrs(spans[0]!) + }) + + it('omits optional usage attributes when the provider does not report them', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onUsage?.(ctx, { + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + }) + + const span = spans[1]! + expect(span.attributes['gen_ai.usage.input_tokens']).toBe(100) + expect(span.attributes['gen_ai.usage.output_tokens']).toBe(50) + expect(span.attributes['gen_ai.usage.total_tokens']).toBe(150) + expect(span.attributes['gen_ai.usage.cost']).toBeUndefined() + expect( + span.attributes['gen_ai.usage.cache_read.input_tokens'], + ).toBeUndefined() + expect( + span.attributes['gen_ai.usage.cache_creation.input_tokens'], + ).toBeUndefined() + expect( + span.attributes['gen_ai.usage.reasoning.output_tokens'], + ).toBeUndefined() + expect( + span.attributes['tanstack.ai.usage.duration_seconds'], + ).toBeUndefined() + expect(span.attributes['tanstack.ai.usage.upstream_cost']).toBeUndefined() + expect( + span.attributes['tanstack.ai.usage.upstream_input_cost'], + ).toBeUndefined() + expect( + span.attributes['tanstack.ai.usage.upstream_output_cost'], + ).toBeUndefined() + }) + + it('emits zero-valued usage fields instead of dropping them', async () => { + // cost 0 is a real report (OpenRouter free models), and the OpenRouter + // extractor deliberately preserves it. 
Pin that the presence guard is + // `!== undefined`, not truthiness — a truthy guard would drop zeros. + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onUsage?.(ctx, { + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + cost: 0, + promptTokensDetails: { cachedTokens: 0 }, + }) + + const span = spans[1]! + expect(span.attributes['gen_ai.usage.cost']).toBe(0) + expect(span.attributes['gen_ai.usage.cache_read.input_tokens']).toBe(0) + }) +}) + describe('otelMiddleware — tool spans', () => { it('creates a tool span as child of the iteration span (including after RUN_FINISHED)', async () => { const { tracer, spans } = createFakeTracer() diff --git a/testing/e2e/src/routeTree.gen.ts b/testing/e2e/src/routeTree.gen.ts index 23f0cc4ff..8b3dd03fe 100644 --- a/testing/e2e/src/routeTree.gen.ts +++ b/testing/e2e/src/routeTree.gen.ts @@ -26,6 +26,7 @@ import { Route as ApiTranscriptionRouteImport } from './routes/api.transcription import { Route as ApiToolsTestRouteImport } from './routes/api.tools-test' import { Route as ApiToolCallLifecycleWireRouteImport } from './routes/api.tool-call-lifecycle-wire' import { Route as ApiSummarizeRouteImport } from './routes/api.summarize' +import { Route as ApiOtelUsageRouteImport } from './routes/api.otel-usage' import { Route as ApiOpenrouterWebToolsWireRouteImport } from './routes/api.openrouter-web-tools-wire' import { Route as ApiOpenrouterCostRouteImport } from './routes/api.openrouter-cost' import { Route as ApiOpenaiUsageDetailsRouteImport } from './routes/api.openai-usage-details' @@ -136,6 +137,11 @@ const ApiSummarizeRoute = ApiSummarizeRouteImport.update({ path: '/api/summarize', getParentRoute: () => rootRouteImport, } as any) +const ApiOtelUsageRoute = ApiOtelUsageRouteImport.update({ + id: '/api/otel-usage', + path: '/api/otel-usage', + getParentRoute: () => rootRouteImport, +} as any) const 
ApiOpenrouterWebToolsWireRoute = ApiOpenrouterWebToolsWireRouteImport.update({ id: '/api/openrouter-web-tools-wire', @@ -284,6 +290,7 @@ export interface FileRoutesByFullPath { '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute '/api/openrouter-cost': typeof ApiOpenrouterCostRoute '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute + '/api/otel-usage': typeof ApiOtelUsageRoute '/api/summarize': typeof ApiSummarizeRoute '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute '/api/tools-test': typeof ApiToolsTestRoute @@ -326,6 +333,7 @@ export interface FileRoutesByTo { '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute '/api/openrouter-cost': typeof ApiOpenrouterCostRoute '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute + '/api/otel-usage': typeof ApiOtelUsageRoute '/api/summarize': typeof ApiSummarizeRoute '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute '/api/tools-test': typeof ApiToolsTestRoute @@ -369,6 +377,7 @@ export interface FileRoutesById { '/api/openai-usage-details': typeof ApiOpenaiUsageDetailsRoute '/api/openrouter-cost': typeof ApiOpenrouterCostRoute '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute + '/api/otel-usage': typeof ApiOtelUsageRoute '/api/summarize': typeof ApiSummarizeRoute '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute '/api/tools-test': typeof ApiToolsTestRoute @@ -413,6 +422,7 @@ export interface FileRouteTypes { | '/api/openai-usage-details' | '/api/openrouter-cost' | '/api/openrouter-web-tools-wire' + | '/api/otel-usage' | '/api/summarize' | '/api/tool-call-lifecycle-wire' | '/api/tools-test' @@ -455,6 +465,7 @@ export interface FileRouteTypes { | '/api/openai-usage-details' | '/api/openrouter-cost' | '/api/openrouter-web-tools-wire' + | '/api/otel-usage' | '/api/summarize' | '/api/tool-call-lifecycle-wire' | '/api/tools-test' @@ -497,6 +508,7 @@ export interface FileRouteTypes { | 
'/api/openai-usage-details' | '/api/openrouter-cost' | '/api/openrouter-web-tools-wire' + | '/api/otel-usage' | '/api/summarize' | '/api/tool-call-lifecycle-wire' | '/api/tools-test' @@ -540,6 +552,7 @@ export interface RootRouteChildren { ApiOpenaiUsageDetailsRoute: typeof ApiOpenaiUsageDetailsRoute ApiOpenrouterCostRoute: typeof ApiOpenrouterCostRoute ApiOpenrouterWebToolsWireRoute: typeof ApiOpenrouterWebToolsWireRoute + ApiOtelUsageRoute: typeof ApiOtelUsageRoute ApiSummarizeRoute: typeof ApiSummarizeRoute ApiToolCallLifecycleWireRoute: typeof ApiToolCallLifecycleWireRoute ApiToolsTestRoute: typeof ApiToolsTestRoute @@ -670,6 +683,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof ApiSummarizeRouteImport parentRoute: typeof rootRouteImport } + '/api/otel-usage': { + id: '/api/otel-usage' + path: '/api/otel-usage' + fullPath: '/api/otel-usage' + preLoaderRoute: typeof ApiOtelUsageRouteImport + parentRoute: typeof rootRouteImport + } '/api/openrouter-web-tools-wire': { id: '/api/openrouter-web-tools-wire' path: '/api/openrouter-web-tools-wire' @@ -921,6 +941,7 @@ const rootRouteChildren: RootRouteChildren = { ApiOpenaiUsageDetailsRoute: ApiOpenaiUsageDetailsRoute, ApiOpenrouterCostRoute: ApiOpenrouterCostRoute, ApiOpenrouterWebToolsWireRoute: ApiOpenrouterWebToolsWireRoute, + ApiOtelUsageRoute: ApiOtelUsageRoute, ApiSummarizeRoute: ApiSummarizeRoute, ApiToolCallLifecycleWireRoute: ApiToolCallLifecycleWireRoute, ApiToolsTestRoute: ApiToolsTestRoute, diff --git a/testing/e2e/src/routes/api.otel-usage.ts b/testing/e2e/src/routes/api.otel-usage.ts new file mode 100644 index 000000000..0171fdd2b --- /dev/null +++ b/testing/e2e/src/routes/api.otel-usage.ts @@ -0,0 +1,167 @@ +import { createFileRoute } from '@tanstack/react-router' +import { chat, createChatOptions } from '@tanstack/ai' +import { otelMiddleware } from '@tanstack/ai/middlewares/otel' +import { createOpenaiChatCompletions } from '@tanstack/ai-openai' +import { createOpenRouterText } 
from '@tanstack/ai-openrouter' +import type { + AttributeValue, + Context, + Span, + SpanContext, + Tracer, +} from '@opentelemetry/api' + +const LLMOCK_DEFAULT_BASE = process.env.LLMOCK_URL || 'http://127.0.0.1:4010' +const DUMMY_KEY = 'sk-e2e-test-dummy-key' + +interface CapturedSpan { + name: string + kind?: number + attributes: Record<string, AttributeValue> + ended: boolean +} + +/** + * Single-request in-memory tracer. Unlike the per-testId capture in + * `api.middleware-test.ts`, everything here happens inside one POST, so spans + * collect into a local array returned directly in the response body. + */ +function createLocalCaptureTracer(): { + tracer: Tracer + spans: Array<CapturedSpan> +} { + const spans: Array<CapturedSpan> = [] + let spanSeq = 0 + const tracer: Tracer = { + startSpan(name, options = {}, _ctx?: Context): Span { + const id = `span-${spanSeq++}` + const attributes: Record<string, AttributeValue> = {} + for (const [k, v] of Object.entries(options.attributes ?? {})) { + if (v !== undefined) attributes[k] = v as AttributeValue + } + const captured: CapturedSpan = { + name, + kind: options.kind, + attributes, + ended: false, + } + spans.push(captured) + const span: Span = { + spanContext(): SpanContext { + return { traceId: 'otel-usage-trace', spanId: id, traceFlags: 1 } + }, + setAttribute(key, value) { + captured.attributes[key] = value as AttributeValue + return span + }, + setAttributes(next) { + for (const [k, v] of Object.entries(next)) { + captured.attributes[k] = v as AttributeValue + } + return span + }, + addEvent() { + return span + }, + addLink() { + return span + }, + addLinks() { + return span + }, + setStatus() { + return span + }, + updateName(next) { + captured.name = next + return span + }, + end() { + captured.ended = true + }, + isRecording() { + return !captured.ended + }, + recordException() {}, + } + return span + }, + // Minimal implementation — otelMiddleware never calls startActiveSpan. 
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any + startActiveSpan(...args: Array<any>) { + const fn = args[args.length - 1] as (span: Span) => unknown + const name = args[0] as string + const span = tracer.startSpan(name, {}) + try { + return fn(span) + } finally { + span.end() + } + }, + } + return { tracer, spans } +} + +/** + * Drives a chat adapter with `otelMiddleware` against the existing + * hand-crafted aimock mounts that report rich usage, and returns the captured + * spans. Companion E2E proof for full-usage span emission (#721): + * + * - `provider: 'openai'` → `/openai-usage-details` mount, whose trailing usage + * chunk carries `total_tokens`, `prompt_tokens_details.cached_tokens`, and + * `completion_tokens_details.reasoning_tokens`. + * - `provider: 'openrouter'` → `/openrouter-cost` mount, whose trailing usage + * chunk carries `cost` / `cost_details`. + * + * The spec asserts the corresponding `gen_ai.usage.*` / `tanstack.ai.usage.*` + * attributes land on the iteration and root spans. + */ +export const Route = createFileRoute('/api/otel-usage')({ + server: { + handlers: { + POST: async ({ request }) => { + let provider = 'openai' + try { + const body = (await request.json()) as { provider?: string } + if (typeof body.provider === 'string') provider = body.provider + } catch { + // No/invalid body — default provider. + } + + const adapter = + provider === 'openrouter' + ? createOpenRouterText('openai/gpt-4o' as never, DUMMY_KEY, { + serverURL: `${LLMOCK_DEFAULT_BASE}/openrouter-cost/v1`, + }) + : createOpenaiChatCompletions('gpt-4o', DUMMY_KEY, { + baseURL: `${LLMOCK_DEFAULT_BASE}/openai-usage-details/v1`, + }) + + const { tracer, spans } = createLocalCaptureTracer() + + try { + for await (const _chunk of chat({ + ...createChatOptions({ adapter }), + messages: [{ role: 'user', content: 'hi' }], + middleware: [otelMiddleware({ tracer })], + })) { + // Drain — the assertions live on the captured spans. 
+ } + } catch (error) { + return new Response( + JSON.stringify({ + ok: false, + error: error instanceof Error ? error.message : String(error), + }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ) + } + + return new Response(JSON.stringify({ ok: true, spans }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }) + }, + }, + }, +}) diff --git a/testing/e2e/tests/middleware.spec.ts b/testing/e2e/tests/middleware.spec.ts index e039d78ab..ccfbc19a5 100644 --- a/testing/e2e/tests/middleware.spec.ts +++ b/testing/e2e/tests/middleware.spec.ts @@ -193,6 +193,77 @@ test.describe('Middleware Lifecycle', () => { } }) + test('otel middleware emits total/cache/reasoning usage details on spans', async ({ + request, + }) => { + // `/api/otel-usage` drives the OpenAI adapter against the + // `/openai-usage-details` aimock mount (total_tokens + cached_tokens + + // reasoning_tokens) with otelMiddleware attached, and returns the + // captured spans. End-to-end proof for #721: the full TokenUsage reaches + // span attributes, not just input/output tokens. + const res = await request.post('/api/otel-usage', { + data: { provider: 'openai' }, + }) + expect(res.ok()).toBe(true) + const { ok, error, spans } = await res.json() + expect(error ?? null).toBeNull() + expect(ok).toBe(true) + + const iterationSpans = spans.filter( + (s: any) => s.kind === SpanKind.CLIENT && s.ended, + ) + expect(iterationSpans.length).toBeGreaterThanOrEqual(1) + expect(iterationSpans[0].attributes).toMatchObject({ + 'gen_ai.usage.input_tokens': 100, + 'gen_ai.usage.output_tokens': 50, + 'gen_ai.usage.total_tokens': 150, + 'gen_ai.usage.cache_read.input_tokens': 80, + 'gen_ai.usage.reasoning.output_tokens': 30, + }) + + // The root span rolls up the final usage on onFinish. 
+ const rootSpans = spans.filter((s: any) => s.kind === SpanKind.INTERNAL) + expect(rootSpans).toHaveLength(1) + expect(rootSpans[0].attributes).toMatchObject({ + 'gen_ai.usage.total_tokens': 150, + 'gen_ai.usage.cache_read.input_tokens': 80, + 'gen_ai.usage.reasoning.output_tokens': 30, + }) + }) + + test('otel middleware emits provider-reported cost on spans', async ({ + request, + }) => { + // OpenRouter adapter against the `/openrouter-cost` mount, whose trailing + // usage chunk carries `cost` / `cost_details`. Backends like PostHog read + // `gen_ai.usage.cost` directly instead of re-deriving from price tables. + const res = await request.post('/api/otel-usage', { + data: { provider: 'openrouter' }, + }) + expect(res.ok()).toBe(true) + const { ok, error, spans } = await res.json() + expect(error ?? null).toBeNull() + expect(ok).toBe(true) + + const iterationSpans = spans.filter( + (s: any) => s.kind === SpanKind.CLIENT && s.ended, + ) + expect(iterationSpans.length).toBeGreaterThanOrEqual(1) + expect(iterationSpans[0].attributes).toMatchObject({ + 'gen_ai.usage.input_tokens': 11, + 'gen_ai.usage.output_tokens': 3, + 'gen_ai.usage.total_tokens': 14, + 'gen_ai.usage.cost': 0.0042, + 'tanstack.ai.usage.upstream_cost': 0.0038, + 'tanstack.ai.usage.upstream_input_cost': 0.0012, + 'tanstack.ai.usage.upstream_output_cost': 0.0026, + }) + + const rootSpans = spans.filter((s: any) => s.kind === SpanKind.INTERNAL) + expect(rootSpans).toHaveLength(1) + expect(rootSpans[0].attributes['gen_ai.usage.cost']).toBe(0.0042) + }) + test('no middleware passes content through unchanged', async ({ page, testId,