diff --git a/.changeset/grok-audio-speech-support.md b/.changeset/grok-audio-speech-support.md new file mode 100644 index 000000000..26dde578a --- /dev/null +++ b/.changeset/grok-audio-speech-support.md @@ -0,0 +1,13 @@ +--- +'@tanstack/ai-grok': minor +--- + +feat(ai-grok): add audio and speech adapters for xAI + +Add three new tree-shakeable adapters that wrap xAI's audio APIs: + +- `grokSpeech` / `createGrokSpeech` — text-to-speech via `POST /v1/tts`. Supports the 5 xAI voices (`eve`, `ara`, `rex`, `sal`, `leo`), MP3/WAV/PCM/μ-law/A-law codecs, and the `language`, `sample_rate`, `bit_rate`, `optimize_streaming_latency`, `text_normalization` provider options. +- `grokTranscription` / `createGrokTranscription` — speech-to-text via `POST /v1/stt`. Passes through `language`, `diarize`, `multichannel`, `channels`, `audio_format`, and `sample_rate`; maps xAI's word-level timestamps to `TranscriptionResult.words`. +- `grokRealtime` / `grokRealtimeToken` — Voice Agent (realtime) adapter for `wss://api.x.ai/v1/realtime` with ephemeral tokens via `/v1/realtime/client_secrets`. Supports the `grok-voice-fast-1.0` and `grok-voice-think-fast-1.0` models. + +New model identifier exports: `GROK_TTS_MODELS`, `GROK_TRANSCRIPTION_MODELS`, `GROK_REALTIME_MODELS` and their corresponding types. diff --git a/examples/ts-react-chat/src/lib/audio-providers.ts b/examples/ts-react-chat/src/lib/audio-providers.ts index a3a27fe42..226aeb002 100644 --- a/examples/ts-react-chat/src/lib/audio-providers.ts +++ b/examples/ts-react-chat/src/lib/audio-providers.ts @@ -6,7 +6,7 @@ * and audio generation flows. 
*/ -export type SpeechProviderId = 'openai' | 'gemini' | 'fal' +export type SpeechProviderId = 'openai' | 'gemini' | 'fal' | 'grok' export interface SpeechProviderConfig { id: SpeechProviderId @@ -55,9 +55,22 @@ export const SPEECH_PROVIDERS: ReadonlyArray = [ ], placeholder: 'Enter text to synthesize with Fal Kokoro…', }, + { + id: 'grok', + label: 'Grok TTS', + model: 'grok-tts', + voices: [ + { id: 'eve', label: 'Eve' }, + { id: 'ara', label: 'Ara' }, + { id: 'rex', label: 'Rex' }, + { id: 'sal', label: 'Sal' }, + { id: 'leo', label: 'Leo' }, + ], + placeholder: 'Enter text for Grok speech…', + }, ] -export type TranscriptionProviderId = 'openai' | 'fal' +export type TranscriptionProviderId = 'openai' | 'fal' | 'grok' export interface TranscriptionProviderConfig { id: TranscriptionProviderId @@ -80,6 +93,12 @@ export const TRANSCRIPTION_PROVIDERS: ReadonlyArray model: 'fal-ai/whisper', description: 'Fal-hosted Whisper with word-level timestamps.', }, + { + id: 'grok', + label: 'Grok STT', + model: 'grok-stt', + description: 'xAI speech-to-text with word-level timestamps.', + }, ] export type AudioProviderId = 'gemini-lyria' | 'fal-audio' | 'fal-sfx' diff --git a/examples/ts-react-chat/src/lib/server-audio-adapters.ts b/examples/ts-react-chat/src/lib/server-audio-adapters.ts index 1248c5301..77336629d 100644 --- a/examples/ts-react-chat/src/lib/server-audio-adapters.ts +++ b/examples/ts-react-chat/src/lib/server-audio-adapters.ts @@ -8,6 +8,7 @@ import { openaiSpeech, openaiTranscription } from '@tanstack/ai-openai' import { geminiAudio, geminiSpeech } from '@tanstack/ai-gemini' import { falAudio, falSpeech, falTranscription } from '@tanstack/ai-fal' +import { grokSpeech, grokTranscription } from '@tanstack/ai-grok' import type { AnyAudioAdapter, AnyTranscriptionAdapter, @@ -27,7 +28,12 @@ function findConfig( id: string, ): T { const match = list.find((entry) => entry.id === id) - if (!match) throw new Error(`Unknown provider: ${id}`) + if (!match) { + throw new 
UnknownProviderError( + id, + list.map((entry) => entry.id), + ) + } return match } @@ -40,6 +46,8 @@ export function buildSpeechAdapter(provider: SpeechProviderId): AnyTTSAdapter { return geminiSpeech(config.model as 'gemini-2.5-flash-preview-tts') case 'fal': return falSpeech(config.model) + case 'grok': + return grokSpeech(config.model as 'grok-tts') } } @@ -52,6 +60,8 @@ export function buildTranscriptionAdapter( return openaiTranscription(config.model as 'whisper-1') case 'fal': return falTranscription(config.model) + case 'grok': + return grokTranscription(config.model as 'grok-stt') } } @@ -72,15 +82,65 @@ export function buildAudioAdapter( } } +/** + * Thrown when a caller supplies a `modelOverride` that is not present in the + * provider's allowed model list. HTTP routes map this to a 400 response so the + * user sees a clear rejection instead of silently getting output from the + * default model. + */ +export class InvalidModelOverrideError extends Error { + readonly code = 'invalid_model_override' as const + readonly providerId: string + readonly requestedModel: string + readonly allowedModels: ReadonlyArray + + constructor( + providerId: string, + requestedModel: string, + allowedModels: ReadonlyArray, + ) { + super( + `Invalid model override "${requestedModel}" for provider "${providerId}". Allowed models: ${ + allowedModels.length > 0 ? allowedModels.join(', ') : '(none)' + }`, + ) + this.name = 'InvalidModelOverrideError' + this.providerId = providerId + this.requestedModel = requestedModel + this.allowedModels = allowedModels + } +} + +/** + * Thrown when `findConfig` is called with a provider id that isn't in the + * allowed list. In practice the route-level Zod enum schema already rejects + * unknown providers before we ever reach this builder, so this is + * defense-in-depth for callers that bypass Zod validation (e.g. server-fns + * whose input schemas could drift from the provider registries). 
+ */ +export class UnknownProviderError extends Error { + readonly code = 'unknown_provider' as const + readonly providerId: string + readonly allowedProviders: ReadonlyArray + + constructor(providerId: string, allowedProviders: ReadonlyArray) { + super( + `Unknown provider "${providerId}". Allowed providers: ${ + allowedProviders.length > 0 ? allowedProviders.join(', ') : '(none)' + }`, + ) + this.name = 'UnknownProviderError' + this.providerId = providerId + this.allowedProviders = allowedProviders + } +} + function resolveModel( config: (typeof AUDIO_PROVIDERS)[number], modelOverride: string | undefined, ): string { if (!modelOverride) return config.model - const allowed = config.models?.some((m) => m.id === modelOverride) - if (allowed) return modelOverride - console.warn( - `[audio] rejected model override "${modelOverride}" for provider "${config.id}"; falling back to "${config.model}"`, - ) - return config.model + const allowedModels = config.models?.map((m) => m.id) ?? [] + if (allowedModels.includes(modelOverride)) return modelOverride + throw new InvalidModelOverrideError(config.id, modelOverride, allowedModels) } diff --git a/examples/ts-react-chat/src/lib/server-fns.ts b/examples/ts-react-chat/src/lib/server-fns.ts index 156db532e..b1e5d9e59 100644 --- a/examples/ts-react-chat/src/lib/server-fns.ts +++ b/examples/ts-react-chat/src/lib/server-fns.ts @@ -12,14 +12,67 @@ import { } from '@tanstack/ai' import { openaiImage, openaiSummarize, openaiVideo } from '@tanstack/ai-openai' import { + InvalidModelOverrideError, + UnknownProviderError, buildAudioAdapter, buildSpeechAdapter, buildTranscriptionAdapter, } from './server-audio-adapters' -const SPEECH_PROVIDER_SCHEMA = z.enum(['openai', 'gemini', 'fal']).optional() +/** + * Server-fn error with a stable `code` property clients can switch on. + * + * TanStack Start's `createServerFn` surfaces thrown errors as a generic 500 + * without a structured payload. 
We can't influence the status code from here, + * so we attach a `code` field the client can read to distinguish well-known + * failure modes (invalid_model_override, unknown_provider) from truly + * unexpected errors. + */ +class ServerFnError extends Error { + readonly code: string + readonly details?: Record -const TRANSCRIPTION_PROVIDER_SCHEMA = z.enum(['openai', 'fal']).optional() + constructor( + code: string, + message: string, + details?: Record, + ) { + super(message) + this.name = 'ServerFnError' + this.code = code + this.details = details + } +} + +/** + * Translate the typed audio-adapter errors into a `ServerFnError` with a stable + * `code`. Any other error is re-thrown untouched so the framework's default + * 500 path handles it. + */ +function rethrowAudioAdapterError(err: unknown): never { + if (err instanceof InvalidModelOverrideError) { + throw new ServerFnError('invalid_model_override', err.message, { + providerId: err.providerId, + requestedModel: err.requestedModel, + allowedModels: err.allowedModels, + }) + } + if (err instanceof UnknownProviderError) { + throw new ServerFnError('unknown_provider', err.message, { + providerId: err.providerId, + allowedProviders: err.allowedProviders, + }) + } + throw err +} + +const SPEECH_PROVIDER_SCHEMA = z + .enum(['openai', 'gemini', 'fal', 'grok']) + .optional() + +const TRANSCRIPTION_PROVIDER_SCHEMA = z + .enum(['openai', 'fal', 'grok']) + .optional() const AUDIO_PROVIDER_SCHEMA = z .enum(['gemini-lyria', 'fal-audio', 'fal-sfx']) @@ -56,8 +109,17 @@ export const generateSpeechFn = createServerFn({ method: 'POST' }) }), ) .handler(async ({ data }) => { + // `buildSpeechAdapter` can throw `UnknownProviderError` (defense-in-depth; + // Zod should catch this first). Translate into a `ServerFnError` so + // clients can distinguish it from a generic failure via the stable `code`. + let adapter + try { + adapter = buildSpeechAdapter(data.provider ?? 
'openai') + } catch (err) { + rethrowAudioAdapterError(err) + } return generateSpeech({ - adapter: buildSpeechAdapter(data.provider ?? 'openai'), + adapter, text: data.text, voice: data.voice, format: data.format, @@ -73,8 +135,18 @@ export const transcribeFn = createServerFn({ method: 'POST' }) }), ) .handler(async ({ data }) => { + // `buildTranscriptionAdapter` can throw `UnknownProviderError` + // (defense-in-depth; Zod should catch this first). Translate into a + // `ServerFnError` so clients can distinguish it from a generic failure + // via the stable `code`. + let adapter + try { + adapter = buildTranscriptionAdapter(data.provider ?? 'openai') + } catch (err) { + rethrowAudioAdapterError(err) + } return generateTranscription({ - adapter: buildTranscriptionAdapter(data.provider ?? 'openai'), + adapter, audio: data.audio, language: data.language, }) @@ -90,8 +162,18 @@ export const generateAudioFn = createServerFn({ method: 'POST' }) }), ) .handler(async ({ data }) => { + // `buildAudioAdapter` can throw `InvalidModelOverrideError` (unknown + // model id) or `UnknownProviderError` (defense-in-depth; Zod should + // catch this first). Translate both into a `ServerFnError` so clients + // can distinguish them from a generic failure via the stable `code`. + let adapter + try { + adapter = buildAudioAdapter(data.provider ?? 'gemini-lyria', data.model) + } catch (err) { + rethrowAudioAdapterError(err) + } return generateAudio({ - adapter: buildAudioAdapter(data.provider ?? 'gemini-lyria', data.model), + adapter, prompt: data.prompt, duration: data.duration, }) @@ -195,9 +277,18 @@ export const generateSpeechStreamFn = createServerFn({ method: 'POST' }) }), ) .handler(({ data }) => { + // `buildSpeechAdapter` can throw `UnknownProviderError` (defense-in-depth; + // Zod should catch this first). Translate into a `ServerFnError` so + // clients can distinguish it from a generic failure via the stable `code`. 
+ let adapter + try { + adapter = buildSpeechAdapter(data.provider ?? 'openai') + } catch (err) { + rethrowAudioAdapterError(err) + } return toServerSentEventsResponse( generateSpeech({ - adapter: buildSpeechAdapter(data.provider ?? 'openai'), + adapter, text: data.text, voice: data.voice, format: data.format, @@ -215,9 +306,19 @@ export const transcribeStreamFn = createServerFn({ method: 'POST' }) }), ) .handler(({ data }) => { + // `buildTranscriptionAdapter` can throw `UnknownProviderError` + // (defense-in-depth; Zod should catch this first). Translate into a + // `ServerFnError` so clients can distinguish it from a generic failure + // via the stable `code`. + let adapter + try { + adapter = buildTranscriptionAdapter(data.provider ?? 'openai') + } catch (err) { + rethrowAudioAdapterError(err) + } return toServerSentEventsResponse( generateTranscription({ - adapter: buildTranscriptionAdapter(data.provider ?? 'openai'), + adapter, audio: data.audio, language: data.language, stream: true, diff --git a/examples/ts-react-chat/src/lib/use-realtime.ts b/examples/ts-react-chat/src/lib/use-realtime.ts index 848c702ca..620c2804f 100644 --- a/examples/ts-react-chat/src/lib/use-realtime.ts +++ b/examples/ts-react-chat/src/lib/use-realtime.ts @@ -6,9 +6,10 @@ import { elevenlabsRealtime, elevenlabsRealtimeToken, } from '@tanstack/ai-elevenlabs' +import { grokRealtime, grokRealtimeToken } from '@tanstack/ai-grok' import { realtimeClientTools } from '@/lib/realtime-tools' -type Provider = 'openai' | 'elevenlabs' +type Provider = 'openai' | 'elevenlabs' | 'grok' const getRealtimeTokenFn = createServerFn({ method: 'POST' }) .inputValidator((data: { provider: Provider; agentId?: string }) => { @@ -36,12 +37,30 @@ const getRealtimeTokenFn = createServerFn({ method: 'POST' }) }) } + if (data.provider === 'grok') { + return realtimeToken({ + adapter: grokRealtimeToken({ model: 'grok-voice-fast-1.0' }), + }) + } + throw new Error(`Unknown provider: ${data.provider}`) }) +function 
adapterForProvider(provider: Provider) { + switch (provider) { + case 'openai': + return openaiRealtime() + case 'elevenlabs': + return elevenlabsRealtime() + case 'grok': + return grokRealtime() + } +} + export function useRealtime({ provider, agentId, + voice, outputModalities, temperature, maxOutputTokens, @@ -49,14 +68,12 @@ export function useRealtime({ }: { provider: Provider agentId: string + voice?: string outputModalities?: Array<'audio' | 'text'> temperature?: number maxOutputTokens?: number | 'inf' semanticEagerness?: 'low' | 'medium' | 'high' }) { - const adapter = - provider === 'openai' ? openaiRealtime() : elevenlabsRealtime() - return useRealtimeChat({ getToken: () => getRealtimeTokenFn({ @@ -65,7 +82,7 @@ export function useRealtime({ ...(provider === 'elevenlabs' && agentId ? { agentId } : {}), }, }), - adapter, + adapter: adapterForProvider(provider), instructions: `You are a helpful, friendly voice assistant with access to several tools. You can: @@ -78,7 +95,7 @@ Keep your responses concise and conversational since this is a voice interface. When using tools, briefly explain what you're doing and then share the results naturally. If the user sends an image, describe what you see and answer any questions about it. Be friendly and engaging!`, - voice: 'alloy', + voice: voice ?? (provider === 'grok' ? 
'eve' : 'alloy'), tools: realtimeClientTools, outputModalities, temperature, diff --git a/examples/ts-react-chat/src/routes/api.generate.audio.ts b/examples/ts-react-chat/src/routes/api.generate.audio.ts index 23282cc0e..aade04c8b 100644 --- a/examples/ts-react-chat/src/routes/api.generate.audio.ts +++ b/examples/ts-react-chat/src/routes/api.generate.audio.ts @@ -1,7 +1,11 @@ import { createFileRoute } from '@tanstack/react-router' import { generateAudio, toServerSentEventsResponse } from '@tanstack/ai' import { z } from 'zod' -import { buildAudioAdapter } from '../lib/server-audio-adapters' +import { + InvalidModelOverrideError, + UnknownProviderError, + buildAudioAdapter, +} from '../lib/server-audio-adapters' const AUDIO_PROVIDER_SCHEMA = z .enum(['gemini-lyria', 'fal-audio', 'fal-sfx']) @@ -66,6 +70,28 @@ export const Route = createFileRoute('/api/generate/audio')({ return toServerSentEventsResponse(stream) } catch (err) { + if (err instanceof InvalidModelOverrideError) { + return jsonError(400, { + error: 'invalid_model_override', + message: err.message, + provider: err.providerId, + requestedModel: err.requestedModel, + allowedModels: err.allowedModels, + }) + } + // Defense-in-depth: the Zod enum schema above should already reject + // unknown providers, but surface a typed 400 here in case that + // validation drifts or is bypassed. + if (err instanceof UnknownProviderError) { + return jsonError(400, { + error: 'unknown_provider', + message: err.message, + // Use `provider` consistently with the invalid_model_override + // branch and the request body's `provider` field. 
+ provider: err.providerId, + allowedProviders: err.allowedProviders, + }) + } return jsonError(500, { error: 'generation_failed', message: diff --git a/examples/ts-react-chat/src/routes/api.generate.speech.ts b/examples/ts-react-chat/src/routes/api.generate.speech.ts index 12a621d3b..92057ad4f 100644 --- a/examples/ts-react-chat/src/routes/api.generate.speech.ts +++ b/examples/ts-react-chat/src/routes/api.generate.speech.ts @@ -1,9 +1,15 @@ import { createFileRoute } from '@tanstack/react-router' import { generateSpeech, toServerSentEventsResponse } from '@tanstack/ai' import { z } from 'zod' -import { buildSpeechAdapter } from '../lib/server-audio-adapters' +import { + InvalidModelOverrideError, + UnknownProviderError, + buildSpeechAdapter, +} from '../lib/server-audio-adapters' -const SPEECH_PROVIDER_SCHEMA = z.enum(['openai', 'gemini', 'fal']).optional() +const SPEECH_PROVIDER_SCHEMA = z + .enum(['openai', 'gemini', 'fal', 'grok']) + .optional() const SPEECH_BODY_SCHEMA = z.object({ text: z.string().min(1), @@ -65,6 +71,26 @@ export const Route = createFileRoute('/api/generate/speech')({ return toServerSentEventsResponse(stream) } catch (err) { + if (err instanceof InvalidModelOverrideError) { + return jsonError(400, { + error: 'invalid_model_override', + message: err.message, + provider: err.providerId, + requestedModel: err.requestedModel, + allowedModels: err.allowedModels, + }) + } + // Defense-in-depth: the Zod enum schema above should already reject + // unknown providers, but surface a typed 400 here in case that + // validation drifts or is bypassed. 
+ if (err instanceof UnknownProviderError) { + return jsonError(400, { + error: 'unknown_provider', + message: err.message, + provider: err.providerId, + allowedProviders: err.allowedProviders, + }) + } return jsonError(500, { error: 'generation_failed', message: diff --git a/examples/ts-react-chat/src/routes/api.transcribe.ts b/examples/ts-react-chat/src/routes/api.transcribe.ts index 37e76ea7f..e6131ad32 100644 --- a/examples/ts-react-chat/src/routes/api.transcribe.ts +++ b/examples/ts-react-chat/src/routes/api.transcribe.ts @@ -1,9 +1,15 @@ import { createFileRoute } from '@tanstack/react-router' import { generateTranscription, toServerSentEventsResponse } from '@tanstack/ai' import { z } from 'zod' -import { buildTranscriptionAdapter } from '../lib/server-audio-adapters' +import { + InvalidModelOverrideError, + UnknownProviderError, + buildTranscriptionAdapter, +} from '../lib/server-audio-adapters' -const TRANSCRIPTION_PROVIDER_SCHEMA = z.enum(['openai', 'fal']).optional() +const TRANSCRIPTION_PROVIDER_SCHEMA = z + .enum(['openai', 'fal', 'grok']) + .optional() const TRANSCRIBE_BODY_SCHEMA = z.object({ audio: z.string().min(1), @@ -63,6 +69,26 @@ export const Route = createFileRoute('/api/transcribe')({ return toServerSentEventsResponse(stream) } catch (err) { + if (err instanceof InvalidModelOverrideError) { + return jsonError(400, { + error: 'invalid_model_override', + message: err.message, + provider: err.providerId, + requestedModel: err.requestedModel, + allowedModels: err.allowedModels, + }) + } + // Defense-in-depth: the Zod enum schema above should already reject + // unknown providers, but surface a typed 400 here in case that + // validation drifts or is bypassed. 
+ if (err instanceof UnknownProviderError) { + return jsonError(400, { + error: 'unknown_provider', + message: err.message, + provider: err.providerId, + allowedProviders: err.allowedProviders, + }) + } return jsonError(500, { error: 'transcription_failed', message: diff --git a/examples/ts-react-chat/src/routes/realtime.tsx b/examples/ts-react-chat/src/routes/realtime.tsx index 3225249e2..bed289da1 100644 --- a/examples/ts-react-chat/src/routes/realtime.tsx +++ b/examples/ts-react-chat/src/routes/realtime.tsx @@ -13,14 +13,18 @@ import { import { AudioSparkline } from '@/components/AudioSparkline' import { useRealtime } from '@/lib/use-realtime' -type Provider = 'openai' | 'elevenlabs' +type Provider = 'openai' | 'elevenlabs' | 'grok' type OutputMode = 'audio+text' | 'text-only' | 'audio-only' const PROVIDER_OPTIONS: Array<{ value: Provider; label: string }> = [ { value: 'openai', label: 'OpenAI Realtime' }, { value: 'elevenlabs', label: 'ElevenLabs' }, + { value: 'grok', label: 'Grok Voice Agent' }, ] +const GROK_VOICES = ['eve', 'ara', 'rex', 'sal', 'leo'] as const +type GrokVoice = (typeof GROK_VOICES)[number] + const OUTPUT_MODE_OPTIONS: Array<{ value: OutputMode; label: string }> = [ { value: 'audio+text', label: 'Audio + Text' }, { value: 'text-only', label: 'Text Only' }, @@ -45,6 +49,7 @@ function outputModeToModalities( function RealtimePage() { const [provider, setProvider] = useState('openai') const [agentId, setAgentId] = useState('') + const [grokVoice, setGrokVoice] = useState('eve') const [textInput, setTextInput] = useState('') const [outputMode, setOutputMode] = useState('audio+text') const [temperature, setTemperature] = useState(0.8) @@ -73,6 +78,7 @@ function RealtimePage() { } = useRealtime({ provider, agentId, + voice: provider === 'grok' ? 
grokVoice : undefined, outputModalities: outputModeToModalities(outputMode), temperature, semanticEagerness, @@ -81,21 +87,69 @@ function RealtimePage() { // Handle image file selection const handleImageUpload = (e: React.ChangeEvent) => { const file = e.target.files?.[0] - if (!file) return + // Reset the input on every exit path (via resetInput) so the same file + // can be selected again even if we bail below. + const resetInput = () => { + e.target.value = '' + } + if (!file) { + resetInput() + return + } + + // Bail if the file has no MIME type — in practice an empty `type` is + // a sign of a corrupt file or a browser that couldn't sniff it, and + // the OpenAI-compatible realtime API requires an explicit mime. + if (!file.type) { + // eslint-disable-next-line no-console + console.error('[realtime] Cannot send image: file has no MIME type', file) + window.alert( + 'Could not determine the image type. Please try a different file.', + ) + resetInput() + return + } const reader = new FileReader() + reader.onerror = () => { + // eslint-disable-next-line no-console + console.error('[realtime] FileReader failed', reader.error) + window.alert( + `Failed to read image file: ${reader.error?.message ?? 'Unknown error'}`, + ) + resetInput() + } reader.onload = () => { - const result = reader.result as string - // Extract base64 data (remove data:image/xxx;base64, prefix) - const base64 = result.split(',')[1] - if (base64) { - sendImage(base64, file.type) + const result = reader.result + // `result` is null on abort/error, and is an ArrayBuffer (not a + // string) if someone changes the readAs* method later. Guard both. + if (result == null || typeof result !== 'string') { + // eslint-disable-next-line no-console + console.error('[realtime] FileReader result was not a string', result) + window.alert('Failed to read image file: unexpected reader output.') + resetInput() + return } + // Extract base64 data (remove data:image/xxx;base64, prefix).
A + // malformed data URL (no comma, or empty payload after the comma) + // means there's nothing sendable — surface it instead of silently + // no-op'ing. + const parts = result.split(',') + const base64 = parts[1] + if (!base64) { + // eslint-disable-next-line no-console + console.error( + '[realtime] FileReader produced a malformed data URL', + result.slice(0, 64), + ) + window.alert('Failed to read image file: malformed image data.') + resetInput() + return + } + sendImage(base64, file.type) + resetInput() } reader.readAsDataURL(file) - - // Reset input so the same file can be selected again - e.target.value = '' } // Auto-scroll to bottom when messages change @@ -195,8 +249,29 @@ function RealtimePage() { )} - {/* Output mode selector (OpenAI only) */} - {provider === 'openai' && ( + {/* Grok voice selector */} + {provider === 'grok' && ( +
+ + +
+ )} + + {/* Output mode selector (OpenAI-compatible realtime) */} + {(provider === 'openai' || provider === 'grok') && (