From 334101938e242efa4561325fc79076b9fb72aad4 Mon Sep 17 00:00:00 2001 From: gaurav0107 Date: Sun, 7 Jun 2026 02:26:21 +0530 Subject: [PATCH] feat(web): provider-grouped ModelPicker + widen Luna provider check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Static MODEL_CATALOG per provider in web/src/lib/models.ts (4-6 models each: anthropic/openai/gemini/mistral/deepseek/groq). New renders a grouped setProvider(e.target.value as LunaJudgeRow["provider"])} - > - {PROVIDER_OPTIONS.map((p) => ( - - ))} - - - - setModel(e.target.value)} - /> - - + { + setProvider(p); + setModel(m); + }} + /> +

+ tip: pick ‘Custom…’ and type{" "} + stub/echo for the deterministic test path that + bypasses LiteLLM. +

per provider with the curated catalog plus a + * "custom..." option that flips to a free-text input. Uncontrolled + * value below the catalog is preserved as-is (so a typed + * `gemini/gemini-experimental-1206` round-trips even if it's not in + * the catalog yet). The escape hatch is the point: we curate for + * discoverability, not enforcement. + * + * The actual gateway dispatches `/`. Legacy bare + * names (`gpt-4o`, `claude-sonnet-4`) still work for back-compat with + * existing playground sessions; the api's _resolve_provider routes + * them. New picks via this component are always fully-qualified. + */ +export function ModelPicker({ + value, + onChange, + label, + ariaLabel, +}: { + value: string; + onChange: (next: string) => void; + /** Optional label for the field wrapper. If omitted, render just the inputs. */ + label?: string; + ariaLabel?: string; +}) { + // If the current value isn't in the catalog, expose a free-text mode + // so the user can edit it directly without losing it. + const inCatalog = ALL_MODELS.some((m) => m.value === value); + const initialCustom = value !== "" && !inCatalog; + const [customMode, setCustomMode] = useState(initialCustom); + + const select = ( + + ); + + const customInput = customMode ? ( + onChange(e.target.value)} + placeholder="provider/model-id" + className="mono" + style={{ width: "100%", marginTop: 6, fontSize: 13 }} + /> + ) : null; + + const inferredProvider = providerFromValue(value); + const opt = findModelOption(value); + const meta = customMode ? ( + + {inferredProvider + ? `routed to ${inferredProvider}` + : "use 'provider/model-id' so the gateway can route correctly"} + + ) : opt?.hint ? ( + + {opt.provider} · {opt.hint} + + ) : null; + + if (label) { + return ( + + ); + } + + return ( +
+ {select} + {customInput} + {meta} +
+ ); +} + +/** + * Compact provider+model split. Used by judge config / variant config + * shapes that store {provider, model} as separate columns. Dispatches + * the bare model id (no slash prefix) on the wire. Internally this is + * the same picker UI; we just split the chosen value. + */ +export function ProviderModelPicker({ + provider, + model, + onChange, +}: { + provider: Provider | "" | null; + model: string; + onChange: (next: { provider: Provider; model: string }) => void; +}) { + // Compose the catalog-shaped value; if the model has no provider + // prefix, pair it with the supplied provider for select rendering. + const composed = + model && provider + ? model.startsWith(`${provider}/`) + ? model + : `${provider}/${model}` + : ""; + return ( + { + const slash = next.indexOf("/"); + if (slash > 0) { + const p = next.slice(0, slash) as Provider; + const m = next.slice(slash + 1); + if (PROVIDERS.some((pp) => pp.value === p)) { + onChange({ provider: p, model: m }); + return; + } + } + // Custom value with no recognizable prefix: keep the existing + // provider, dispatch the raw model. The api will return + // bad_model if it's truly unknown, surfaced on the row. + if (provider) onChange({ provider, model: next }); + }} + /> + ); +} diff --git a/web/src/components/PlaygroundClient.tsx b/web/src/components/PlaygroundClient.tsx index 24160bf..6fa92f5 100644 --- a/web/src/components/PlaygroundClient.tsx +++ b/web/src/components/PlaygroundClient.tsx @@ -5,6 +5,8 @@ import Link from "next/link"; import { useRouter } from "next/navigation"; import { useMemo, useState, useTransition } from "react"; +import { ModelPicker } from "@/components/ModelPicker"; + /** * Interactive Playground canvas. 
* @@ -53,15 +55,6 @@ export interface PlaygroundSessionOut { const VAR_RE = /\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}/g; -const PRESET_MODELS: { value: string; label: string }[] = [ - { value: "claude-opus-4-7", label: "claude-opus-4-7" }, - { value: "claude-sonnet-4-6", label: "claude-sonnet-4-6" }, - { value: "claude-haiku-4-5-20251001", label: "claude-haiku-4-5" }, - { value: "gpt-4o", label: "gpt-4o" }, - { value: "gpt-4o-mini", label: "gpt-4o-mini" }, - { value: "stub-echo", label: "stub-echo (no api key)" }, -]; - function extractVariables(template: string): string[] { const out = new Set(); let match: RegExpExecArray | null; @@ -89,8 +82,8 @@ export function PlaygroundComposer({ const [variables, setVariables] = useState>({ text: "tracebility is a self-hosted LLM observability platform.", }); - const [model, setModel] = useState("stub-echo"); - const [modelB, setModelB] = useState("claude-haiku-4-5-20251001"); + const [model, setModel] = useState("anthropic/claude-sonnet-4-6"); + const [modelB, setModelB] = useState("openai/gpt-4o-mini"); const [temperature, setTemperature] = useState("0.7"); const [maxTokens, setMaxTokens] = useState("1024"); const [result, setResult] = useState(null); @@ -499,25 +492,13 @@ function ModelCard({ gap: 12, }} > - - - + {mode === "compare" ? ( - - - + ) : null} /`. This + * catalog is what the UI shows in pickers — a curated set that's known + * to work end-to-end through LiteLLM. New models can be typed in by + * hand (the picker has a "custom..." escape hatch); the catalog is + * about discoverability, not a hard allow-list. + * + * The `value` field is what gets sent to the api. For the playground's + * legacy bare-name path (e.g. `gpt-4o`, `claude-sonnet-4`) the gateway + * accepts both forms and `_resolve_provider` derives the provider from + * the prefix. 
+ */ + +export type Provider = + | "anthropic" + | "openai" + | "gemini" + | "mistral" + | "deepseek" + | "groq"; + +export interface ModelOption { + /** What gets dispatched to the api: a `provider/model-id` string. */ + value: string; + /** What the user sees. A short model name without the provider prefix. */ + label: string; + /** Optional one-line tier hint. ≤32 chars. */ + hint?: string; +} + +export const PROVIDERS: { value: Provider; label: string }[] = [ + { value: "anthropic", label: "Anthropic" }, + { value: "openai", label: "OpenAI" }, + { value: "gemini", label: "Gemini" }, + { value: "mistral", label: "Mistral" }, + { value: "deepseek", label: "DeepSeek" }, + { value: "groq", label: "Groq" }, +]; + +/** + * Curated model catalog, keyed by provider id. Keep this list short — a + * handful per provider (currently 2-4 each), the ones a researcher actually + * picks between. Every entry's `value` carries its own provider's prefix. + * New releases get added here by hand (~quarterly). The "custom..." escape + * hatch in the picker covers anything not on the list. + * + * NOTE(review): `Record` appears below without type arguments; presumably + * they were lost when this patch text was extracted. Confirm against the + * real file before applying. + */ +export const MODEL_CATALOG: Record = { + anthropic: [ + { value: "anthropic/claude-opus-4-7", label: "claude-opus-4-7", hint: "frontier reasoning" }, + { value: "anthropic/claude-sonnet-4-6", label: "claude-sonnet-4-6", hint: "balanced" }, + { value: "anthropic/claude-haiku-4-5-20251001", label: "claude-haiku-4-5", hint: "fast / cheap" }, + ], + openai: [ + { value: "openai/gpt-4o", label: "gpt-4o", hint: "flagship multimodal" }, + { value: "openai/gpt-4o-mini", label: "gpt-4o-mini", hint: "cheap" }, + { value: "openai/o3", label: "o3", hint: "deep reasoning" }, + { value: "openai/o4-mini", label: "o4-mini", hint: "reasoning, cheap" }, + ], + gemini: [ + { value: "gemini/gemini-2.5-pro", label: "gemini-2.5-pro", hint: "long context" }, + { value: "gemini/gemini-2.5-flash", label: "gemini-2.5-flash", hint: "fast" }, + { value: "gemini/gemini-1.5-pro", label: "gemini-1.5-pro", hint: "1M-token window" }, + { value: "gemini/gemini-1.5-flash", label: "gemini-1.5-flash", hint: "cheap" }, + ], + mistral: [ + { value: "mistral/mistral-large-latest", label: "mistral-large", hint: "flagship" 
}, + { value: "mistral/mistral-small-latest", label: "mistral-small", hint: "fast" }, + { value: "mistral/codestral-latest", label: "codestral", hint: "code" }, + ], + deepseek: [ + { value: "deepseek/deepseek-chat", label: "deepseek-chat", hint: "general" }, + { value: "deepseek/deepseek-reasoner", label: "deepseek-reasoner", hint: "reasoning" }, + ], + groq: [ + { value: "groq/llama-3.3-70b-versatile", label: "llama-3.3-70b", hint: "fast hosted" }, + { value: "groq/llama-3.1-8b-instant", label: "llama-3.1-8b-instant", hint: "cheap" }, + { value: "groq/mixtral-8x7b-32768", label: "mixtral-8x7b", hint: "32k context" }, + ], +}; + +/** Flattened view of MODEL_CATALOG: [{provider, ...ModelOption}, ...], in PROVIDERS order and, within a provider, in catalog order. */ +export const ALL_MODELS: (ModelOption & { provider: Provider })[] = + PROVIDERS.flatMap((p) => + MODEL_CATALOG[p.value].map((m) => ({ ...m, provider: p.value })), + ); + +/** Return the catalog option whose `value` is strictly equal to the argument, or undefined if none. Catalog values are all provider-prefixed, so a bare name such as `gpt-4o` does not match. */ +export function findModelOption( + value: string, +): (ModelOption & { provider: Provider }) | undefined { + return ALL_MODELS.find((m) => m.value === value); +} + +/** Extract provider from a `provider/model-id` string: the text before the + * first slash, if it is one of PROVIDERS. Returns null when there is no + * slash, when the string starts with a slash, or when the prefix is not a + * listed provider (e.g. `stub/echo`). Legacy bare names (`gpt-4o`, + * `claude-sonnet-4`) therefore yield null, since those + * are routed by the api's _resolve_provider, not by us. */ +export function providerFromValue(value: string): Provider | null { + const slash = value.indexOf("/"); + if (slash <= 0) return null; + const prefix = value.slice(0, slash); + return PROVIDERS.some((p) => p.value === prefix) ? (prefix as Provider) : null; +}