diff --git a/packages/tale_knowledge/src/tale_knowledge/extraction/__init__.py b/packages/tale_knowledge/src/tale_knowledge/extraction/__init__.py
index ca72963dfd..52065d4405 100644
--- a/packages/tale_knowledge/src/tale_knowledge/extraction/__init__.py
+++ b/packages/tale_knowledge/src/tale_knowledge/extraction/__init__.py
@@ -1,5 +1,5 @@
"""File text extraction modules."""
-from .router import extract_text
+from .router import ProgressCallback, extract_text
-__all__ = ["extract_text"]
+__all__ = ["ProgressCallback", "extract_text"]
diff --git a/packages/tale_knowledge/src/tale_knowledge/extraction/pdf.py b/packages/tale_knowledge/src/tale_knowledge/extraction/pdf.py
index 1bef86ae24..b48da93548 100644
--- a/packages/tale_knowledge/src/tale_knowledge/extraction/pdf.py
+++ b/packages/tale_knowledge/src/tale_knowledge/extraction/pdf.py
@@ -8,6 +8,7 @@
from __future__ import annotations
import asyncio
+from collections.abc import Callable
from functools import partial
from typing import TYPE_CHECKING
@@ -19,6 +20,8 @@
if TYPE_CHECKING:
from tale_knowledge.vision.client import VisionClient
+ProgressCallback = Callable[[int, int], None] # (pages_done, total_pages)
+
LARGE_IMAGE_RATIO = 0.5
MAX_PAGES = 2000
DEFAULT_PAGE_CONCURRENCY = 8
@@ -139,6 +142,7 @@ async def extract_text_from_pdf_bytes(
vision_client: VisionClient | None = None,
process_images: bool = True,
max_pages: int = MAX_PAGES,
+ on_progress: ProgressCallback | None = None,
) -> tuple[str, bool]:
"""Extract text from PDF bytes.
@@ -148,6 +152,8 @@ async def extract_text_from_pdf_bytes(
vision_client: Optional VisionClient for OCR/image description.
process_images: Whether to extract and describe embedded images.
max_pages: Maximum number of pages to process.
+ on_progress: Optional callback ``(pages_done, total_pages)`` invoked
+ after each page completes. Safe to call from concurrent tasks.
Returns:
Tuple of (extracted_text, vision_was_used).
@@ -182,7 +188,10 @@ async def extract_text_from_pdf_bytes(
finally:
doc.close()
+ pages_done = 0
+
async def process_page(page_num: int, page_bytes: bytes) -> tuple[int, str, bool]:
+ nonlocal pages_done
async with page_semaphore:
content, vis_used = await _extract_page_with_layout(
page_bytes,
@@ -191,6 +200,9 @@ async def process_page(page_num: int, page_bytes: bytes) -> tuple[int, str, bool
vision_client,
process_images,
)
+ pages_done += 1
+ if on_progress is not None:
+ on_progress(pages_done, pages_to_process)
return page_num, f"--- Page {page_num + 1} ---\n{content}", vis_used
tasks = [process_page(pn, pb) for pn, pb in page_data]
diff --git a/packages/tale_knowledge/src/tale_knowledge/extraction/router.py b/packages/tale_knowledge/src/tale_knowledge/extraction/router.py
index 5aa11d9e62..e16e2cdcd7 100644
--- a/packages/tale_knowledge/src/tale_knowledge/extraction/router.py
+++ b/packages/tale_knowledge/src/tale_knowledge/extraction/router.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+from collections.abc import Callable
from pathlib import Path
from typing import TYPE_CHECKING
@@ -33,12 +34,16 @@ def is_supported(filename: str) -> bool:
return Path(filename).suffix.lower() in ALL_SUPPORTED_EXTENSIONS
+ProgressCallback = Callable[[int, int], None]
+
+
async def extract_text(
file_bytes: bytes,
filename: str,
*,
vision_client: VisionClient | None = None,
process_images: bool = True,
+ on_progress: ProgressCallback | None = None,
) -> tuple[str, bool]:
"""Extract text from file bytes, routing to the correct extractor.
@@ -47,6 +52,8 @@ async def extract_text(
filename: Original filename (used to determine file type).
vision_client: Optional VisionClient for OCR/image description.
process_images: Whether to extract and describe embedded images.
+ on_progress: Optional callback ``(done, total)`` for page-level progress
+ (currently only used by PDF extraction).
Returns:
Tuple of (extracted_text, vision_was_used).
@@ -65,6 +72,7 @@ async def extract_text(
filename,
vision_client=vision_client,
process_images=process_images,
+ on_progress=on_progress,
)
if suffix in DOCX_EXTENSIONS:
diff --git a/services/crawler/app/models.py b/services/crawler/app/models.py
index 1c81162a2c..9d5a51c31d 100644
--- a/services/crawler/app/models.py
+++ b/services/crawler/app/models.py
@@ -207,6 +207,18 @@ class HtmlToDocxRequest(BaseModel):
# ==================== PPTX Models ====================
+class MarkdownToPptxRequest(BaseModel):
+    """Request body carrying the Markdown source to convert into a PPTX file."""
+
+    content: str = Field(..., description="Markdown content to convert")
+
+
+class HtmlToPptxRequest(BaseModel):
+    """Request body carrying the HTML markup to convert into a PPTX file."""
+
+    html: str = Field(..., description="HTML content to convert")
+
+
class TableData(BaseModel):
"""Table data for PPTX generation."""
diff --git a/services/crawler/app/routers/pptx.py b/services/crawler/app/routers/pptx.py
index 9652f85500..9c512a73d0 100644
--- a/services/crawler/app/routers/pptx.py
+++ b/services/crawler/app/routers/pptx.py
@@ -6,11 +6,14 @@
import json
from fastapi import APIRouter, File, Form, HTTPException, UploadFile, status
+from fastapi.responses import Response
from loguru import logger
from app.models import (
FileMetadataResponse,
GeneratePptxResponse,
+ HtmlToPptxRequest,
+ MarkdownToPptxRequest,
ParseFileResponse,
)
from app.services.file_parser_service import get_file_parser_service
@@ -133,6 +136,88 @@ async def generate_pptx_from_json(
)
+_PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+
+
+@router.post("/from-markdown")
+async def convert_markdown_to_pptx(request: MarkdownToPptxRequest):
+    """
+    Render a Markdown document as a downloadable PowerPoint file.
+
+    The Markdown is converted to HTML, ``html_to_slides`` turns that HTML into
+    slide dicts, and the template service builds the PPTX from them.
+
+    Args:
+        request: Body whose ``content`` field holds the Markdown source.
+
+    Returns:
+        Binary response carrying the PPTX as an attachment.
+
+    Raises:
+        HTTPException: 500 when any step of the conversion fails; the cause is
+            logged but not exposed to the client.
+    """
+    try:
+        from app.services.base_converter import BaseConverterService
+        from app.services.html_to_pptx_converter import html_to_slides
+
+        rendered_html = await BaseConverterService().markdown_to_html(request.content)
+        slides = html_to_slides(rendered_html)
+
+        deck_builder = get_template_service()
+        deck_bytes = await deck_builder.generate_pptx_from_content(slides_content=slides)
+
+        download_headers = {"Content-Disposition": "attachment; filename=presentation.pptx"}
+        return Response(content=deck_bytes, media_type=_PPTX_CONTENT_TYPE, headers=download_headers)
+
+    except Exception:
+        # Full traceback goes to the log only.
+        logger.exception("Error converting markdown to PPTX")
+        failure = HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to convert markdown to PPTX",
+        )
+        raise failure from None
+
+
+@router.post("/from-html")
+async def convert_html_to_pptx(request: HtmlToPptxRequest):
+    """
+    Render an HTML document as a downloadable PowerPoint file.
+
+    ``html_to_slides`` extracts the slide structure (h1/h2 headings become
+    slide titles, lists become bullet points, tables are kept as tables) and
+    the template service builds the PPTX from it.
+
+    Args:
+        request: Body whose ``html`` field holds the markup to convert.
+
+    Returns:
+        Binary response carrying the PPTX as an attachment.
+
+    Raises:
+        HTTPException: 500 when any step of the conversion fails; the cause is
+            logged but not exposed to the client.
+    """
+    try:
+        from app.services.html_to_pptx_converter import html_to_slides
+
+        slides = html_to_slides(request.html)
+        deck_builder = get_template_service()
+        deck_bytes = await deck_builder.generate_pptx_from_content(slides_content=slides)
+
+        download_headers = {"Content-Disposition": "attachment; filename=presentation.pptx"}
+        return Response(content=deck_bytes, media_type=_PPTX_CONTENT_TYPE, headers=download_headers)
+
+    except Exception:
+        logger.exception("Error converting HTML to PPTX")
+        failure = HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to convert HTML to PPTX",
+        )
+        raise failure from None
+
+
@router.post("/parse", response_model=ParseFileResponse)
async def parse_pptx_file(
file: UploadFile = _FILE_UPLOAD,
diff --git a/services/crawler/app/services/html_to_pptx_converter.py b/services/crawler/app/services/html_to_pptx_converter.py
new file mode 100644
index 0000000000..3979bbc89f
--- /dev/null
+++ b/services/crawler/app/services/html_to_pptx_converter.py
@@ -0,0 +1,235 @@
+"""
+HTML to PPTX slide converter.
+
+Parses HTML content and converts it into structured slide dicts
+that can be passed to PptxService.generate_pptx_from_content().
+
+Uses BeautifulSoup for HTML parsing. Each top-level heading (h1/h2)
+starts a new slide; content between headings becomes bullet points
+or text content on that slide.
+"""
+
+import logging
+import re
+from typing import Any
+
+from bs4 import BeautifulSoup, NavigableString, Tag
+
+logger = logging.getLogger(__name__)
+
+# Heading tags that start a new slide
+_SLIDE_BREAK_TAGS = {"h1", "h2"}
+
+# Tags to skip entirely
+_SKIP_TAGS = {"script", "style", "meta", "link", "head"}
+
+
+def _get_text(element: Tag) -> str:
+    """Return the element's text with every whitespace run squeezed to one space."""
+    pieces = element.get_text(separator=" ", strip=True).split()
+    return " ".join(pieces)
+
+
+def _parse_list_items(list_tag: Tag) -> list[str]:
+    """Extract text from the direct <li> children of a list tag.
+
+    Items whose text is empty after whitespace cleanup are dropped; text of
+    nested lists is folded into the parent item's string by ``_get_text``.
+    """
+    direct_items = list_tag.find_all("li", recursive=False)
+    return [text for li in direct_items if (text := _get_text(li))]
+
+
+def _parse_table(table_tag: Tag) -> dict[str, Any] | None:
+    """Turn an HTML table into ``{"headers": [...], "rows": [[...], ...]}``.
+
+    Headers come from ``<th>`` cells in ``<thead>``; failing that, from the first
+    body row made only of ``<th>`` cells; failing that, ``Column N`` placeholders.
+    Rows are padded with ``""`` or truncated to the header width.
+
+    Returns:
+        The table dict, or ``None`` when neither headers nor rows were found.
+    """
+    head_section = table_tag.find("thead")
+    headers: list[str] = [_get_text(th) for th in head_section.find_all("th")] if head_section else []
+
+    rows: list[list[str]] = []
+    row_parent = table_tag.find("tbody") or table_tag
+    for tr in row_parent.find_all("tr", recursive=False):
+        cells = tr.find_all(["td", "th"])
+        if not cells:
+            continue
+        texts = [_get_text(cell) for cell in cells]
+        if not headers and all(cell.name == "th" for cell in cells):
+            # An all-<th> row seen before any header is known acts as the header row.
+            headers = texts
+        else:
+            rows.append(texts)
+
+    if not (headers or rows):
+        return None
+    if not headers:
+        # No header row anywhere: synthesize names sized to the widest row.
+        headers = [f"Column {n}" for n in range(1, max(map(len, rows)) + 1)]
+
+    width = len(headers)
+    # Pad short rows and clip long ones; slicing a padded row does both at once.
+    rows = [(row + [""] * (width - len(row)))[:width] for row in rows]
+
+    return {"headers": headers, "rows": rows}
+
+
+def _flush_slide(
+    slides: list[dict[str, Any]],
+    title: str | None,
+    subtitle: str | None,
+    text_content: list[str],
+    bullet_points: list[str],
+    tables: list[dict[str, Any]],
+) -> None:
+    """Append one slide dict built from the accumulated pieces to ``slides``.
+
+    Nothing is appended when title, text, bullets and tables are all empty
+    (a subtitle alone does not make a slide). Empty pieces are omitted, so
+    keys are a subset of title/subtitle/textContent/bulletPoints/tables.
+    """
+    if not any((title, text_content, bullet_points, tables)):
+        return
+
+    fields: dict[str, Any] = {
+        "title": title,
+        "subtitle": subtitle,
+        "textContent": text_content,
+        "bulletPoints": bullet_points,
+        "tables": tables,
+    }
+    slides.append({key: value for key, value in fields.items() if value})
+
+
+def _collect_content(
+    element: Tag,
+    text_content: list[str],
+    bullet_points: list[str],
+    tables: list[dict[str, Any]],
+) -> None:
+    """Sort one element's content into the text, bullet and table accumulators.
+
+    The three lists are mutated in place. Dispatch is by tag name:
+
+    * tags in ``_SKIP_TAGS`` contribute nothing;
+    * ``ul``/``ol`` add one bullet per non-empty direct ``<li>``;
+    * ``table`` adds a parsed table dict unless the table is empty;
+    * layout containers (``div``, ``section``, ...) are walked child by child:
+      bare text goes to ``text_content``, tags are dispatched recursively, and
+      comments/CDATA are ignored;
+    * ``pre`` keeps its inner whitespace (only the ends are stripped), using
+      the first nested ``<code>`` when there is one;
+    * anything else, h3-h6 included, adds its whitespace-collapsed text
+      (no bold or other styling is recorded).
+    """
+    # Local import: comments, CDATA sections, doctypes and declarations all
+    # subclass NavigableString, and their text must never reach a slide.
+    from bs4.element import PreformattedString
+
+    tag_name = element.name.lower()
+    if tag_name in _SKIP_TAGS:
+        return
+
+    if tag_name in ("ul", "ol"):
+        bullet_points.extend(_parse_list_items(element))
+    elif tag_name == "table":
+        table_data = _parse_table(element)
+        if table_data:
+            tables.append(table_data)
+    elif tag_name in ("div", "section", "article", "main", "header", "footer", "nav", "aside"):
+        for child in element.children:
+            if isinstance(child, PreformattedString):
+                continue
+            if isinstance(child, NavigableString):
+                text = child.strip()
+                if text:
+                    text_content.append(text)
+            elif isinstance(child, Tag):
+                _collect_content(child, text_content, bullet_points, tables)
+    elif tag_name == "pre":
+        code_tag = element.find("code")
+        text = (code_tag if code_tag else element).get_text().strip()
+        if text:
+            text_content.append(text)
+    else:
+        # Paragraphs, sub-headings and every other tag: plain collapsed text.
+        text = _get_text(element)
+        if text:
+            text_content.append(text)
+
+
+def html_to_slides(html: str) -> list[dict[str, Any]]:
+    """
+    Convert HTML content to a list of slide content dicts for PptxService.
+
+    Only the direct children of ``<body>`` (or of the document root when there
+    is no ``<body>``) are scanned. Each h1/h2 there starts a new slide; an h3
+    that directly follows the slide title becomes the subtitle; everything
+    else is handed to ``_collect_content``. Content that appears before the
+    first heading is emitted as a slide of its own.
+
+    Args:
+        html: HTML markup; a full document or a fragment.
+
+    Returns:
+        List of slide dicts with title, subtitle, textContent, bulletPoints,
+        tables (empty parts are omitted). A document without any heading
+        yields a single slide titled "Untitled Slide"; empty input yields [].
+    """
+    # Local import: comments, doctypes, declarations and CDATA sections all
+    # subclass NavigableString, and their text must never reach a slide.
+    from bs4.element import PreformattedString
+
+    soup = BeautifulSoup(html, "html.parser")
+    body = soup.find("body") or soup
+
+    slides: list[dict[str, Any]] = []
+
+    # Current slide accumulation
+    current_title: str | None = None
+    current_subtitle: str | None = None
+    current_text: list[str] = []
+    current_bullets: list[str] = []
+    current_tables: list[dict[str, Any]] = []
+
+    for child in body.children:
+        if isinstance(child, PreformattedString):
+            continue
+        if isinstance(child, NavigableString):
+            text = child.strip()
+            if text:
+                current_text.append(text)
+            continue
+
+        if not isinstance(child, Tag):
+            continue
+        tag_name = child.name.lower()
+        if tag_name in _SKIP_TAGS:
+            continue
+
+        # h1/h2 starts a new slide
+        if tag_name in _SLIDE_BREAK_TAGS:
+            _flush_slide(slides, current_title, current_subtitle, current_text, current_bullets, current_tables)
+            current_title = _get_text(child)
+            current_subtitle = None
+            current_text, current_bullets, current_tables = [], [], []
+            continue
+
+        # h3 right after a title with no content yet (tables count too) becomes subtitle
+        has_content = bool(current_text or current_bullets or current_tables)
+        if tag_name == "h3" and current_title and not has_content and not current_subtitle:
+            current_subtitle = _get_text(child)
+            continue
+
+        _collect_content(child, current_text, current_bullets, current_tables)
+
+    # No heading anywhere: give the lone slide a placeholder title. This must
+    # happen before the final flush; afterwards the slide is already emitted.
+    if not slides and not current_title and (current_text or current_bullets or current_tables):
+        current_title = "Untitled Slide"
+    _flush_slide(slides, current_title, current_subtitle, current_text, current_bullets, current_tables)
+    return slides
diff --git a/services/db/init-scripts/03-create-knowledge-database.sql b/services/db/init-scripts/03-create-knowledge-database.sql
index 06d9749c34..d9d73d1398 100644
--- a/services/db/init-scripts/03-create-knowledge-database.sql
+++ b/services/db/init-scripts/03-create-knowledge-database.sql
@@ -171,6 +171,8 @@ CREATE TABLE IF NOT EXISTS private_knowledge.documents (
status TEXT NOT NULL DEFAULT 'processing' CHECK (status IN ('processing', 'completed', 'failed')),
error TEXT,
chunks_count INTEGER NOT NULL DEFAULT 0,
+ progress_phase TEXT,
+ progress_detail TEXT,
source_created_at TIMESTAMPTZ,
source_modified_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
diff --git a/services/db/migrations/db/migrations/20260408000001_create_semantic_cache_table.sql b/services/db/migrations/db/migrations/20260408000001_create_semantic_cache_table.sql
index 65d7a53f47..0d6459ef38 100644
--- a/services/db/migrations/db/migrations/20260408000001_create_semantic_cache_table.sql
+++ b/services/db/migrations/db/migrations/20260408000001_create_semantic_cache_table.sql
@@ -5,7 +5,7 @@
CREATE TABLE IF NOT EXISTS private_knowledge.semantic_cache (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
query_text TEXT NOT NULL,
- query_embedding vector NOT NULL,
+ query_embedding vector,
response_text TEXT NOT NULL,
metadata JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
@@ -14,10 +14,8 @@ CREATE TABLE IF NOT EXISTS private_knowledge.semantic_cache (
file_ids TEXT[] DEFAULT '{}'
);
--- HNSW index for fast cosine similarity lookups
-CREATE INDEX IF NOT EXISTS idx_semantic_cache_embedding
- ON private_knowledge.semantic_cache
- USING hnsw (query_embedding vector_cosine_ops);
+-- NOTE: HNSW index on query_embedding is created at runtime by the RAG
+-- service once the embedding dimensions are known (same pattern as chunks).
-- B-tree index for TTL cleanup
CREATE INDEX IF NOT EXISTS idx_semantic_cache_expires_at
diff --git a/services/db/migrations/db/migrations/20260411000001_add_document_progress_columns.sql b/services/db/migrations/db/migrations/20260411000001_add_document_progress_columns.sql
new file mode 100644
index 0000000000..619be703bb
--- /dev/null
+++ b/services/db/migrations/db/migrations/20260411000001_add_document_progress_columns.sql
@@ -0,0 +1,12 @@
+-- migrate:up
+-- Add progress tracking columns for document indexing status.
+
+ALTER TABLE private_knowledge.documents
+ ADD COLUMN IF NOT EXISTS progress_phase TEXT,
+ ADD COLUMN IF NOT EXISTS progress_detail TEXT;
+
+-- migrate:down
+
+ALTER TABLE private_knowledge.documents
+ DROP COLUMN IF EXISTS progress_phase,
+ DROP COLUMN IF EXISTS progress_detail;
diff --git a/services/platform/app/features/chat/components/chat-input.tsx b/services/platform/app/features/chat/components/chat-input.tsx
index cf08e087d9..ea4637e41b 100644
--- a/services/platform/app/features/chat/components/chat-input.tsx
+++ b/services/platform/app/features/chat/components/chat-input.tsx
@@ -43,6 +43,11 @@ interface ChatInputProps extends Omit<
uploadFiles: (files: File[]) => Promise;
removeAttachment: (fileId: Id<'_storage'>) => void;
clearAttachments: () => FileAttachment[];
+ isIndexing?: boolean;
+ indexingStatuses?: Map<
+ Id<'_storage'>,
+ { status?: string; error?: string; progress?: string }
+ >;
}
export function ChatInput({
@@ -60,6 +65,8 @@ export function ChatInput({
uploadFiles,
removeAttachment,
clearAttachments,
+ isIndexing = false,
+ indexingStatuses,
...restProps
}: ChatInputProps) {
const { t: tChat } = useT('chat');
@@ -84,7 +91,8 @@ export function ChatInput({
(!value.trim() && attachments.length === 0) ||
isLoading ||
disabled ||
- isUploading
+ isUploading ||
+ isIndexing
)
return;
@@ -224,13 +232,58 @@ export function ChatInput({
{middleEllipsis(attachment.fileName, 28)}
-
- {formatFileSize(attachment.fileSize)}
-
+ {(() => {
+ const info = indexingStatuses?.get(attachment.fileId);
+ const ragStatus = info?.status;
+ if (ragStatus === 'queued' || ragStatus === 'running') {
+ const raw = info?.progress;
+ // Convert "extracting 42/108" → "39%"; a zero total ("0/0") would give
+ // "NaN%", so such input is shown as the raw text instead.
+ let progressLabel = tChat('indexing');
+ if (raw) {
+   const match = /(\d+)\/(\d+)/.exec(raw);
+   const total = match ? Number(match[2]) : 0;
+   if (match && total > 0) {
+     const pct = Math.round((Number(match[1]) / total) * 100);
+     progressLabel = `${pct}%`;
+   } else {
+     progressLabel = raw;
+   }
+ }
+ return (
+
+
+
+ {progressLabel}
+
+
+ );
+ }
+ if (ragStatus === 'failed') {
+ return (
+
+ {tChat('indexingFailed')}
+
+ );
+ }
+ return (
+
+ {formatFileSize(attachment.fileSize)}
+
+ );
+ })()}
)}
diff --git a/services/platform/app/features/chat/components/message-bubble.tsx b/services/platform/app/features/chat/components/message-bubble.tsx
index f2b263b8b3..7efe5fc751 100644
--- a/services/platform/app/features/chat/components/message-bubble.tsx
+++ b/services/platform/app/features/chat/components/message-bubble.tsx
@@ -137,7 +137,7 @@ function MessageBubbleComponent({
const contentRef = useRef(null);
const copyTimeoutRef = useRef(null);
- const { metadata } = useMessageMetadata(message.id);
+ const { metadata } = useMessageMetadata(message.id, message.threadId);
const { citations, hasCitations } = useCitations(metadata?.toolsUsage);
const citationNumbers = useMemo(() => new Set(citations.keys()), [citations]);
const citationsContextValue = useMemo(() => ({ citations }), [citations]);
diff --git a/services/platform/app/features/chat/components/source-cards.tsx b/services/platform/app/features/chat/components/source-cards.tsx
index d09d5ae83c..0468219968 100644
--- a/services/platform/app/features/chat/components/source-cards.tsx
+++ b/services/platform/app/features/chat/components/source-cards.tsx
@@ -1,17 +1,15 @@
'use client';
import { FileText, Globe, ChevronDown, ChevronUp } from 'lucide-react';
-import { memo, useState, useMemo, useCallback } from 'react';
+import { memo, useState, useCallback } from 'react';
-import { useConvexQuery } from '@/app/hooks/use-convex-query';
-import { useOrganizationId } from '@/app/hooks/use-organization-id';
-import { api } from '@/convex/_generated/api';
-import type { Id } from '@/convex/_generated/dataModel';
+import { ViewDialog } from '@/app/components/ui/dialog/view-dialog';
+import { Tooltip } from '@/app/components/ui/overlays/tooltip';
import { useT } from '@/lib/i18n/client';
-import { DocumentPreviewDialog } from '../../documents/components/document-preview-dialog';
import type { CitationInfo } from '../hooks/use-citations';
-import { getUniqueCitations } from '../hooks/use-citations';
+import type { SourceGroup } from '../hooks/use-citations';
+import { getUniqueSources } from '../hooks/use-citations';
const COLLAPSED_LIMIT = 3;
@@ -24,43 +22,159 @@ function getDomain(url: string): string {
}
interface SourceCardProps {
- citation: CitationInfo;
+ source: SourceGroup;
onClick: () => void;
}
-function SourceCard({ citation, onClick }: SourceCardProps) {
- const isWeb = citation.type === 'web';
+function SourceCard({ source, onClick }: SourceCardProps) {
+ const { t } = useT('chat');
+ const isWeb = source.type === 'web';
const Icon = isWeb ? Globe : FileText;
const title =
- citation.filename ??
- (citation.url ? getDomain(citation.url) : `Source ${citation.number}`);
- const subtitle = isWeb
- ? citation.url
- ? getDomain(citation.url)
- : undefined
- : citation.page != null
- ? `p. ${citation.page}`
- : undefined;
+ source.filename ??
+ (source.url
+ ? getDomain(source.url)
+ : t('citations.source', { number: String(source.number) }));
+ const chunkCount = source.chunkNumbers.length;
+
+ const tooltipContent = (
+
+ {title}
+ {chunkCount > 1 && (
+ {t('citations.chunkCount', { count: chunkCount })}
+ )}
+ {source.relevance != null && (
+
+ {t('citations.relevance', {
+ score: String(Math.round(source.relevance)),
+ })}
+
+ )}
+
+ );
+
+ return (
+
+
+
+ );
+}
+
+interface SourceDetailDialogProps {
+ source: SourceGroup | null;
+ onClose: () => void;
+}
+
+/**
+ * Normalize chunk content for display:
+ * - Convert literal `\n` sequences (a backslash followed by "n") to real newlines
+ * - Collapse runs of 3+ newlines into 2, i.e. at most one blank line in a row
+ * - Trim leading and trailing whitespace
+ */
+function normalizeContent(raw: string): string {
+  const withNewlines = raw.replace(/\\n/g, '\n');
+  const collapsed = withNewlines.replace(/\n{3,}/g, '\n\n');
+  return collapsed.trim();
+}
+
+function SourceDetailDialog({ source, onClose }: SourceDetailDialogProps) {
+ const { t } = useT('chat');
+ if (!source) return null;
+
+ const title =
+ source.filename ??
+ (source.url
+ ? getDomain(source.url)
+ : t('citations.source', { number: String(source.number) }));
+
+ const chunkCount = source.chunks.length;
return (
-