Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
d2ba10d
local changes
heliamoh Nov 29, 2024
7a882a0
Updated the Evaluator, Added a new function to RAGChain to return inp…
heliamoh Dec 23, 2024
36770a9
created a new function for RAGChainWithMemory that returns source doc…
heliamoh Dec 24, 2024
7789560
pulled main branch changes
heliamoh Dec 24, 2024
6d9fb56
Created a wrapper around the Tavily API, modified the nodes and workf…
heliamoh Jan 13, 2025
ec9af0a
created a wrapper around the Tavily API
heliamoh Jan 13, 2025
ecbac92
updated final result formatting of the wrappers
heliamoh Jan 14, 2025
4d7cf2f
modified query-rewriter and completeness-evaluator
heliamoh Jan 14, 2025
bc7d411
created a new query-handler to organize and re-rank external search (…
heliamoh Jan 14, 2025
ceafa7b
updated the node and workflow logic & added a new node to format and …
heliamoh Jan 14, 2025
cf9f27e
Update nodes.py
heliamoh Jan 14, 2025
dea0d30
Update nodes.py
heliamoh Jan 14, 2025
6f42eed
added instructions to make the LLM response more engaging and clear
heliamoh Jan 15, 2025
c771b1c
Merge branch 'main' into completeness-websearch
GFJHogue Jan 16, 2025
7cf292e
integrating external search flow (WIP)
GFJHogue Jan 16, 2025
aabed26
integrating external search flow (WIP2)
GFJHogue Jan 17, 2025
1ffacd9
external search flow added to main graph; feature switch added to con…
GFJHogue Jan 17, 2025
fa45de9
search results JSX element + code format + tavily rate limit
GFJHogue Jan 20, 2025
4833c7c
fix import
GFJHogue Jan 20, 2025
e0b1d5c
don't pass config to subgraph - searches are one-off and shouldn't ge…
GFJHogue Jan 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion .config.schema.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
$schema: "https://json-schema.org/draft/2020-12/schema"
type: object
properties:
features:
type: object
properties:
postprocessing:
type: object
properties:
enabled:
type: boolean
user_group:
type: string
enum: ["all", "logged_in"]
required: ["enabled"]
required: ["postprocessing"]
messages:
type: object
additionalProperties:
Expand Down Expand Up @@ -38,4 +51,4 @@ properties:
- required: ["event"]
- required: ["after_messages"]
required: ["message", "trigger"]
required: ["messages"]
required: ["features", "messages"]
18 changes: 15 additions & 3 deletions bin/chat-chainlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from conversational_chain.graph import RAGGraphWithMemory
from retreival_chain import create_retrieval_chain
from util.chainlit_helpers import static_messages
from util.chainlit_helpers import is_feature_enabled, static_messages
from util.config_yml import Config, TriggerEvent
from util.embedding_environment import EmbeddingEnvironment
from util.logging import logging
Expand Down Expand Up @@ -87,11 +87,23 @@ async def main(message: cl.Message) -> None:
stream_final_answer=True,
force_stream_final_answer=True, # we're not using prefix tokens
)
enable_postprocess: bool = is_feature_enabled(config, "postprocessing")
result: dict[str, Any] = await llm_graph.ainvoke(
message.content,
callbacks=[cb],
thread_id=thread_id,
enable_postprocess=enable_postprocess,
)
if len(result["additional_text"]) > 0:
await cl.Message(content=result["additional_text"]).send()
if (
enable_postprocess
and cb.final_stream
and len(result["additional_content"]["search_results"]) > 0
):
sent_message: cl.Message = cb.final_stream
search_results_element = cl.CustomElement(
name="SearchResults",
props={"results": result["additional_content"]["search_results"]},
)
sent_message.elements = [search_results_element] # type: ignore
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Chainlit's own type annotations on this make it seemingly impossible to satisfy mypy here.

await sent_message.update()
await static_messages(config, after_messages=message_count)
2 changes: 1 addition & 1 deletion bin/chat-fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ async def verify_captcha_middleware(request: Request, call_next):
response = await call_next(request)
return response

host = request.headers.get('referer')
host = request.headers.get("referer")
if host and host.startswith("http:"):
url = request.url.replace(scheme="https")
return RedirectResponse(url=str(url))
Expand Down
5 changes: 5 additions & 0 deletions config_default.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# yaml-language-server: $schema=./.config.schema.yaml

features:
postprocessing: # external web search feature
enabled: true
user_group: all

messages:

welcome:
Expand Down
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ services:
- CHAINLIT_AUTH_SECRET=${CHAINLIT_AUTH_SECRET}
- CHAINLIT_URI=${CHAINLIT_URI}
- CHAINLIT_URL=${CHAINLIT_URL}
- TAVILY_API_KEY=${TAVILY_API_KEY}
ports:
- "8000:8000"
depends_on:
Expand All @@ -40,6 +41,8 @@ services:
- CLOUDFLARE_SECRET_KEY=${CLOUDFLARE_SECRET_KEY}
- CLOUDFLARE_SITE_KEY=${CLOUDFLARE_SITE_KEY}
- CHAINLIT_URI=${CHAINLIT_URI_NO_LOGIN}
- CHAINLIT_URL=${CHAINLIT_URL}
- TAVILY_API_KEY=${TAVILY_API_KEY}
ports:
- "8001:8000"
depends_on:
Expand Down
347 changes: 343 additions & 4 deletions poetry.lock

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions public/elements/SearchResults.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Return only the hostname portion (e.g. "example.com") of an absolute URL.
// Throws a TypeError if `url` is not a parseable absolute URL.
const getDomainFromUrl = (url) => new URL(url).hostname;

const SearchResults = () => {
return (
<div>
<div class="prose lg:prose-xl">
<p class="leading-7 [&amp;:not(:first-child)]:mt-4 whitespace-pre-wrap break-words">
Here are some external resources you may find helpful:
</p>
</div>
<div className="flex flex-col gap-2 p-4 pt-0">
{props.results.map((result) => (
<a
key={result.id}
href={result.url}
className="flex flex-col items-start gap-2 rounded-lg border p-3 text-left text-sm transition-all hover:bg-accent"
>
<div className="flex w-full flex-col gap-1">
<div className="flex items-center">
<div className="flex items-center gap-2">
<div className="font-semibold">
{result.title}
</div>
</div>
<div className="ml-auto text-xs text-muted-foreground">
{getDomainFromUrl(result.url)}
</div>
</div>
<div
className="text-xs text-muted-foreground"
style={{ // line-clamp-2 class not working for some reason
display: '-webkit-box',
WebkitBoxOrient: 'vertical',
WebkitLineClamp: 2,
overflow: 'hidden',
}}
>
{result.content.substring(0, 300)}
</div>
</div>
</a>
))}
</div>
</div>
)
};

export default SearchResults;
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ rank-bm25 = "^0.2.2"
psycopg = {extras = ["binary"], version = "^3.2.3"}
pydantic = "^2.10.5"
pyyaml = "^6.0.2"
tavily-python = "^0.5.0"

[tool.poetry.group.dev.dependencies]
ruff = "^0.7.1"
Expand All @@ -54,6 +55,8 @@ isort = "^5.13.2"
pandas-stubs = "^2.2.3.241009"
types-requests = "^2.32.0.20241016"
types-pyyaml = "^6.0.12.20241230"
datasets = "^3.2.0"
ragas = "^0.2.11"

[[tool.poetry.source]]
name = "PyPI"
Expand Down
57 changes: 38 additions & 19 deletions src/conversational_chain/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from psycopg_pool import AsyncConnectionPool

from conversational_chain.chain import create_rag_chain
from external_search.state import WebSearchResult
from external_search.workflow import create_search_workflow
from util.logging import logging

LANGGRAPH_DB_URI = f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@postgres:5432/{os.getenv('POSTGRES_LANGGRAPH_DB')}?sslmode=disable"
Expand All @@ -30,21 +32,25 @@
logging.warning("POSTGRES_LANGGRAPH_DB undefined; falling back to MemorySaver.")


class ChatResponse(TypedDict):
chat_history: Annotated[Sequence[BaseMessage], add_messages]
context: list[Document]
answer: str # primary LLM response that is streamed to the user
class AdditionalContent(TypedDict):
    """Extra payload attached to the chat state after the graph completes."""

    # Web search results produced by the external-search workflow in the
    # postprocess step; left empty when postprocessing is disabled.
    search_results: list[WebSearchResult]


class ChatState(ChatResponse):
class ChatState(TypedDict):
input: str
additional_text: str # additional text to send after graph completes
chat_history: Annotated[Sequence[BaseMessage], add_messages]
context: list[Document]
answer: str # primary LLM response that is streamed to the user
additional_content: (
AdditionalContent # additional content to send after graph completes
)


class RAGGraphWithMemory:
def __init__(self, retriever: BaseRetriever, llm: BaseChatModel) -> None:
# Set up runnables
self.rag_chain: Runnable = create_rag_chain(llm, retriever)
self.search_workflow: CompiledStateGraph = create_search_workflow(llm)

# Create graph
state_graph: StateGraph = StateGraph(ChatState)
Expand Down Expand Up @@ -91,35 +97,48 @@ async def close_pool(self) -> None:

async def call_model(
self, state: ChatState, config: RunnableConfig
) -> ChatResponse:
response = await self.rag_chain.ainvoke(state, config)
) -> dict[str, Any]:
result = await self.rag_chain.ainvoke(state, config)
return {
"chat_history": [
HumanMessage(state["input"]),
AIMessage(response["answer"]),
AIMessage(result["answer"]),
],
"context": response["context"],
"answer": response["answer"],
"context": result["context"],
"answer": result["answer"],
}

async def postprocess(
self, state: ChatResponse, config: RunnableConfig
) -> dict[str, str]:
# TODO: add completeness checking flow here
self, state: ChatState, config: RunnableConfig
) -> dict[str, dict[str, list[WebSearchResult]]]:
search_results: list[WebSearchResult] = []
if config["configurable"]["enable_postprocess"]:
result: dict[str, Any] = await self.search_workflow.ainvoke(
{"question": state["input"], "generation": state["answer"]},
)
search_results = result["search_results"]
return {
"additional_text": "",
"additional_content": {"search_results": search_results},
}

async def ainvoke(
self, user_input: str, callbacks: Callbacks, thread_id: str
self,
user_input: str,
*,
callbacks: Callbacks,
thread_id: str,
enable_postprocess: bool = True,
) -> dict[str, Any]:
if self.graph is None:
self.graph = await self.initialize()
response: dict[str, Any] = await self.graph.ainvoke(
result: dict[str, Any] = await self.graph.ainvoke(
{"input": user_input},
config=RunnableConfig(
callbacks=callbacks,
configurable={"thread_id": thread_id},
configurable={
"thread_id": thread_id,
"enable_postprocess": enable_postprocess,
},
),
)
return response
return result
Loading