diff --git a/.gitignore b/.gitignore
index b3369e5..f881919 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,6 +159,13 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+.files/
+.ruff_cache/
+reactome_github/
+reactomegit/
+get-pip.py
+.DS_Store
+
 .chainlit/translations/*
 !.chainlit/translations/en-US.json
 csv_files/
diff --git a/bin/chat-fastapi.py b/bin/chat-fastapi.py
index b812a6e..b10696d 100644
--- a/bin/chat-fastapi.py
+++ b/bin/chat-fastapi.py
@@ -279,7 +279,7 @@ async def landing_page():
Choose Guest Access to try the chatbot out. Log In will give you an increased query allowance and securely store your chat history so you can save and continue conversations.
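The next file renames the graph-state keys (`input` → `user_input`, `query` → `rephrased_input`) so the raw user text and the LLM-rephrased query are never conflated. A minimal sketch of how the renamed keys thread through the pipeline — plain functions stand in for the real rephrase/RAG chains, so this runs without LangChain installed:

```python
from typing import TypedDict

class ChatState(TypedDict, total=False):
    user_input: str       # raw text exactly as the user typed it
    rephrased_input: str  # standalone English query derived from user_input
    answer: str

def preprocess(state: ChatState) -> ChatState:
    # Stand-in for rephrase_chain.ainvoke(): the real chain rewrites the
    # question; here we only normalize whitespace.
    return {**state, "rephrased_input": " ".join(state["user_input"].split())}

def call_model(state: ChatState) -> ChatState:
    # Stand-in for rag_chain.ainvoke(): retrieval runs on rephrased_input,
    # while user_input stays available for the chat history.
    return {**state, "answer": f"(answer to: {state['rephrased_input']})"}

state: ChatState = {"user_input": "  What does  TP53 regulate?  "}
for step in (preprocess, call_model):
    state = step(state)
print(state["answer"])  # -> (answer to: What does TP53 regulate?)
```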
diff --git a/src/conversational_chain/graph.py b/src/conversational_chain/graph.py
index 79ac262..e1e41de 100644
--- a/src/conversational_chain/graph.py
+++ b/src/conversational_chain/graph.py
@@ -37,8 +37,8 @@ class AdditionalContent(TypedDict):
 class ChatState(TypedDict):
-    input: str  # User input text
-    query: str  # LLM-generated query from user input
+    user_input: str  # User input text
+    rephrased_input: str  # LLM-generated query from user input
     chat_history: Annotated[list[BaseMessage], add_messages]
     context: list[Document]
     answer: str  # primary LLM response that is streamed to the user
@@ -103,21 +103,22 @@ async def preprocess(
         self, state: ChatState, config: RunnableConfig
     ) -> dict[str, str]:
         query: str = await self.rephrase_chain.ainvoke(state, config)
-        return {"query": query}
+        return {"rephrased_input": query}
 
     async def call_model(
         self, state: ChatState, config: RunnableConfig
     ) -> dict[str, Any]:
         result: dict[str, Any] = await self.rag_chain.ainvoke(
             {
-                "input": state["query"],
+                "input": state["rephrased_input"],
+                "user_input": state["user_input"],
                 "chat_history": state["chat_history"],
             },
             config,
         )
         return {
             "chat_history": [
-                HumanMessage(state["input"]),
+                HumanMessage(state["user_input"]),
                 AIMessage(result["answer"]),
             ],
             "context": result["context"],
@@ -130,7 +131,7 @@ async def postprocess(
         search_results: list[WebSearchResult] = []
         if config["configurable"]["enable_postprocess"]:
             result: dict[str, Any] = await self.search_workflow.ainvoke(
-                {"question": state["query"], "generation": state["answer"]},
+                {"question": state["rephrased_input"], "generation": state["answer"]},
                 config=RunnableConfig(callbacks=config["callbacks"]),
             )
             search_results = result["search_results"]
@@ -149,7 +150,7 @@ async def ainvoke(
         if self.graph is None:
             self.graph = await self.initialize()
         result: dict[str, Any] = await self.graph.ainvoke(
-            {"input": user_input},
+            {"user_input": user_input},
             config=RunnableConfig(
                 callbacks=callbacks,
                 configurable={
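The evaluator below gains a required `--rag_type` flag. Its definition appears verbatim in the diff, so this snippet only demonstrates how the flag parses in isolation; the remaining arguments are omitted:

```python
import argparse

# Reproduces just the new flag from parse_arguments(); everything else omitted.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--rag_type",
    choices=["basic", "advanced"],
    required=True,
    help="Type of RAG system to use for evaluation",
)

print(parser.parse_args(["--rag_type", "basic"]).rag_type)  # -> basic
# parser.parse_args([]) would exit with an error: the flag is required.
```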
diff --git a/src/evaluation/evaluator.py b/src/evaluation/evaluator.py
index 9f96b23..45b5abe 100644
--- a/src/evaluation/evaluator.py
+++ b/src/evaluation/evaluator.py
@@ -11,7 +11,8 @@
 from langchain_community.retrievers import BM25Retriever
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from ragas import evaluate
-from ragas.metrics import answer_relevancy, context_utilization, faithfulness
+from ragas.metrics import (answer_relevancy, context_recall,
+                           context_utilization, faithfulness)
 
 from conversational_chain.chain import create_rag_chain
 from reactome.metadata_info import descriptions_info, field_info
@@ -34,6 +35,12 @@ def parse_arguments():
         default="gpt-4o-mini",
         help="Language model to use for evaluation",
     )
+    parser.add_argument(
+        "--rag_type",
+        choices=["basic", "advanced"],
+        required=True,
+        help="Type of RAG system to use for evaluation",
+    )
     return parser.parse_args()
 
 
@@ -50,8 +57,8 @@ def load_dataset(testset_path):
         raise ValueError(f"Error reading the Excel file: {e}")
 
 
-def initialize_rag_chain(embeddings_directory, model_name):
-    """Initialize the RAG chain system."""
+def initialize_rag_chain_with_memory(embeddings_directory, model_name, rag_type):
+    """Initialize the RAGChainWithMemory system."""
     llm = ChatOpenAI(temperature=0.0, verbose=True, model=model_name)
     retriever_list = []
@@ -60,7 +67,7 @@
     )
     data = loader.load()
     bm25_retriever = BM25Retriever.from_documents(data)
-    bm25_retriever.k = 15
+    bm25_retriever.k = 7
 
     # Set up vectorstore SelfQuery retriever
     embedding = OpenAIEmbeddings(model="text-embedding-3-large")
@@ -69,17 +76,22 @@
         embedding_function=embedding,
     )
 
+    vectordb_retriever = vectordb.as_retriever(search_kwargs={"k": 7})
+
     selfq_retriever = SelfQueryRetriever.from_llm(
         llm=llm,
         vectorstore=vectordb,
         document_contents=descriptions_info["summations"],
         metadata_field_info=field_info["summations"],
-        search_kwargs={"k": 15},
+        search_kwargs={"k": 7},
     )
     rrf_retriever = EnsembleRetriever(
         retrievers=[bm25_retriever, selfq_retriever], weights=[0.2, 0.8]
     )
-    retriever_list.append(rrf_retriever)
+    if rag_type == "basic":
+        retriever_list.append(vectordb_retriever)
+    elif rag_type == "advanced":
+        retriever_list.append(rrf_retriever)
 
     reactome_retriever = MergerRetriever(retrievers=retriever_list)
@@ -91,11 +103,15 @@
 def process_testset(
-    testset_path, qa_system, embeddings_directory, response_dir, eval_dir, model_name
+    testset_path,
+    qa_system,
+    embeddings_directory,
+    response_dir,
+    eval_dir,
+    model_name,
+    rag_type,
 ):
     """Process a single testset file."""
-    args = parse_arguments()
-
     testset = load_dataset(testset_path)
     questions = [item["question"] for item in testset]
     ground_truths = [item["ground_truth"] for item in testset]
@@ -108,6 +124,11 @@
         answers.append(response["answer"])
         contexts.append([context.page_content for context in response["context"]])
 
+    rag_response_dir = os.path.join(response_dir, rag_type)
+    rag_eval_dir = os.path.join(eval_dir, rag_type)
+    os.makedirs(rag_response_dir, exist_ok=True)
+    os.makedirs(rag_eval_dir, exist_ok=True)
+
     # Save responses to an Excel file
     data = {
         "question": questions,
@@ -117,8 +138,8 @@
     }
     df_ans = pd.DataFrame(data)
     response_filename = os.path.join(
-        response_dir,
-        f"{os.path.splitext(os.path.basename(testset_path))[0]}_{args.model}_responses.xlsx",
+        rag_response_dir,
+        f"{os.path.splitext(os.path.basename(testset_path))[0]}_{model_name}_responses_{rag_type}.xlsx",
    )
     df_ans.to_excel(response_filename, index=False)
     print(f"Responses saved to {response_filename}")
@@ -128,13 +149,13 @@
     result = evaluate(
         llm=ChatOpenAI(temperature=0.0, verbose=True, model="gpt-4o"),
         dataset=dataset,
-        metrics=[answer_relevancy, context_utilization, faithfulness],
+        metrics=[answer_relevancy, context_utilization, faithfulness, context_recall],
     )
 
     # Save evaluation results to an Excel file
     evaluation_filename = os.path.join(
-        eval_dir,
-        f"{os.path.splitext(os.path.basename(testset_path))[0]}_{args.model}_evaluation.xlsx",
+        rag_eval_dir,
+        f"{os.path.splitext(os.path.basename(testset_path))[0]}_{model_name}_evaluation_{rag_type}.xlsx",
    )
     df_eval = result.to_pandas()
     df_eval.to_excel(evaluation_filename, index=False)
@@ -144,6 +165,7 @@
 def main():
     args = parse_arguments()
     model_name = args.model
+    rag_type = args.rag_type
     response_dir = os.path.join(args.testset_dir, "response")
     eval_dir = os.path.join(args.testset_dir, "evals")
     os.makedirs(response_dir, exist_ok=True)
@@ -151,7 +173,9 @@
     # Initialize RAG Chain
     embeddings_directory = "/Users/hmohammadi/Desktop/react_to_me_github/reactome_chatbot/embeddings/openai/text-embedding-3-large/reactome/Release90/summations"
-    qa_system = initialize_rag_chain(embeddings_directory, model_name)
+    qa_system = initialize_rag_chain_with_memory(
+        embeddings_directory, model_name, rag_type
+    )
 
     # Iterate over all .xlsx files in the directory
     for filename in os.listdir(args.testset_dir):
@@ -166,6 +190,7 @@
                 response_dir,
                 eval_dir,
                 model_name,
+                rag_type,
             )
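The `rag_type` switch above routes between a plain dense retriever ("basic") and the BM25 + SelfQuery ensemble ("advanced"). A toy sketch of that routing, with callables standing in for the real retriever objects so it runs without LangChain:

```python
from typing import Callable

# Stand-ins for the real retrievers (assumed behavior, not the actual classes).
def vectordb_retriever(query: str) -> list[str]:
    return [f"dense hit for {query!r}"]      # "basic": vectordb.as_retriever, k=7

def rrf_retriever(query: str) -> list[str]:
    return [f"ensemble hit for {query!r}"]   # "advanced": BM25 + SelfQuery, weights 0.2/0.8

def pick_retriever(rag_type: str) -> Callable[[str], list[str]]:
    if rag_type == "basic":
        return vectordb_retriever
    elif rag_type == "advanced":
        return rrf_retriever
    raise ValueError(f"unknown rag_type: {rag_type!r}")

print(pick_retriever("advanced")("glycolysis"))  # -> ["ensemble hit for 'glycolysis'"]
```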
diff --git a/src/retreival_chain.py b/src/retreival_chain.py
index e37350a..ed7d43c 100644
--- a/src/retreival_chain.py
+++ b/src/retreival_chain.py
@@ -108,7 +108,7 @@ def create_retrieval_chain(
         loader = CSVLoader(file_path=reactome_csvs_dir / csv_file_name)
         data = loader.load()
         bm25_retriever = BM25Retriever.from_documents(data)
-        bm25_retriever.k = 15
+        bm25_retriever.k = 10
 
         # set up vectorstore SelfQuery retriever
         embedding = embedding_callable()
@@ -123,7 +123,7 @@
             vectorstore=vectordb,
             document_contents=descriptions_info[subdirectory],
             metadata_field_info=field_info[subdirectory],
-            search_kwargs={"k": 15},
+            search_kwargs={"k": 10},
         )
         rrf_retriever = EnsembleRetriever(
             retrievers=[bm25_retriever, selfq_retriever], weights=[0.2, 0.8]
diff --git a/src/system_prompt/reactome_prompt.py b/src/system_prompt/reactome_prompt.py
index 754f1c3..0984708 100644
--- a/src/system_prompt/reactome_prompt.py
+++ b/src/system_prompt/reactome_prompt.py
@@ -3,11 +3,14 @@
 # Contextualize question prompt
 contextualize_q_system_prompt = """
 You are an expert in question formulation with deep expertise in molecular biology and experience as a Reactome curator. Your task is to analyze the conversation history and the user’s latest query to fully understand their intent and what they seek to learn.
-Reformulate the user’s question into a standalone version that retains its full meaning without requiring prior context. The reformulated question should be:**
-Clear, concise, and precise
-Optimized for both vector search (semantic meaning) and case-sensitive keyword search
-Faithful to the user’s intent and scientific accuracy
-If the user’s question is already self-contained and well-formed, return it as is.
+If the user's question is not in English, reformulate the question and translate it to English, ensuring the meaning and intent are preserved.
+Reformulate the user’s question into a standalone version that retains its full meaning without requiring prior context. The reformulated question should be:
+ - Clear, concise, and precise
+ - Optimized for both vector search (semantic meaning) and case-sensitive keyword search
+ - Faithful to the user’s intent and scientific accuracy
+
+The returned question should always be in English.
+If the user’s question is already in English, self-contained, and well-formed, return it as is.
 Do NOT answer the question or provide explanations.
 """
@@ -15,7 +18,7 @@
     [
         ("system", contextualize_q_system_prompt),
         MessagesPlaceholder(variable_name="chat_history"),
-        ("human", "{input}"),
+        ("human", "{user_input}"),
     ]
 )
@@ -46,6 +49,6 @@
     [
         ("system", qa_system_prompt),
         MessagesPlaceholder(variable_name="chat_history"),
-        ("user", "Context:\n{context}\n\nQuestion: {input}"),
+        ("user", "Context:\n{context}\n\nQuestion: {user_input}"),
     ]
 )
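Both prompt templates above now bind `{user_input}` instead of `{input}`. A quick check that the renamed placeholder resolves, assuming `langchain-core` is installed (the real prompts live in src/system_prompt/reactome_prompt.py; the system text here is abbreviated):

```python
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Reformulate the user's question as standalone English."),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{user_input}"),
    ]
)

# Formatting raises an error if the caller still passes the old `input` key.
messages = prompt.format_messages(chat_history=[], user_input="What is TP53?")
print(messages[-1].content)  # -> What is TP53?
```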
diff --git a/src/util/chainlit_helpers.py b/src/util/chainlit_helpers.py
index afd07d3..c4b1bd5 100644
--- a/src/util/chainlit_helpers.py
+++ b/src/util/chainlit_helpers.py
@@ -83,9 +83,7 @@ async def message_rate_limited(config: Config | None) -> bool:
     quota_message = "Our servers are currently overloaded.\n"
     login_uri: str | None = os.getenv("CHAINLIT_URI_LOGIN", "")
     if login_uri:
-        quota_message += (
-            f"Please [log in]({login_uri}) to continue chatting and enjoy features like saved chat history and fewer limits."
-        )
+        quota_message += f"Please [log in]({login_uri}) to continue chatting and enjoy features like saved chat history and fewer limits."
     else:
         quota_message += "Please try again later."
     await send_messages([quota_message])
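Taken together, the evaluator changes segregate outputs per RAG type. A hypothetical illustration of the resulting directory layout (directory and file names mirror the diff; `testsets/` is an assumed stand-in for `--testset_dir`):

```python
import os

testset_dir = "testsets"  # assumed; the real path comes from --testset_dir
for sub in ("response", "evals"):
    for rag_type in ("basic", "advanced"):
        # Mirrors rag_response_dir / rag_eval_dir in process_testset().
        os.makedirs(os.path.join(testset_dir, sub, rag_type), exist_ok=True)

print(sorted(os.listdir(os.path.join(testset_dir, "response"))))
# -> ['advanced', 'basic']
# Responses land as e.g. response/basic/<testset>_<model>_responses_basic.xlsx
```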