Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 9 additions & 51 deletions .chainlit/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ user_env = []
# Duration (in seconds) during which the session is saved when the connection is lost
session_timeout = 3600

# Duration (in seconds) of the user session expiry
user_session_timeout = 1296000 # 15 days

# Enable third-party caching (e.g. the LangChain cache)
cache = false

# Authorized origins
allow_origins = ["*"]

# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
# follow_symlink = false

[features]
# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
unsafe_allow_html = true
Expand All @@ -39,29 +39,20 @@ edit_message = true
max_size_mb = 500

[features.audio]
# Threshold for audio recording
min_decibels = -45
# Delay for the user to start speaking in MS
initial_silence_timeout = 3000
# Delay for the user to continue speaking in MS. If the user stops speaking for this duration, the recording will stop.
silence_timeout = 1500
# Above this duration (MS), the recording will forcefully stop.
max_duration = 15000
# Duration of the audio chunks in MS
chunk_duration = 1000
# Sample rate of the audio
sample_rate = 44100
sample_rate = 24000

[UI]
# Name of the assistant.
name = "React-to-me"

# default_theme = "dark"

# layout = "wide"

# Description of the assistant. This is used for HTML tags.
# description = ""

# Large content is collapsed by default for a cleaner UI
default_collapse_content = true

# Chain of Thought (CoT) display mode. Can be "hidden", "tool_call" or "full".
cot = "hidden"

Expand All @@ -76,9 +67,6 @@ github = "https://github.com/reactome/reactome_chatbot/issues"
# The Javascript file can be served from the public directory.
# custom_js = "/public/test.js"

# Specify a custom font url.
# custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"

# Specify a custom meta image url.
# custom_meta_image_url = "https://chainlit-cloud.s3.eu-west-3.amazonaws.com/logo/chainlit_banner.png"

Expand All @@ -87,35 +75,5 @@ github = "https://github.com/reactome/reactome_chatbot/issues"
# Be careful: If this is a relative path, it should not start with a slash.
# custom_build = "./public/build"

[UI.theme]
default = "dark"
#layout = "wide"
#font_family = "Inter, sans-serif"
# Override default MUI light theme. (Check theme.ts)
[UI.theme.light]
#background = "#FAFAFA"
#paper = "#FFFFFF"

[UI.theme.light.primary]
main = "#2F9EC2"
dark = "#0F5462"
light = "#E1F4FC"
[UI.theme.light.text]
#primary = "#212121"
#secondary = "#616161"

# Override default MUI dark theme. (Check theme.ts)
[UI.theme.dark]
#background = "#FAFAFA"
#paper = "#FFFFFF"

[UI.theme.dark.primary]
main = "#2F9EC2"
dark = "#0F5462"
light = "#E1F4FC"
[UI.theme.dark.text]
#primary = "#EEEEEE"
#secondary = "#BDBDBD"

[meta]
generated_by = "1.3.1"
generated_by = "2.0.3"
41 changes: 41 additions & 0 deletions .config.schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
$schema: "https://json-schema.org/draft/2020-12/schema"
type: object
properties:
messages:
type: object
additionalProperties:
type: object
properties:
message:
type: string
enabled:
type: boolean
recipients:
type: array
items:
type: string
oneOf:
- pattern: "@.+\\..+"
- enum: ["all", "logged_in", "guests"]
trigger:
type: object
properties:
event:
type: string
enum: ["on_chat_start", "on_chat_end", "on_chat_resume", "on_message"]
after_messages:
type: integer
start:
type: string
format: date-time
end:
type: string
format: date-time
freq_max:
type: string
pattern: "^[0-9]+[smhdw]$"
anyOf:
- required: ["event"]
- required: ["after_messages"]
required: ["message", "trigger"]
required: ["messages"]
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,4 @@ cython_debug/
csv_files/
embeddings/
records/
config.yml
29 changes: 20 additions & 9 deletions bin/chat-chainlit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python

import os
from typing import Any

import chainlit as cl
import chainlit.data as cl_data
Expand All @@ -10,10 +11,13 @@

from conversational_chain.graph import RAGGraphWithMemory
from retreival_chain import create_retrieval_chain
from util.chainlit_helpers import static_messages
from util.config_yml import Config, TriggerEvent
from util.embedding_environment import EmbeddingEnvironment
from util.logging import logging

load_dotenv()
config: Config | None = Config.from_yaml()

ENV = os.getenv("CHAT_ENV", "reactome")
logging.info(f"Selected environment: {ENV}")
Expand Down Expand Up @@ -58,29 +62,36 @@ async def chat_profile() -> list[cl.ChatProfile]:
async def start() -> None:
thread_id: str = cl.user_session.get("id")
cl.user_session.set("thread_id", thread_id)

chat_profile: str = cl.user_session.get("chat_profile")
initial_message = (
f"Welcome to {chat_profile}, your interactive chatbot for exploring Reactome!"
" Ask me about biological pathways and processes."
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved this to config.yml

)
await cl.Message(content=initial_message).send()
await static_messages(config, TriggerEvent.on_chat_start)


@cl.on_chat_resume
async def resume(thread: ThreadDict) -> None:
pass # ChainLit/LangGraph Postgres integrations handle everything
await static_messages(config, TriggerEvent.on_chat_resume)


@cl.on_chat_end
async def end() -> None:
    """Chat-end hook: forward the module-level config to ``static_messages``.

    Calls ``static_messages`` with the ``on_chat_end`` trigger event.
    NOTE(review): presumably this emits any messages configured for that
    trigger; confirm against ``util.chainlit_helpers.static_messages``.
    """
    trigger = TriggerEvent.on_chat_end
    await static_messages(config, trigger)


@cl.on_message
async def main(message: cl.Message) -> None:
await static_messages(config, TriggerEvent.on_message)

message_count: int = cl.user_session.get("message_count", 0) + 1
cl.user_session.set("message_count", message_count)

thread_id: str = cl.user_session.get("thread_id")
cb = cl.AsyncLangchainCallbackHandler(
stream_final_answer=True,
force_stream_final_answer=True, # we're not using prefix tokens
)
await llm_graph.ainvoke(
result: dict[str, Any] = await llm_graph.ainvoke(
message.content,
callbacks=[cb],
thread_id=thread_id,
)
if len(result["additional_text"]) > 0:
await cl.Message(content=result["additional_text"]).send()
await static_messages(config, after_messages=message_count)
52 changes: 42 additions & 10 deletions bin/chat-fastapi.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import hashlib
import hmac
import os
from string import Template

import requests
from chainlit.utils import mount_chainlit
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request, Response
from fastapi import FastAPI, Request, Response
from fastapi.responses import RedirectResponse

load_dotenv()
Expand All @@ -17,6 +18,20 @@
CLOUDFLARE_SECRET_KEY = os.getenv("CLOUDFLARE_SECRET_KEY")
CLOUDFLARE_SITE_KEY = os.getenv("CLOUDFLARE_SITE_KEY")

ERROR_PAGE_TEMPLATE = Template(
f"""
<html>
<body>
<h1>$error_title</h1>
<p>If you believe this to be in error, please contact the maintainers to report an issue: help@reactome.org</p>
<form action="{CHAINLIT_URI}" method="get">
<button type="submit">Try again</button>
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking for feedback on this message. Should we include an email or link?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now it could be help@reactome.org

</form>
</body>
</html>
"""
)


def make_signature(value: str) -> str:
if CLOUDFLARE_SECRET_KEY is None:
Expand Down Expand Up @@ -56,6 +71,10 @@ async def verify_captcha_middleware(request: Request, call_next):
response = await call_next(request)
return response

if request.url.scheme == "http":
url = request.url.replace(scheme="https")
return RedirectResponse(url=str(url))

# Check if the user has completed the CAPTCHA verification
captcha_verified = request.cookies.get("captcha_verified")

Expand All @@ -69,18 +88,14 @@ async def verify_captcha_middleware(request: Request, call_next):

# Serve the CAPTCHA verification page (basic HTML form)
@app.get(f"{CHAINLIT_URI}/verify_captcha_page")
async def captcha_page(request: Request):

host = request.headers.get('X-Forwarded-Host', request.headers.get('Host'))
form_action = f"https://{host}{CHAINLIT_URI}/verify_captcha"
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reverted this since the new HTTPS middleware redirect should handle this

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

true


async def captcha_page():
html_content = f"""
<html>
<head>
<script src="https://challenges.cloudflare.com/turnstile/v0/api.js" async defer></script>
</head>
<body>
<form id="captcha-form" action="{form_action}" method="post">
<form id="captcha-form" action="{CHAINLIT_URI}/verify_captcha" method="post">
<div class="cf-turnstile" data-sitekey="{os.getenv('CLOUDFLARE_SITE_KEY')}" data-callback="onSubmit"></div>
</form>
<script>
Expand All @@ -107,14 +122,28 @@ async def verify_captcha(request: Request):
form_data = await request.form()
cf_turnstile_response = form_data.get("cf-turnstile-response")
if not isinstance(cf_turnstile_response, str):
raise HTTPException(status_code=400, detail="CAPTCHA response is invalid")
error_html = ERROR_PAGE_TEMPLATE.substitute(
error_title="CAPTCHA response is invalid",
)
return Response(content=error_html, media_type="text/html", status_code=400)

client_ip: str
if request.client:
client_ip = request.client.host
elif "X-Forwarded-For" in request.headers:
client_ip = request.headers["X-Forwarded-For"].split(",")[0]
else:
error_html = ERROR_PAGE_TEMPLATE.substitute(
error_title="Could not determine client host",
)
return Response(content=error_html, media_type="text/html", status_code=400)

# Verify the CAPTCHA with Cloudflare
url = "https://challenges.cloudflare.com/turnstile/v0/siteverify"
data = {
"secret": os.getenv("CLOUDFLARE_SECRET_KEY"),
"response": cf_turnstile_response,
"remoteip": request.client.host if request.client else "127.0.0.1",
"remoteip": client_ip,
}

# Perform request to Cloudflare Turnstile verification endpoint
Expand All @@ -123,7 +152,10 @@ async def verify_captcha(request: Request):

# If CAPTCHA validation fails, return an error
if not result.get("success"):
raise HTTPException(status_code=400, detail="CAPTCHA verification failed")
error_html = ERROR_PAGE_TEMPLATE.substitute(
error_title="CAPTCHA verification failed",
)
return Response(content=error_html, media_type="text/html", status_code=400)

# Set a signed cookie to mark CAPTCHA as verified
cookie_value = create_secure_cookie(cf_turnstile_response)
Expand Down
31 changes: 22 additions & 9 deletions bin/export_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,27 @@ def build_query(since_timestamp: str | None) -> str:
if since_timestamp is None:
since_timestamp = ""
query = f"""
SELECT "threadId", "createdAt", name, type, output
SELECT
steps."threadId",
steps."createdAt",
steps.name,
steps.type,
steps.output,
feedbacks.value,
feedbacks.comment
FROM steps
LEFT JOIN
feedbacks ON steps."parentId" = feedbacks."forId"
WHERE
type IN ('user_message', 'assistant_message') AND
"createdAt" > '{since_timestamp}'
ORDER BY (
SELECT MIN("createdAt")
FROM steps s
WHERE s."threadId" = steps."threadId"
), "createdAt";
steps.type IN ('user_message', 'assistant_message') AND
steps."createdAt" > '{since_timestamp}'
ORDER BY
(
SELECT MIN(s."createdAt")
FROM steps s
WHERE s."threadId" = steps."threadId"
),
steps."createdAt";
"""
return query

Expand All @@ -47,6 +58,7 @@ def main(records_dir: Path):
with psycopg.connect(CHAINLIT_DB_URI) as conn:
with conn.cursor() as cur:
cur.execute(query)
header = [col.name for col in cur.description] if cur.description else None
records = cur.fetchall()

if len(records) == 0:
Expand All @@ -59,7 +71,8 @@ def main(records_dir: Path):

with open(record_file, mode="w", newline="") as file:
writer = csv.writer(file, lineterminator="\n")
writer.writerow(["threadId", "createdAt", "name", "type", "output"])
if header:
writer.writerow(header)
writer.writerows(records)

print("Wrote", record_file)
Expand Down
Loading