From 9d317b8bbfac9774d2ff31e7a658a1ee86ce4ffb Mon Sep 17 00:00:00 2001 From: louie-tsai Date: Fri, 27 Dec 2024 12:35:24 -0800 Subject: [PATCH 1/3] Enable OpenTelemetry Tracing for ChatQnA on Gaudi with TGI serving Signed-off-by: louie-tsai --- ChatQnA/README.md | 19 ++++++++++++++ .../intel/hpu/gaudi/compose.yaml | 26 ++++++++++++++++--- .../docker_compose/intel/hpu/gaudi/set_env.sh | 4 +++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/ChatQnA/README.md b/ChatQnA/README.md index a99391f27c..08cfa29d37 100644 --- a/ChatQnA/README.md +++ b/ChatQnA/README.md @@ -344,3 +344,22 @@ OPEA microservice deployment can easily be monitored through Grafana dashboards ![chatqna dashboards](./assets/img/chatqna_dashboards.png) ![tgi dashboard](./assets/img/tgi_dashboard.png) + +## Tracing Services with OpenTelemetry Tracing and Jaeger + +> NOTE: limited support. Only LLM inference serving with TGI on Gaudi is enabled for this feature. + +OPEA microservices and TGI/TEI serving can easily be traced through Jaeger dashboards in conjunction with the OpenTelemetry Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed. + +Tracing data is exported to http://{EXTERNAL_IP}:4318/v1/traces via Jaeger. +Users can get the external IP with the command below. + +```bash +ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+' +``` + +For TGI serving on Gaudi, users can see different services such as opea, TEI, and TGI. +![Screenshot from 2024-12-27 11-58-18](https://github.com/user-attachments/assets/6126fa70-e830-4780-bd3f-83cb6eff064e) + +Here is a screenshot of a single trace of a TGI serving request. 
+![Screenshot from 2024-12-27 11-26-25](https://github.com/user-attachments/assets/3a7c51c6-f422-41eb-8e82-c3df52cd48b8) diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 170ab54353..2c16406a7e 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -25,6 +25,7 @@ services: INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-gaudi-server @@ -37,7 +38,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} container_name: retriever-redis-server @@ -55,9 +56,11 @@ services: INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/tei-gaudi:1.5.0 + privileged: true container_name: tei-reranking-gaudi-server ports: - "8808:80" @@ -76,9 +79,10 @@ services: HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 - command: --model-id ${RERANK_MODEL_ID} --auto-truncate + command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT tgi-service: image: ghcr.io/huggingface/tgi-gaudi:2.0.6 + privileged: true container_name: tgi-gaudi-server ports: - "8005:80" @@ -101,7 +105,22 @@ services: cap_add: - SYS_NICE ipc: host - command: --model-id 
${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 + command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger + ports: + - "16686:16686" + - "4317:4317" + - "4318:4318" + - "9411:9411" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + COLLECTOR_ZIPKIN_HOST_PORT: 9411 + restart: unless-stopped chatqna-gaudi-backend-server: image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} container_name: chatqna-gaudi-backend-server @@ -127,6 +146,7 @@ services: - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80} - LLM_MODEL=${LLM_MODEL_ID} - LOGFLAG=${LOGFLAG} + - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT} ipc: host restart: always chatqna-gaudi-ui-server: diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh index e38174d6a7..fc4074e618 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -14,3 +14,7 @@ export INDEX_NAME="rag-redis" # Set it as a non-null string, such as true, if you want to enable logging facility, # otherwise, keep it as "" to disable it. 
export LOGFLAG="" +# Set OpenTelemetry Tracing Endpoint +export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+') +export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317 +export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces From 9383f72ebb02fd14818bb481db13f745a92f246c Mon Sep 17 00:00:00 2001 From: "Tsai, Louie" Date: Tue, 7 Jan 2025 00:11:45 -0800 Subject: [PATCH 2/3] fix for CI Signed-off-by: Tsai, Louie --- ChatQnA/tests/test_compose_on_gaudi.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index d81513ed35..9cfe519b87 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -36,6 +36,9 @@ function start_services() { export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" export INDEX_NAME="rag-redis" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+') + export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317 + export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces # Start Docker Containers sed -i "s|container_name: chatqna-gaudi-backend-server|container_name: chatqna-gaudi-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml From 003995b71b82e3a45305f5019b841312ac011db8 Mon Sep 17 00:00:00 2001 From: "Tsai, Louie" Date: Wed, 8 Jan 2025 16:47:30 -0800 Subject: [PATCH 3/3] removed privilege mode due to security concern Signed-off-by: Tsai, Louie --- ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 2c16406a7e..8748a31b44 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -60,7 +60,6 @@ services: restart: unless-stopped 
tei-reranking-service: image: ghcr.io/huggingface/tei-gaudi:1.5.0 - privileged: true container_name: tei-reranking-gaudi-server ports: - "8808:80" @@ -82,7 +81,6 @@ services: command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT tgi-service: image: ghcr.io/huggingface/tgi-gaudi:2.0.6 - privileged: true container_name: tgi-gaudi-server ports: - "8005:80"