From e370e636a565a7826739865dd9ff7830c127580c Mon Sep 17 00:00:00 2001 From: letonghan Date: Mon, 17 Mar 2025 17:05:18 +0800 Subject: [PATCH 01/13] Refactor Translation example and merge with CodeTrans, update docker compose of rocm and xeon part Signed-off-by: letonghan --- .../docker_compose/amd/gpu/rocm/README.md | 30 ++++- .../docker_compose/amd/gpu/rocm/compose.yaml | 4 +- .../docker_compose/intel/cpu/xeon/README.md | 108 +++++++++++++++++- .../intel/cpu/xeon/compose.yaml | 24 ++-- .../intel/cpu/xeon/compose_tgi.yaml | 97 ++++++++++++++++ Translation/docker_compose/set_env.sh | 2 +- Translation/docker_image_build/build.yaml | 12 ++ Translation/translation.py | 52 +++++---- 8 files changed, 284 insertions(+), 45 deletions(-) create mode 100644 Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml diff --git a/Translation/docker_compose/amd/gpu/rocm/README.md b/Translation/docker_compose/amd/gpu/rocm/README.md index 38b0176349..ae5e5794ee 100644 --- a/Translation/docker_compose/amd/gpu/rocm/README.md +++ b/Translation/docker_compose/amd/gpu/rocm/README.md @@ -79,7 +79,9 @@ cd GenAIExamples/Translation/docker_compose/amd/gpu/rocm ### Set environments -In the file "GenAIExamples/Translation/docker_compose/amd/gpu/rocm/set_env.sh " it is necessary to set the required values. Parameter assignments are specified in the comments for each variable setting command +In the file "GenAIExamples/Translation/docker_compose/amd/gpu/rocm/set_env.sh" it is necessary to set the required values. Parameter assignments are specified in the comments for each variable setting command. + +If you need to start a Translation service for code (instead of text), change the `TRANSLATION_LLM_MODEL_ID` in `set_env.sh` to "Qwen/Qwen2.5-Coder-7B-Instruct". 
```bash chmod +x set_env.sh @@ -97,32 +99,54 @@ docker compose up -d ## Validate TGI service ```bash +# text translation curl http://${TRANSLATION_HOST_IP}:${TRANSLATIONS_TGI_SERVICE_PORT}/generate \ -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' +# code translation +curl http://${TRANSLATION_HOST_IP}:${TRANSLATIONS_TGI_SERVICE_PORT}/generate \ + -X POST \ + -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' ``` ## Validate LLM service ```bash +# text translation curl http://${TRANSLATION_HOST_IP}:9000/v1/chat/completions \ -X POST \ -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' \ -H 'Content-Type: application/json' +# code translation +curl http://${TRANSLATION_HOST_IP}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":" ### System: Please translate the following Golang codes into Python codes. 
### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' \ + -H 'Content-Type: application/json' ``` ## Validate MegaService ```bash +# text translation curl http://${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation -H "Content-Type: application/json" -d '{ - "language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + "language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' +# code translation +curl http://${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' ``` ## Validate Nginx service ```bash +# text translation curl http://${TRANSLATION_HOST_IP}:${TRANSLATION_NGINX_PORT}/v1/translation \ -H "Content-Type: application/json" \ - -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' +# code translation +curl http://${TRANSLATION_HOST_IP}:${TRANSLATION_NGINX_PORT}/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' ``` diff --git a/Translation/docker_compose/amd/gpu/rocm/compose.yaml b/Translation/docker_compose/amd/gpu/rocm/compose.yaml index 2ee2a9c2cb..399c59e81b 100644 --- a/Translation/docker_compose/amd/gpu/rocm/compose.yaml +++ b/Translation/docker_compose/amd/gpu/rocm/compose.yaml @@ -47,7 +47,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - 
LLM_ENDPOINT: ${TRANSLATION_TGI_LLM_ENDPOINT} + LLM_ENDPOINT: "http://translation-tgi-service" LLM_MODEL_ID: ${TRANSLATION_LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 @@ -66,7 +66,7 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${TRANSLATION_MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${TRANSLATION_LLM_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP="translation-llm" ipc: host restart: always translation-ui-server: diff --git a/Translation/docker_compose/intel/cpu/xeon/README.md b/Translation/docker_compose/intel/cpu/xeon/README.md index 4a41cb5385..fb2e64e262 100644 --- a/Translation/docker_compose/intel/cpu/xeon/README.md +++ b/Translation/docker_compose/intel/cpu/xeon/README.md @@ -82,6 +82,37 @@ By default, the LLM model is set to a default value as listed below: Change the `LLM_MODEL_ID` below for your needs. +For users in China who are unable to download models directly from Huggingface, you can use [ModelScope](https://www.modelscope.cn/models) or a Huggingface mirror to download models. The vLLM/TGI can load the models either online or offline as described below: + +1. Online + + ```bash + export HF_TOKEN=${your_hf_token} + export HF_ENDPOINT="https://hf-mirror.com" + model_name="haoranxu/ALMA-13B" + # Start vLLM LLM Service + docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80 + # Start TGI LLM Service + docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name + ``` + +2. Offline + + - Search your model name in ModelScope. 
For example, check [this page](https://www.modelscope.cn/models/rubraAI/Mistral-7B-Instruct-v0.3/files) for model `mistralai/Mistral-7B-Instruct-v0.3`. + + - Click on the `Download this model` button, and choose one way to download the model to your local path `/path/to/model`. + + - Run the following command to start the LLM service. + + ```bash + export HF_TOKEN=${your_hf_token} + export model_path="/path/to/model" + # Start vLLM LLM Service + docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80 + # Start TGI LLM Service + docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data + ``` + ### Setup Environment Variables 1. Set the required environment variables: @@ -105,6 +136,8 @@ Change the `LLM_MODEL_ID` below for your needs. 3. Set up other environment variables: + If you want to start a code translation service (instead of text translation), change the `LLM_MODEL_ID` to "mistralai/Mistral-7B-Instruct-v0.3" in `set_env.sh`. + ```bash cd ../../../ source set_env.sh @@ -113,7 +146,19 @@ Change the `LLM_MODEL_ID` below for your needs. ### Start Microservice Docker Containers ```bash -docker compose up -d +cd GenAIExamples/Translation/docker_compose/intel/cpu/xeon +``` + +If you use vLLM as the LLM serving backend: + +```bash +docker compose -f compose.yaml up -d +``` + +If you use TGI as the LLM serving backend: + +```bash +docker compose -f compose_tgi.yaml up -d ``` > Note: The docker images will be automatically downloaded from `docker hub`: @@ -127,43 +172,98 @@ docker pull opea/nginx:latest ### Validate Microservices -1. TGI Service +1. LLM backend Service + + On the first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready. 
+ + Try the command below to check whether the LLM serving is ready. ```bash + # vLLM service + docker logs translation-xeon-vllm-service 2>&1 | grep complete + # If the service is ready, you will get the response like below. + INFO: Application startup complete. + ``` + + ```bash + # TGI service + docker logs translation-xeon-tgi-service | grep Connected + # If the service is ready, you will get the response like below. + 2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected + ``` + + Then try the `cURL` command below to validate services. + + ```bash + # either vLLM or TGI service + # text translation curl http://${host_ip}:8008/generate \ -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' + # code translation + curl http://${host_ip}:8008/v1/chat/completions \ + -X POST \ + -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' ``` 2. LLM Microservice ```bash + # text translation curl http://${host_ip}:9000/v1/chat/completions \ -X POST \ -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' \ -H 'Content-Type: application/json' + # code translation + curl http://${host_ip}:9000/v1/chat/completions\ + -X POST \ + -d '{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' \ + -H 'Content-Type: application/json' ``` 3. 
MegaService ```bash + # text translation curl http://${host_ip}:8888/v1/translation -H "Content-Type: application/json" -d '{ - "language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + "language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + # code translation + curl http://${host_ip}:8888/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' ``` 4. Nginx Service ```bash + # text translation + curl http://${host_ip}:${NGINX_PORT}/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + # code translation curl http://${host_ip}:${NGINX_PORT}/v1/translation \ -H "Content-Type: application/json" \ - -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + -d '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' ``` Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service. ## 🚀 Launch the UI +### Launch with origin port + Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + ![project-screenshot](../../../../assets/img/trans_ui_init.png) ![project-screenshot](../../../../assets/img/trans_ui_select.png) + + +### Launch with Nginx + +If you want to launch the UI using Nginx, open this URL: `http://{host_ip}:{NGINX_PORT}` in your browser to access the frontend. 
+ +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/71214938-819c-4979-89cb-c03d937cd7b5) + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/be543e96-ddcd-4ee0-9f2c-4e99fee77e37) diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml index 4b77d84484..9e8f038e4a 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml @@ -2,31 +2,30 @@ # SPDX-License-Identifier: Apache-2.0 services: - tgi-service: - image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-service + vllm-service: + image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + container_name: translation-xeon-vllm-service ports: - "8008:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - host_ip: ${host_ip} + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] interval: 10s timeout: 10s retries: 100 - volumes: - - "${MODEL_CACHE:-./data}:/data" - shm_size: 1g - command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} - container_name: llm-textgen-server + container_name: translation-xeon-llm-server depends_on: tgi-service: condition: service_healthy @@ -37,8 +36,9 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 
1 HF_HUB_ENABLE_HF_TRANSFER: 0 diff --git a/Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml new file mode 100644 index 0000000000..083715dbe7 --- /dev/null +++ b/Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -0,0 +1,97 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + tgi-service: + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + container_name: translation-xeon-tgi-service + ports: + - "8008:80" + volumes: + - "${MODEL_CACHE}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + llm: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + container_name: translation-xeon-llm-server + depends_on: + tgi-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + restart: unless-stopped + translation-xeon-backend-server: + image: ${REGISTRY:-opea}/translation:${TAG:-latest} + container_name: translation-xeon-backend-server + depends_on: + - tgi-service + - llm + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + translation-xeon-ui-server: + image: 
${REGISTRY:-opea}/translation-ui:${TAG:-latest} + container_name: translation-xeon-ui-server + depends_on: + - translation-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + translation-xeon-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} + container_name: translation-xeon-nginx-server + depends_on: + - translation-xeon-backend-server + - translation-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} + - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} + - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} + - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} + - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} + ipc: host + restart: always +networks: + default: + driver: bridge diff --git a/Translation/docker_compose/set_env.sh b/Translation/docker_compose/set_env.sh index aa4b428f6e..723c39da10 100644 --- a/Translation/docker_compose/set_env.sh +++ b/Translation/docker_compose/set_env.sh @@ -8,7 +8,7 @@ popd > /dev/null export LLM_MODEL_ID="haoranxu/ALMA-13B" -export TGI_LLM_ENDPOINT="http://${host_ip}:8008" +export LLM_ENDPOINT="http://${host_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} diff --git a/Translation/docker_image_build/build.yaml b/Translation/docker_image_build/build.yaml index 1dad29cb7c..4d4c16fa4f 100644 --- a/Translation/docker_image_build/build.yaml +++ b/Translation/docker_image_build/build.yaml @@ -23,6 +23,18 @@ services: dockerfile: comps/llms/src/text-generation/Dockerfile extends: translation image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + vllm: + build: + context: vllm + dockerfile: Dockerfile.cpu + extends: codetrans + image: 
${REGISTRY:-opea}/vllm:${TAG:-latest} + vllm-gaudi: + build: + context: vllm-fork + dockerfile: Dockerfile.hpu + extends: codetrans + image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} nginx: build: context: GenAIComps diff --git a/Translation/translation.py b/Translation/translation.py index 8a5d8aad6a..a5884fe0d7 100644 --- a/Translation/translation.py +++ b/Translation/translation.py @@ -1,18 +1,6 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import asyncio +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + import os from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType @@ -53,17 +41,35 @@ async def handle_request(self, request: Request): data = await request.json() language_from = data["language_from"] language_to = data["language_to"] - source_language = data["source_language"] - prompt_template = """ - Translate this from {language_from} to {language_to}: + source_data = data["source_data"] + translate_type = data["translate_type"] + if translate_type == "code": + prompt_template = """ + ### System: Please translate the following {language_from} codes into {language_to} codes. 
+ + ### Original codes: + '''{language_from} + + {source_data} + + ''' + + ### Translated codes: + """ + elif translate_type == "text": + prompt_template = """ + Translate this from {language_from} to {language_to}: - {language_from}: - {source_language} + {language_from}: + {source_data} - {language_to}: - """ + {language_to}: + """ + else: + raise ValueError("Invalid translate_type") + prompt = prompt_template.format( - language_from=language_from, language_to=language_to, source_language=source_language + language_from=language_from, language_to=language_to, source_data=source_data ) result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"query": prompt}) for node, response in result_dict.items(): From becac5ed5c6d51c82caf32c5165f33c888f45a29 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Mar 2025 09:09:16 +0000 Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- Translation/docker_compose/intel/cpu/xeon/README.md | 3 +-- Translation/translation.py | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Translation/docker_compose/intel/cpu/xeon/README.md b/Translation/docker_compose/intel/cpu/xeon/README.md index fb2e64e262..3b81ac4e35 100644 --- a/Translation/docker_compose/intel/cpu/xeon/README.md +++ b/Translation/docker_compose/intel/cpu/xeon/README.md @@ -136,7 +136,7 @@ For users in China who are unable to download models directly from Huggingface, 3. Set up other environment variables: - If you want to start a code translation service (instead of text translation), change the `LLM_MODEL_ID` to "mistralai/Mistral-7B-Instruct-v0.3" in `set_env.sh`. + If you want to start a code translation service (instead of text translation), change the `LLM_MODEL_ID` to "mistralai/Mistral-7B-Instruct-v0.3" in `set_env.sh`. 
```bash cd ../../../ @@ -259,7 +259,6 @@ Open this URL `http://{host_ip}:5173` in your browser to access the frontend. ![project-screenshot](../../../../assets/img/trans_ui_init.png) ![project-screenshot](../../../../assets/img/trans_ui_select.png) - ### Launch with Nginx If you want to launch the UI using Nginx, open this URL: `http://{host_ip}:{NGINX_PORT}` in your browser to access the frontend. diff --git a/Translation/translation.py b/Translation/translation.py index a5884fe0d7..ed496fdc83 100644 --- a/Translation/translation.py +++ b/Translation/translation.py @@ -67,10 +67,8 @@ async def handle_request(self, request: Request): """ else: raise ValueError("Invalid translate_type") - - prompt = prompt_template.format( - language_from=language_from, language_to=language_to, source_data=source_data - ) + + prompt = prompt_template.format(language_from=language_from, language_to=language_to, source_data=source_data) result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs={"query": prompt}) for node, response in result_dict.items(): # Here it suppose the last microservice in the megaservice is LLM. 
From de251750acb7d564412a3d3a66c64a574f6bdec2 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 10:46:56 +0800 Subject: [PATCH 03/13] modify test scripts of compose on xeon Signed-off-by: letonghan --- Translation/docker_image_build/build.yaml | 4 +- Translation/tests/test_compose_on_xeon.sh | 35 ++-- Translation/tests/test_compose_tgi_on_xeon.sh | 191 ++++++++++++++++++ 3 files changed, 209 insertions(+), 21 deletions(-) create mode 100644 Translation/tests/test_compose_tgi_on_xeon.sh diff --git a/Translation/docker_image_build/build.yaml b/Translation/docker_image_build/build.yaml index 4d4c16fa4f..f06c143e92 100644 --- a/Translation/docker_image_build/build.yaml +++ b/Translation/docker_image_build/build.yaml @@ -27,13 +27,13 @@ services: build: context: vllm dockerfile: Dockerfile.cpu - extends: codetrans + extends: translation image: ${REGISTRY:-opea}/vllm:${TAG:-latest} vllm-gaudi: build: context: vllm-fork dockerfile: Dockerfile.hpu - extends: codetrans + extends: translation image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} nginx: build: diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh index 9e2ac58cb7..4ed56a5043 100644 --- a/Translation/tests/test_compose_on_xeon.sh +++ b/Translation/tests/test_compose_on_xeon.sh @@ -30,9 +30,14 @@ function build_docker_images() { cd $WORKPATH/docker_image_build git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + git clone https://github.com/vllm-project/vllm.git && cd vllm + VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )" + echo "Check out vLLM tag ${VLLM_VER}" + git checkout ${VLLM_VER} &> /dev/null + cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="translation translation-ui llm-textgen nginx" + service_list="translation translation-ui llm-textgen vllm nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu @@ -42,6 +47,8 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export http_proxy=${http_proxy} + export https_proxy=${http_proxy} export LLM_MODEL_ID="haoranxu/ALMA-13B" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} @@ -64,11 +71,11 @@ function start_services() { n=0 # wait long for llm model download until [[ "$n" -ge 500 ]]; do - docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log - if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then + docker logs translation-xeon-vllm-service > ${LOG_PATH}/vllm_service_start.log + if grep -q complete ${LOG_PATH}/vllm_service_start.log; then break fi - sleep 10s + sleep 5s n=$((n+1)) done } @@ -98,33 +105,23 @@ function validate_services() { docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi - sleep 1s + sleep 5s } function validate_microservices() { - # Check if the microservices are running correctly. 
- - # tgi for llm service - validate_services \ - "${ip_address}:8008/generate" \ - "generated_text" \ - "tgi" \ - "tgi-service" \ - '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' - # llm microservice validate_services \ "${ip_address}:9000/v1/chat/completions" \ "data: " \ "llm" \ - "llm-textgen-server" \ + "translation-xeon-llm-server" \ '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' } function validate_megaservice() { # Curl the Mega Service validate_services \ - "${ip_address}:8888/v1/translation" \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ "mega-translation" \ "translation-xeon-backend-server" \ @@ -132,7 +129,7 @@ function validate_megaservice() { # test the megeservice via nginx validate_services \ - "${ip_address}:80/v1/translation" \ + "${ip_address}:${NGINX_PORT}/v1/translation" \ "translation" \ "mega-translation-nginx" \ "translation-xeon-nginx-server" \ @@ -169,7 +166,7 @@ function validate_frontend() { function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose stop && docker compose rm -f + docker compose -f compose.yaml stop && docker compose rm -f } function main() { diff --git a/Translation/tests/test_compose_tgi_on_xeon.sh b/Translation/tests/test_compose_tgi_on_xeon.sh new file mode 100644 index 0000000000..5d94a7832d --- /dev/null +++ b/Translation/tests/test_compose_tgi_on_xeon.sh @@ -0,0 +1,191 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + 
opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="translation translation-ui llm-textgen nginx" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + + export LLM_MODEL_ID="haoranxu/ALMA-13B" + export TGI_LLM_ENDPOINT="http://${ip_address}:8008" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export MEGA_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_HOST_IP=${ip_address} + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" + export NGINX_PORT=80 + export FRONTEND_SERVICE_IP=${ip_address} + export FRONTEND_SERVICE_PORT=5173 + export BACKEND_SERVICE_NAME=translation + export BACKEND_SERVICE_IP=${ip_address} + export BACKEND_SERVICE_PORT=8888 + export host_ip=${ip_address} + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + # wait long for llm model download + until [[ "$n" -ge 500 ]]; do + docker logs translation-xeon-tgi-service > 
${LOG_PATH}/tgi_service_start.log + if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 5s +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+ + # tgi for llm service + validate_services \ + "${ip_address}:8008/generate" \ + "generated_text" \ + "tgi" \ + "translation-xeon-tgi-service" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "translation-xeon-llm-server" \ + '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "translation" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + + # test the megeservice via nginx + validate_services \ + "${ip_address}:${NGINX_PORT}/v1/translation" \ + "translation" \ + "mega-translation-nginx" \ + "translation-xeon-nginx-server" \ + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' +} + +function validate_frontend() { + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs=22.6.0 -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f compose_tgi.yaml stop && docker compose rm -f +} + +function main() { + + stop_docker + + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + start_services + + validate_microservices + validate_megaservice + validate_frontend + + stop_docker + echo y | docker system prune + +} + +main From 3fdeeb5ec9adfbd8ecc11929814f202b366a5af9 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 11:11:05 +0800 Subject: [PATCH 04/13] fix xeon test issues and update rocm files Signed-off-by: letonghan --- .../docker_compose/intel/cpu/xeon/compose.yaml | 4 ++-- Translation/tests/test_compose_on_rocm.sh | 11 +++++++---- Translation/tests/test_compose_on_xeon.sh | 6 +++--- Translation/tests/test_compose_tgi_on_xeon.sh | 6 +++--- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml index 9e8f038e4a..25756fec56 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml @@ -27,7 +27,7 @@ services: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: translation-xeon-llm-server depends_on: - tgi-service: + vllm-service: condition: service_healthy ports: - "9000:9000" @@ -47,7 +47,7 @@ services: image: ${REGISTRY:-opea}/translation:${TAG:-latest} container_name: translation-xeon-backend-server depends_on: - - tgi-service + - vllm-service - llm ports: - "8888:8888" diff --git a/Translation/tests/test_compose_on_rocm.sh b/Translation/tests/test_compose_on_rocm.sh index 44bfc52cd1..287555effa 100644 --- a/Translation/tests/test_compose_on_rocm.sh +++ 
b/Translation/tests/test_compose_on_rocm.sh @@ -41,9 +41,12 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ + export http_proxy=${http_proxy} + export https_proxy=${http_proxy} + export TRANSLATION_TGI_SERVICE_PORT=8008 export TRANSLATION_HOST_IP=${ip_address} export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B" - export TRANSLATION_TGI_LLM_ENDPOINT="http://${TRANSLATION_HOST_IP}:8008" + export TRANSLATION_TGI_LLM_ENDPOINT="http://${TRANSLATION_HOST_IP}:${TRANSLATION_TGI_SERVICE_PORT}" export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export TRANSLATION_MEGA_SERVICE_HOST_IP=${TRANSLATION_HOST_IP} export TRANSLATION_LLM_SERVICE_HOST_IP=${TRANSLATION_HOST_IP} @@ -106,7 +109,7 @@ function validate_microservices() { # tgi for llm service validate_services \ - "${TRANSLATION_HOST_IP}:8008/generate" \ + "${TRANSLATION_HOST_IP}:${TRANSLATION_TGI_SERVICE_PORT}/generate" \ "generated_text" \ "translation-tgi-service" \ "translation-tgi-service" \ @@ -128,7 +131,7 @@ function validate_megaservice() { "translation" \ "translation-backend-server" \ "translation-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx validate_services \ @@ -136,7 +139,7 @@ function validate_megaservice() { "translation" \ "translation-nginx-server" \ "translation-nginx-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' } function validate_frontend() { diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh index 4ed56a5043..f7a1725fee 100644 --- a/Translation/tests/test_compose_on_xeon.sh +++ 
b/Translation/tests/test_compose_on_xeon.sh @@ -50,7 +50,7 @@ function start_services() { export http_proxy=${http_proxy} export https_proxy=${http_proxy} export LLM_MODEL_ID="haoranxu/ALMA-13B" - export TGI_LLM_ENDPOINT="http://${ip_address}:8008" + export LLM_ENDPOINT="http://${ip_address}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} @@ -125,7 +125,7 @@ function validate_megaservice() { "translation" \ "mega-translation" \ "translation-xeon-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx validate_services \ @@ -133,7 +133,7 @@ function validate_megaservice() { "translation" \ "mega-translation-nginx" \ "translation-xeon-nginx-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' } function validate_frontend() { diff --git a/Translation/tests/test_compose_tgi_on_xeon.sh b/Translation/tests/test_compose_tgi_on_xeon.sh index 5d94a7832d..882cc4fe60 100644 --- a/Translation/tests/test_compose_tgi_on_xeon.sh +++ b/Translation/tests/test_compose_tgi_on_xeon.sh @@ -43,7 +43,7 @@ function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export LLM_MODEL_ID="haoranxu/ALMA-13B" - export TGI_LLM_ENDPOINT="http://${ip_address}:8008" + export LLM_ENDPOINT="http://${ip_address}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} @@ -128,7 +128,7 @@ function validate_megaservice() { "translation" \ "mega-translation" \ "translation-xeon-backend-server" \ - '{"language_from": "Chinese","language_to": 
"English","source_language": "我爱机器翻译。"}' + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx validate_services \ @@ -136,7 +136,7 @@ function validate_megaservice() { "translation" \ "mega-translation-nginx" \ "translation-xeon-nginx-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' } function validate_frontend() { From 42a3604b5994654fac79096f3cc1b53dae276670 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 14:38:03 +0800 Subject: [PATCH 05/13] fix minor issues Signed-off-by: letonghan --- .../intel/cpu/xeon/compose.yaml | 4 +- Translation/docker_compose/set_env.sh | 7 +- Translation/tests/test_compose_on_rocm.sh | 33 +++++---- Translation/tests/test_compose_on_xeon.sh | 74 ++++++++++--------- 4 files changed, 64 insertions(+), 54 deletions(-) diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml index 25756fec56..6f6ce0ef77 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml @@ -50,7 +50,7 @@ services: - vllm-service - llm ports: - - "8888:8888" + - "${BACKEND_SERVICE_PORT:-8888}:8888" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} @@ -65,7 +65,7 @@ services: depends_on: - translation-xeon-backend-server ports: - - "5173:5173" + - "${FRONTEND_SERVICE_PORT:-5173}:5173" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} diff --git a/Translation/docker_compose/set_env.sh b/Translation/docker_compose/set_env.sh index 723c39da10..03fff33fa9 100644 --- a/Translation/docker_compose/set_env.sh +++ b/Translation/docker_compose/set_env.sh @@ -9,13 +9,14 @@ popd > /dev/null export LLM_MODEL_ID="haoranxu/ALMA-13B" export 
LLM_ENDPOINT="http://${host_ip}:8008" +export LLM_COMPONENT_NAME="OpeaTextGenService" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export BACKEND_SERVICE_PORT=8888 export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation" -export NGINX_PORT=80 export FRONTEND_SERVICE_IP=${host_ip} export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=translation export BACKEND_SERVICE_IP=${host_ip} -export BACKEND_SERVICE_PORT=8888 +export NGINX_PORT=80 +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/translation" diff --git a/Translation/tests/test_compose_on_rocm.sh b/Translation/tests/test_compose_on_rocm.sh index 287555effa..5757084cfa 100644 --- a/Translation/tests/test_compose_on_rocm.sh +++ b/Translation/tests/test_compose_on_rocm.sh @@ -83,25 +83,28 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." 
- else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi - sleep 1s + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 5s } function validate_microservices() { @@ -127,7 +130,7 @@ function validate_microservices() { function validate_megaservice() { # Curl the Mega Service validate_services \ - "${TRANSLATION_HOST_IP}:8888/v1/translation" \ + "${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ "translation-backend-server" \ "translation-backend-server" \ diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh index f7a1725fee..70f31ed3d3 100644 --- a/Translation/tests/test_compose_on_xeon.sh +++ b/Translation/tests/test_compose_on_xeon.sh @@ -48,19 +48,20 @@ function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export http_proxy=${http_proxy} - export https_proxy=${http_proxy} - export LLM_MODEL_ID="haoranxu/ALMA-13B" + export https_proxy=${https_proxy} + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export LLM_ENDPOINT="http://${ip_address}:8008" + export LLM_COMPONENT_NAME="OpeaTextGenService" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" - export NGINX_PORT=80 export 
FRONTEND_SERVICE_IP=${ip_address} export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=translation export BACKEND_SERVICE_IP=${ip_address} export BACKEND_SERVICE_PORT=8888 + export NGINX_PORT=80 export host_ip=${ip_address} sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -68,16 +69,7 @@ function start_services() { # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - n=0 - # wait long for llm model download - until [[ "$n" -ge 500 ]]; do - docker logs translation-xeon-vllm-service > ${LOG_PATH}/vllm_service_start.log - if grep -q complete ${LOG_PATH}/vllm_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done + sleep 5s } function validate_services() { @@ -87,24 +79,27 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." fi + sleep 5s } @@ -115,25 +110,36 @@ function validate_microservices() { "data: " \ "llm" \ "translation-xeon-llm-server" \ - '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' + '{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' } function validate_megaservice() { - # Curl the Mega Service + export BACKEND_SERVICE_PORT=8887 + export NGINX_PORT=80 + + # test the megaservice for code translation + validate_services \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + + # test the megaservice for text translation validate_services \ "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ "mega-translation" \ "translation-xeon-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx validate_services \ - "${ip_address}:${NGINX_PORT}/v1/translation" \ - "translation" \ - "mega-translation-nginx" \ - "translation-xeon-nginx-server" \ - 
'{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + "${ip_address}:${NGINX_PORT}/v1/translation" \ + "print" \ + "mega-translation-nginx" \ + "translation-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' } function validate_frontend() { From 51c91eee055dd8178d8774a4518e708c5e501466 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 15:21:10 +0800 Subject: [PATCH 06/13] fix xeon issues Signed-off-by: letonghan --- .../intel/cpu/xeon/compose_tgi.yaml | 5 +- .../intel/hpu/gaudi/compose_tgi.yaml | 106 ++++++++++ .../tests/test_compose_on_gaudi copy.sh | 190 ++++++++++++++++++ Translation/tests/test_compose_on_rocm.sh | 10 +- Translation/tests/test_compose_on_xeon.sh | 5 +- Translation/tests/test_compose_tgi_on_xeon.sh | 58 ++++-- 6 files changed, 345 insertions(+), 29 deletions(-) create mode 100644 Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml create mode 100644 Translation/tests/test_compose_on_gaudi copy.sh diff --git a/Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml index 083715dbe7..05470691ed 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -39,6 +39,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 @@ -50,7 +51,7 @@ services: - tgi-service - llm ports: - - "8888:8888" + - "${BACKEND_SERVICE_PORT:-8888}:8888" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} @@ -65,7 +66,7 @@ services: depends_on: - translation-xeon-backend-server ports: - - 
"5173:5173" + - "${FRONTEND_SERVICE_PORT:-5173}:5173" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml new file mode 100644 index 0000000000..59145c6f70 --- /dev/null +++ b/Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -0,0 +1,106 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + tgi-service: + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 + container_name: tgi-gaudi-server + ports: + - "8008:80" + volumes: + - "${MODEL_CACHE}:/data" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 + llm: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + container_name: llm-textgen-gaudi-server + depends_on: + tgi-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + restart: unless-stopped + translation-gaudi-backend-server: + image: ${REGISTRY:-opea}/translation:${TAG:-latest} + container_name: translation-gaudi-backend-server + depends_on: + - 
tgi-service + - llm + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + translation-gaudi-ui-server: + image: ${REGISTRY:-opea}/translation-ui:${TAG:-latest} + container_name: translation-gaudi-ui-server + depends_on: + - translation-gaudi-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + translation-gaudi-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} + container_name: translation-gaudi-nginx-server + depends_on: + - translation-gaudi-backend-server + - translation-gaudi-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} + - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} + - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} + - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} + - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/Translation/tests/test_compose_on_gaudi copy.sh b/Translation/tests/test_compose_on_gaudi copy.sh new file mode 100644 index 0000000000..63167b6e74 --- /dev/null +++ b/Translation/tests/test_compose_on_gaudi copy.sh @@ -0,0 +1,190 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk 
'{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + # If the opea_branch isn't main, replace the git clone branch in Dockerfile. + if [[ "${opea_branch}" != "main" ]]; then + cd $WORKPATH + OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" + NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" + find . -type f -name "Dockerfile*" | while read -r file; do + echo "Processing file: $file" + sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" + done + fi + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="translation translation-ui llm-textgen nginx" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/hpu/gaudi + + export LLM_MODEL_ID="haoranxu/ALMA-13B" + export TGI_LLM_ENDPOINT="http://${ip_address}:8008" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export MEGA_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_HOST_IP=${ip_address} + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" + export NGINX_PORT=80 + export FRONTEND_SERVICE_IP=${ip_address} + export FRONTEND_SERVICE_PORT=5173 + export BACKEND_SERVICE_NAME=translation + export BACKEND_SERVICE_IP=${ip_address} + export BACKEND_SERVICE_PORT=8888 + export host_ip=${ip_address} + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + + # Start Docker Containers + docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log + if grep -q Connected 
${LOG_PATH}/tgi_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
+ + # tgi gaudi service + validate_services \ + "${ip_address}:8008/generate" \ + "generated_text" \ + "tgi-gaudi" \ + "tgi-gaudi-server" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-textgen-gaudi-server" \ + '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:8888/v1/translation" \ + "translation" \ + "mega-translation" \ + "translation-gaudi-backend-server" \ + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + + # test the megeservice via nginx + validate_services \ + "${ip_address}:80/v1/translation" \ + "translation" \ + "mega-translation-nginx" \ + "translation-gaudi-nginx-server" \ + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' +} + +function validate_frontend() { + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs=22.6.0 -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/hpu/gaudi + docker compose stop && docker compose rm -f +} + +function main() { + + stop_docker + + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + start_services + + validate_microservices + validate_megaservice +# validate_frontend + + stop_docker + echo y | docker system prune + +} + +main diff --git a/Translation/tests/test_compose_on_rocm.sh b/Translation/tests/test_compose_on_rocm.sh index 5757084cfa..4f6cec36ea 100644 --- a/Translation/tests/test_compose_on_rocm.sh +++ b/Translation/tests/test_compose_on_rocm.sh @@ -128,7 +128,7 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service + # test the megaservice for text translation validate_services \ "${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ @@ -136,6 +136,14 @@ function validate_megaservice() { "translation-backend-server" \ '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + # test the megaservice for code translation + validate_services \ + "${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "translation-backend-server" \ + "translation-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + # test the megeservice via nginx validate_services \ "${TRANSLATION_HOST_IP}:${TRANSLATION_NGINX_PORT}/v1/translation" \ diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh index 70f31ed3d3..f8d38be59f 100644 --- a/Translation/tests/test_compose_on_xeon.sh +++ 
b/Translation/tests/test_compose_on_xeon.sh @@ -55,12 +55,12 @@ function start_services() { export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" export FRONTEND_SERVICE_IP=${ip_address} export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=translation export BACKEND_SERVICE_IP=${ip_address} export BACKEND_SERVICE_PORT=8888 + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" export NGINX_PORT=80 export host_ip=${ip_address} @@ -114,9 +114,6 @@ function validate_microservices() { } function validate_megaservice() { - export BACKEND_SERVICE_PORT=8887 - export NGINX_PORT=80 - # test the megaservice for code translation validate_services \ "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ diff --git a/Translation/tests/test_compose_tgi_on_xeon.sh b/Translation/tests/test_compose_tgi_on_xeon.sh index 882cc4fe60..760f0dd27d 100644 --- a/Translation/tests/test_compose_tgi_on_xeon.sh +++ b/Translation/tests/test_compose_tgi_on_xeon.sh @@ -42,18 +42,21 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export http_proxy=${http_proxy} + export https_proxy=${https_proxy} export LLM_MODEL_ID="haoranxu/ALMA-13B" export LLM_ENDPOINT="http://${ip_address}:8008" + export LLM_COMPONENT_NAME="OpeaTextGenService" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" - export NGINX_PORT=80 export FRONTEND_SERVICE_IP=${ip_address} export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=translation export BACKEND_SERVICE_IP=${ip_address} export BACKEND_SERVICE_PORT=8888 + export 
BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" + export NGINX_PORT=80 export host_ip=${ip_address} sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -80,24 +83,27 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." 
fi + sleep 5s } @@ -122,21 +128,29 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service + # test the megaservice for code translation + validate_services \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + + # test the megaservice for text translation validate_services \ "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ "mega-translation" \ "translation-xeon-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx validate_services \ - "${ip_address}:${NGINX_PORT}/v1/translation" \ - "translation" \ - "mega-translation-nginx" \ - "translation-xeon-nginx-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + "${ip_address}:${NGINX_PORT}/v1/translation" \ + "print" \ + "mega-translation-nginx" \ + "translation-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' } function validate_frontend() { From 1ddc7f9bcd9d9d122d7748c80ab8d8be82fec081 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 15:44:09 +0800 Subject: [PATCH 07/13] modify compose and test files on gaudi Signed-off-by: letonghan --- .../intel/hpu/gaudi/compose.yaml | 36 ++++--- .../intel/hpu/gaudi/compose_tgi.yaml | 9 +- Translation/tests/test_compose_on_gaudi.sh | 98 +++++++++---------- Translation/tests/test_compose_on_rocm.sh | 20 ++-- 
Translation/tests/test_compose_on_xeon.sh | 32 +++--- ...i copy.sh => test_compose_tgi_on_gaudi.sh} | 77 +++++++++------ Translation/tests/test_compose_tgi_on_xeon.sh | 32 +++--- 7 files changed, 154 insertions(+), 150 deletions(-) rename Translation/tests/{test_compose_on_gaudi copy.sh => test_compose_tgi_on_gaudi.sh} (68%) diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml index 9516e60ce6..368731aa05 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml @@ -2,24 +2,23 @@ # SPDX-License-Identifier: Apache-2.0 services: - tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.3.1 - container_name: tgi-gaudi-server + vllm-service: + image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + container_name: translation-gaudi-vllm-service ports: - "8008:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none - ENABLE_HPU_GRAPH: true - LIMIT_HPU_GRAPH: true - USE_FLASH_ATTENTION: true - FLASH_ATTENTION_RECOMPUTE: true + LLM_MODEL_ID: ${LLM_MODEL_ID} + NUM_CARDS: ${NUM_CARDS} + VLLM_TORCH_PROFILER_DIR: "/mnt" healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] interval: 10s @@ -29,14 +28,12 @@ services: cap_add: - SYS_NICE ipc: host - volumes: - - "${MODEL_CACHE:-./data}:/data" - command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 + command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE} 
llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} - container_name: llm-textgen-gaudi-server + container_name: translation-gaudi-llm-server depends_on: - tgi-service: + vllm-service: condition: service_healthy ports: - "9000:9000" @@ -45,8 +42,9 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 @@ -55,10 +53,10 @@ services: image: ${REGISTRY:-opea}/translation:${TAG:-latest} container_name: translation-gaudi-backend-server depends_on: - - tgi-service + - vllm-service - llm ports: - - "8888:8888" + - "${BACKEND_SERVICE_PORT:-8888}:8888" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} @@ -73,7 +71,7 @@ services: depends_on: - translation-gaudi-backend-server ports: - - "5173:5173" + - "${FRONTEND_SERVICE_PORT:-5173}:5173" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index 59145c6f70..65d8992c5e 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/Translation/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -4,7 +4,7 @@ services: tgi-service: image: ghcr.io/huggingface/tgi-gaudi:2.3.1 - container_name: tgi-gaudi-server + container_name: translation-gaudi-tgi-service ports: - "8008:80" volumes: @@ -34,7 +34,7 @@ services: command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048 llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} - container_name: llm-textgen-gaudi-server + container_name: translation-gaudi-llm-server depends_on: tgi-service: condition: service_healthy @@ -47,6 +47,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: 
${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 @@ -58,7 +59,7 @@ services: - tgi-service - llm ports: - - "8888:8888" + - "${BACKEND_SERVICE_PORT:-8888}:8888" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} @@ -73,7 +74,7 @@ services: depends_on: - translation-gaudi-backend-server ports: - - "5173:5173" + - "${FRONTEND_SERVICE_PORT:-5173}:5173" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} diff --git a/Translation/tests/test_compose_on_gaudi.sh b/Translation/tests/test_compose_on_gaudi.sh index 63167b6e74..723390be4d 100644 --- a/Translation/tests/test_compose_on_gaudi.sh +++ b/Translation/tests/test_compose_on_gaudi.sh @@ -35,41 +35,35 @@ function build_docker_images() { service_list="translation translation-ui llm-textgen nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1 docker images && sleep 1s } function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - export LLM_MODEL_ID="haoranxu/ALMA-13B" - export TGI_LLM_ENDPOINT="http://${ip_address}:8008" + export http_proxy=${http_proxy} + export https_proxy=${https_proxy} + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" + export LLM_ENDPOINT="http://${ip_address}:8008" + export LLM_COMPONENT_NAME="OpeaTextGenService" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" - export NGINX_PORT=80 export FRONTEND_SERVICE_IP=${ip_address} export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=translation export BACKEND_SERVICE_IP=${ip_address} export BACKEND_SERVICE_PORT=8888 + export 
BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" + export NGINX_PORT=80 export host_ip=${ip_address} sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - - n=0 - until [[ "$n" -ge 100 ]]; do - docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log - if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done + docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + + sleep 5s } function validate_services() { @@ -79,38 +73,25 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi - sleep 1s + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." 
+ fi + + sleep 5s } function validate_microservices() { - # Check if the microservices are running correctly. - - # tgi gaudi service - validate_services \ - "${ip_address}:8008/generate" \ - "generated_text" \ - "tgi-gaudi" \ - "tgi-gaudi-server" \ - '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' - # llm microservice validate_services \ "${ip_address}:9000/v1/chat/completions" \ @@ -121,21 +102,30 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service + # test the megaservice for code translation validate_services \ - "${ip_address}:8888/v1/translation" \ - "translation" \ - "mega-translation" \ - "translation-gaudi-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' - # test the megeservice via nginx + # test the megaservice for text translation validate_services \ - "${ip_address}:80/v1/translation" \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' + + # test the megeservice via nginx + validate_services \ + "${ip_address}:${NGINX_PORT}/v1/translation" \ + "print" \ "mega-translation-nginx" \ - "translation-gaudi-nginx-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + "translation-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, 
World!\");\n}","translate_type":"code"}' + } function validate_frontend() { @@ -168,7 +158,7 @@ function validate_frontend() { function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose stop && docker compose rm -f + docker compose -f compose.yaml stop && docker compose rm -f } function main() { @@ -180,7 +170,7 @@ function main() { validate_microservices validate_megaservice -# validate_frontend + validate_frontend stop_docker echo y | docker system prune diff --git a/Translation/tests/test_compose_on_rocm.sh b/Translation/tests/test_compose_on_rocm.sh index 4f6cec36ea..b88251586b 100644 --- a/Translation/tests/test_compose_on_rocm.sh +++ b/Translation/tests/test_compose_on_rocm.sh @@ -130,19 +130,19 @@ function validate_microservices() { function validate_megaservice() { # test the megaservice for text translation validate_services \ - "${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \ - "translation" \ - "translation-backend-server" \ - "translation-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + "${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \ + "translation" \ + "translation-backend-server" \ + "translation-backend-server" \ + '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' # test the megaservice for code translation validate_services \ - "${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \ - "print" \ - "translation-backend-server" \ - "translation-backend-server" \ - '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + "${TRANSLATION_HOST_IP}:${TRANSLATION_BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "translation-backend-server" \ + "translation-backend-server" \ + 
'{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' # test the megeservice via nginx validate_services \ diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh index f8d38be59f..74e761f137 100644 --- a/Translation/tests/test_compose_on_xeon.sh +++ b/Translation/tests/test_compose_on_xeon.sh @@ -67,7 +67,7 @@ function start_services() { sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 5s } @@ -116,27 +116,27 @@ function validate_microservices() { function validate_megaservice() { # test the megaservice for code translation validate_services \ - "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ - "print" \ - "mega-translation" \ - "translation-xeon-backend-server" \ - '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' # test the megaservice for text translation validate_services \ - "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ - "translation" \ - "mega-translation" \ - "translation-xeon-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "translation" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + 
'{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx validate_services \ - "${ip_address}:${NGINX_PORT}/v1/translation" \ - "print" \ - "mega-translation-nginx" \ - "translation-xeon-nginx-server" \ - '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + "${ip_address}:${NGINX_PORT}/v1/translation" \ + "print" \ + "mega-translation-nginx" \ + "translation-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' } function validate_frontend() { diff --git a/Translation/tests/test_compose_on_gaudi copy.sh b/Translation/tests/test_compose_tgi_on_gaudi.sh similarity index 68% rename from Translation/tests/test_compose_on_gaudi copy.sh rename to Translation/tests/test_compose_tgi_on_gaudi.sh index 63167b6e74..fa6e004f99 100644 --- a/Translation/tests/test_compose_on_gaudi copy.sh +++ b/Translation/tests/test_compose_tgi_on_gaudi.sh @@ -42,28 +42,31 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - export LLM_MODEL_ID="haoranxu/ALMA-13B" + export http_proxy=${http_proxy} + export https_proxy=${https_proxy} + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" + export LLM_COMPONENT_NAME="OpeaTextGenService" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/translation" - export NGINX_PORT=80 export FRONTEND_SERVICE_IP=${ip_address} export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=translation export BACKEND_SERVICE_IP=${ip_address} export 
BACKEND_SERVICE_PORT=8888 + export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" + export NGINX_PORT=80 export host_ip=${ip_address} sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do - docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log + docker logs translation-gaudi-tgi-service > ${LOG_PATH}/tgi_service_start.log if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then break fi @@ -79,25 +82,28 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." fi - sleep 1s + + sleep 5s } function validate_microservices() { @@ -108,7 +114,7 @@ function validate_microservices() { "${ip_address}:8008/generate" \ "generated_text" \ "tgi-gaudi" \ - "tgi-gaudi-server" \ + "translation-gaudi-tgi-service" \ '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' # llm microservice @@ -116,26 +122,35 @@ function validate_microservices() { "${ip_address}:9000/v1/chat/completions" \ "data: " \ "llm" \ - "llm-textgen-gaudi-server" \ + "translation-gaudi-llm-server" \ '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' } function validate_megaservice() { - # Curl the Mega Service + # test the megaservice for code translation validate_services \ - "${ip_address}:8888/v1/translation" \ - "translation" \ - "mega-translation" \ - "translation-gaudi-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' - # test the megeservice via nginx + # test the megaservice for text translation validate_services \ - "${ip_address}:80/v1/translation" \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": 
"Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' + + # test the megeservice via nginx + validate_services \ + "${ip_address}:${NGINX_PORT}/v1/translation" \ + "print" \ "mega-translation-nginx" \ - "translation-gaudi-nginx-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + "translation-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + } function validate_frontend() { diff --git a/Translation/tests/test_compose_tgi_on_xeon.sh b/Translation/tests/test_compose_tgi_on_xeon.sh index 760f0dd27d..0e67af9bba 100644 --- a/Translation/tests/test_compose_tgi_on_xeon.sh +++ b/Translation/tests/test_compose_tgi_on_xeon.sh @@ -44,7 +44,7 @@ function start_services() { export http_proxy=${http_proxy} export https_proxy=${https_proxy} - export LLM_MODEL_ID="haoranxu/ALMA-13B" + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export LLM_ENDPOINT="http://${ip_address}:8008" export LLM_COMPONENT_NAME="OpeaTextGenService" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} @@ -130,27 +130,27 @@ function validate_microservices() { function validate_megaservice() { # test the megaservice for code translation validate_services \ - "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ - "print" \ - "mega-translation" \ - "translation-xeon-backend-server" \ - '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "print" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, 
World!\");\n}","translate_type":"code"}' # test the megaservice for text translation validate_services \ - "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ - "translation" \ - "mega-translation" \ - "translation-xeon-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ + "translation" \ + "mega-translation" \ + "translation-xeon-backend-server" \ + '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx validate_services \ - "${ip_address}:${NGINX_PORT}/v1/translation" \ - "print" \ - "mega-translation-nginx" \ - "translation-xeon-nginx-server" \ - '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' + "${ip_address}:${NGINX_PORT}/v1/translation" \ + "print" \ + "mega-translation-nginx" \ + "translation-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' } function validate_frontend() { From ae3c6d2df81168916722d7aeab0587cc1fb1dcc3 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 16:44:23 +0800 Subject: [PATCH 08/13] update test scripts on gaudi Signed-off-by: letonghan --- Translation/tests/test_compose_on_gaudi.sh | 23 +++++++++++++++---- .../tests/test_compose_tgi_on_gaudi.sh | 12 +++++----- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Translation/tests/test_compose_on_gaudi.sh b/Translation/tests/test_compose_on_gaudi.sh index 723390be4d..1145e0f545 100644 --- a/Translation/tests/test_compose_on_gaudi.sh +++ b/Translation/tests/test_compose_on_gaudi.sh @@ -30,9 +30,12 @@ function build_docker_images() { cd $WORKPATH/docker_image_build git 
clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork + VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") + git checkout ${VLLM_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="translation translation-ui llm-textgen nginx" + service_list="translation translation-ui llm-textgen vllm-gaudi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -46,6 +49,10 @@ function start_services() { export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export LLM_ENDPOINT="http://${ip_address}:8008" export LLM_COMPONENT_NAME="OpeaTextGenService" + export NUM_CARDS=1 + export BLOCK_SIZE=128 + export MAX_NUM_SEQS=256 + export MAX_SEQ_LEN_TO_CAPTURE=2048 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} @@ -73,6 +80,12 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + # check response status if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" @@ -97,7 +110,7 @@ function validate_microservices() { "${ip_address}:9000/v1/chat/completions" \ "data: " \ "llm" \ - "llm-textgen-gaudi-server" \ + "translation-gaudi-llm-server" \ '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' } @@ -107,7 +120,7 @@ function validate_megaservice() { "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "print" \ "mega-translation" \ - "translation-xeon-backend-server" \ + "translation-gaudi-backend-server" \ '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' # test the megaservice for text translation @@ -115,7 +128,7 @@ function validate_megaservice() { "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ "mega-translation" \ - "translation-xeon-backend-server" \ + "translation-gaudi-backend-server" \ '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx @@ -123,7 +136,7 @@ function validate_megaservice() { "${ip_address}:${NGINX_PORT}/v1/translation" \ "print" \ "mega-translation-nginx" \ - "translation-xeon-nginx-server" \ + "translation-gaudi-nginx-server" \ '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' } diff --git a/Translation/tests/test_compose_tgi_on_gaudi.sh b/Translation/tests/test_compose_tgi_on_gaudi.sh index fa6e004f99..f900b433de 100644 --- a/Translation/tests/test_compose_tgi_on_gaudi.sh +++ b/Translation/tests/test_compose_tgi_on_gaudi.sh @@ -45,7 +45,7 @@ function start_services() { export http_proxy=${http_proxy} export https_proxy=${https_proxy} export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" - export TGI_LLM_ENDPOINT="http://${ip_address}:8008" + export 
LLM_ENDPOINT="http://${ip_address}:8008" export LLM_COMPONENT_NAME="OpeaTextGenService" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} @@ -132,7 +132,7 @@ function validate_megaservice() { "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "print" \ "mega-translation" \ - "translation-xeon-backend-server" \ + "translation-gaudi-backend-server" \ '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' # test the megaservice for text translation @@ -140,7 +140,7 @@ function validate_megaservice() { "${ip_address}:${BACKEND_SERVICE_PORT}/v1/translation" \ "translation" \ "mega-translation" \ - "translation-xeon-backend-server" \ + "translation-gaudi-backend-server" \ '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' # test the megeservice via nginx @@ -148,7 +148,7 @@ function validate_megaservice() { "${ip_address}:${NGINX_PORT}/v1/translation" \ "print" \ "mega-translation-nginx" \ - "translation-xeon-nginx-server" \ + "translation-gaudi-nginx-server" \ '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' } @@ -183,7 +183,7 @@ function validate_frontend() { function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose stop && docker compose rm -f + docker compose -f compose_tgi.yaml stop && docker compose rm -f } function main() { @@ -195,7 +195,7 @@ function main() { validate_microservices validate_megaservice -# validate_frontend + validate_frontend stop_docker echo y | docker system prune From e44f0de77964f4abf12170a2ef8471438526d011 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 16:47:46 +0800 Subject: [PATCH 09/13] modify gaudi readme Signed-off-by: letonghan --- 
.../docker_compose/intel/hpu/gaudi/README.md | 107 +++++++++++++++++- 1 file changed, 102 insertions(+), 5 deletions(-) diff --git a/Translation/docker_compose/intel/hpu/gaudi/README.md b/Translation/docker_compose/intel/hpu/gaudi/README.md index 31ed7da040..3a49a7ab4e 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/README.md +++ b/Translation/docker_compose/intel/hpu/gaudi/README.md @@ -74,6 +74,37 @@ By default, the LLM model is set to a default value as listed below: Change the `LLM_MODEL_ID` below for your needs. +For users in China who are unable to download models directly from Huggingface, you can use [ModelScope](https://www.modelscope.cn/models) or a Huggingface mirror to download models. The vLLM/TGI can load the models either online or offline as described below: + +1. Online + + ```bash + export HF_TOKEN=${your_hf_token} + export HF_ENDPOINT="https://hf-mirror.com" + model_name="haoranxu/ALMA-13B" + # Start vLLM LLM Service + docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80 + # Start TGI LLM Service + docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name + ``` + +2. Offline + + - Search for your model name in ModelScope. For example, check [this page](https://www.modelscope.cn/models/rubraAI/Mistral-7B-Instruct-v0.3/files) for model `mistralai/Mistral-7B-Instruct-v0.3`. + + - Click the `Download this model` button, and choose one way to download the model to your local path `/path/to/model`. + + - Run the following command to start the LLM service.
+ + ```bash + export HF_TOKEN=${your_hf_token} + export model_path="/path/to/model" + # Start vLLM LLM Service + docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80 + # Start TGI LLM Service + docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data + ``` + ### Setup Environment Variables 1. Set the required environment variables: @@ -105,7 +136,19 @@ Change the `LLM_MODEL_ID` below for your needs. ### Start Microservice Docker Containers ```bash -docker compose up -d +cd GenAIExamples/Translation/docker_compose/intel/hpu/gaudi +``` + +If you use vLLM as the LLM serving backend: + +```bash +docker compose -f compose.yaml up -d +``` + +If you use TGI as the LLM serving backend: + +```bash +docker compose -f compose_tgi.yaml up -d ``` > Note: The docker images will be automatically downloaded from `docker hub`: @@ -119,43 +162,97 @@ docker pull opea/nginx:latest ### Validate Microservices -1. TGI Service +1. LLM backend Service + + In the first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready. + + Try the command below to check whether the LLM serving is ready. ```bash + # vLLM service + docker logs translation-gaudi-vllm-service 2>&1 | grep complete + # If the service is ready, you will get the response like below. + INFO: Application startup complete. + ``` + + ```bash + # TGI service + docker logs translation-gaudi-tgi-service | grep Connected + # If the service is ready, you will get the response like below. + 2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected + ``` + + Then try the `cURL` command below to validate services.
+ + ```bash + # either vLLM or TGI service + # text translation curl http://${host_ip}:8008/generate \ -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' + # code translation + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` 2. LLM Microservice ```bash + # text translation curl http://${host_ip}:9000/v1/chat/completions \ -X POST \ -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' \ -H 'Content-Type: application/json' + # code translation + curl http://${host_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' \ + -H 'Content-Type: application/json' ``` 3.
MegaService ```bash + # text translation curl http://${host_ip}:8888/v1/translation -H "Content-Type: application/json" -d '{ - "language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + "language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' + # code translation + curl http://${host_ip}:8888/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' ``` 4. Nginx Service ```bash + # text translation curl http://${host_ip}:${NGINX_PORT}/v1/translation \ -H "Content-Type: application/json" \ - -d '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。"}' + -d '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' + # code translation + curl http://${host_ip}:${NGINX_PORT}/v1/translation \ + -H "Content-Type: application/json" \ + -d '{"language_from": "Golang","language_to": "Python","source_data": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}","translate_type":"code"}' ``` Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service. ## 🚀 Launch the UI +### Launch with origin port + Open this URL `http://{host_ip}:5173` in your browser to access the frontend. + ![project-screenshot](../../../../assets/img/trans_ui_init.png) ![project-screenshot](../../../../assets/img/trans_ui_select.png) + +### Launch with Nginx + +If you want to launch the UI using Nginx, open this URL: `http://{host_ip}:{NGINX_PORT}` in your browser to access the frontend.
+ +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/71214938-819c-4979-89cb-c03d937cd7b5) + +![image](https://github.com/intel-ai-tce/GenAIExamples/assets/21761437/be543e96-ddcd-4ee0-9f2c-4e99fee77e37) From b8a03ca5b8e7209722270cb4472e470bb2cbe111 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 16:53:11 +0800 Subject: [PATCH 10/13] integrate codetrans other files into translation Signed-off-by: letonghan --- Translation/benchmark/performance/README.md | 77 +++++++++++++++ .../benchmark/performance/benchmark.sh | 99 +++++++++++++++++++ .../benchmark/performance/benchmark.yaml | 47 +++++++++ Translation/kubernetes/helm/README.md | 18 ++++ Translation/kubernetes/helm/cpu-values.yaml | 5 + Translation/kubernetes/helm/gaudi-values.yaml | 32 ++++++ Translation/tests/test_gmc_on_gaudi.sh | 2 +- Translation/tests/test_gmc_on_xeon.sh | 2 +- 8 files changed, 280 insertions(+), 2 deletions(-) create mode 100644 Translation/benchmark/performance/README.md create mode 100644 Translation/benchmark/performance/benchmark.sh create mode 100644 Translation/benchmark/performance/benchmark.yaml create mode 100644 Translation/kubernetes/helm/README.md create mode 100644 Translation/kubernetes/helm/cpu-values.yaml create mode 100644 Translation/kubernetes/helm/gaudi-values.yaml diff --git a/Translation/benchmark/performance/README.md b/Translation/benchmark/performance/README.md new file mode 100644 index 0000000000..fa43991dea --- /dev/null +++ b/Translation/benchmark/performance/README.md @@ -0,0 +1,77 @@ +# Translation Benchmarking + +This folder contains a collection of scripts to enable inference benchmarking by leveraging a comprehensive benchmarking tool, [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), that enables throughput analysis to assess inference performance. + +By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community. 
+ +## Purpose + +We aim to run these benchmarks and share them with the OPEA community for three primary reasons: + +- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs. +- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case. +- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks etc. + +## Metrics + +The benchmark will report the below metrics, including: + +- Number of Concurrent Requests +- End-to-End Latency: P50, P90, P99 (in milliseconds) +- End-to-End First Token Latency: P50, P90, P99 (in milliseconds) +- Average Next Token Latency (in milliseconds) +- Average Token Latency (in milliseconds) +- Requests Per Second (RPS) +- Output Tokens Per Second +- Input Tokens Per Second + +Results will be displayed in the terminal and saved as a CSV file named `1_testspec.yaml`. + +## Getting Started + +We recommend using Kubernetes to deploy the Translation service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs. + +### Prerequisites + +- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md). + +- Every node has direct internet access +- Set up kubectl on the master node with access to the Kubernetes cluster. +- Install Python 3.8+ on the master node for running GenAIEval. +- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods. +- Ensure that the container's ulimit can meet the number of requests.
+ +```bash +# The way to modify the containerd ulimit: +sudo systemctl edit containerd +# Add two lines: +[Service] +LimitNOFILE=65536:1048576 + +sudo systemctl daemon-reload; sudo systemctl restart containerd +``` + +### Test Steps + +Please deploy Translation service before benchmarking. + +#### Run Benchmark Test + +Before the benchmark, we can configure the number of test queries and test output directory by: + +```bash +export USER_QUERIES="[1, 1, 1, 1]" +export TEST_OUTPUT_DIR="/tmp/benchmark_output" +``` + +And then run the benchmark by: + +```bash +bash benchmark.sh -n 1 +``` + +The argument `-n` refers to the number of test nodes. + +#### Data collection + +All the test results will come to this folder `/tmp/benchmark_output` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps. diff --git a/Translation/benchmark/performance/benchmark.sh b/Translation/benchmark/performance/benchmark.sh new file mode 100644 index 0000000000..6eac50baf8 --- /dev/null +++ b/Translation/benchmark/performance/benchmark.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +deployment_type="k8s" +node_number=1 +service_port=7777 +query_per_node=128 + +benchmark_tool_path="$(pwd)/GenAIEval" + +usage() { + echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]" + echo " -d deployment_type deployment type, select between k8s and docker (default: ${deployment_type})" + echo " -n node_number Test node number, required only for k8s deployment_type, (default: ${node_number})" + echo " -i service_ip service ip, required only for docker deployment_type" + echo " -p service_port service port, required only for docker deployment_type, (default: ${service_port})" + exit 1 +} + +while getopts ":d:n:i:p:" opt; do + case ${opt} in + d ) + deployment_type=$OPTARG + ;; + n ) + node_number=$OPTARG + ;; + i ) + service_ip=$OPTARG + ;; + p ) + service_port=$OPTARG + ;; + \?
) + echo "Invalid option: -$OPTARG" 1>&2 + usage + ;; + : ) + echo "Invalid option: -$OPTARG requires an argument" 1>&2 + usage + ;; + esac +done + +if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then + echo "Error: service_ip is required for docker deployment_type" 1>&2 + usage +fi + +if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then + echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2 +fi + +function main() { + if [[ ! -d ${benchmark_tool_path} ]]; then + echo "Benchmark tool not found, setting up..." + setup_env + fi + run_benchmark +} + +function setup_env() { + git clone https://github.com/opea-project/GenAIEval.git + pushd ${benchmark_tool_path} + python3 -m venv stress_venv + source stress_venv/bin/activate + pip install -r requirements.txt + popd +} + +function run_benchmark() { + source ${benchmark_tool_path}/stress_venv/bin/activate + export DEPLOYMENT_TYPE=${deployment_type} + export SERVICE_IP=${service_ip:-"None"} + export SERVICE_PORT=${service_port:-"None"} + if [[ -z $USER_QUERIES ]]; then + user_query=$((query_per_node*node_number)) + export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]" + echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}." + fi + export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//') + if [[ -z $WARMUP ]]; then export WARMUP=0; fi + if [[ -z $TEST_OUTPUT_DIR ]]; then + if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then + export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}" + else + export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker" + fi + echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}." 
+ fi + + envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml + cd ${benchmark_tool_path}/evals/benchmark + python benchmark.py +} + +main diff --git a/Translation/benchmark/performance/benchmark.yaml b/Translation/benchmark/performance/benchmark.yaml new file mode 100644 index 0000000000..a8bff2f3d3 --- /dev/null +++ b/Translation/benchmark/performance/benchmark.yaml @@ -0,0 +1,47 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +test_suite_config: # Overall configuration settings for the test suite + examples: ["codetrans"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna + deployment_type: "k8s" # Default is "k8s", can also be "docker" + service_ip: None # Leave as None for k8s, specify for Docker + service_port: None # Leave as None for k8s, specify for Docker + warm_ups: 0 # Number of test requests for warm-up + run_time: 60m # The max total run time for the test suite + seed: # The seed for all RNGs + user_queries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] # Number of test requests at each concurrency level + query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default.
+ random_prompt: false # Use random prompts if true, fixed prompts if false + collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false + data_visualization: false # Generate data visualization if true, do not generate data visualization if false + llm_model: "HuggingFaceH4/mistral-7b-grok" # The LLM model used for the test + test_output_dir: "/home/sdp/benchmark_output" # The directory to store the test output + load_shape: # Tenant concurrency pattern + name: constant # poisson or constant(locust default load shape) + params: # Loadshape-specific parameters + constant: # Constant load shape specific parameters, activate only if load_shape.name is constant + concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users + # arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate + poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson + arrival_rate: 1.0 # Request arrival rate + namespace: "" # Fill the user-defined namespace. Otherwise, it will be default. 
+ +test_cases: + codetrans: + llm: + run_test: true + service_name: "llm-svc" # Replace with your service name + parameters: + model_name: "HuggingFaceH4/mistral-7b-grok" + max_new_tokens: 128 + temperature: 0.01 + top_k: 10 + top_p: 0.95 + repetition_penalty: 1.03 + stream: true + llmserve: + run_test: true + service_name: "codetrans-llm-svc" # Replace with your service name + e2e: + run_test: true + service_name: "codetrans-backend-server-svc" # Replace with your service name diff --git a/Translation/kubernetes/helm/README.md b/Translation/kubernetes/helm/README.md new file mode 100644 index 0000000000..dedc2520eb --- /dev/null +++ b/Translation/kubernetes/helm/README.md @@ -0,0 +1,18 @@ +# Deploy Translation on Kubernetes cluster + +- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. +- For more deploy options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). 
+ +## Deploy on Xeon + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install codetrans oci://ghcr.io/opea-project/charts/codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml +``` + +## Deploy on Gaudi + +``` +export HFTOKEN="insert-your-huggingface-token-here" +helm install codetrans oci://ghcr.io/opea-project/charts/codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml +``` diff --git a/Translation/kubernetes/helm/cpu-values.yaml b/Translation/kubernetes/helm/cpu-values.yaml new file mode 100644 index 0000000000..313f050754 --- /dev/null +++ b/Translation/kubernetes/helm/cpu-values.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +tgi: + LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3 diff --git a/Translation/kubernetes/helm/gaudi-values.yaml b/Translation/kubernetes/helm/gaudi-values.yaml new file mode 100644 index 0000000000..89ed259285 --- /dev/null +++ b/Translation/kubernetes/helm/gaudi-values.yaml @@ -0,0 +1,32 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +tgi: + accelDevice: "gaudi" + image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.3.1" + resources: + limits: + habana.ai/gaudi: 1 + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" + CUDA_GRAPHS: "" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + ENABLE_HPU_GRAPH: "true" + LIMIT_HPU_GRAPH: "true" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/Translation/tests/test_gmc_on_gaudi.sh b/Translation/tests/test_gmc_on_gaudi.sh index 45c47c04ee..7e7f335943 100755 --- a/Translation/tests/test_gmc_on_gaudi.sh +++ 
b/Translation/tests/test_gmc_on_gaudi.sh @@ -37,7 +37,7 @@ function validate_translation() { kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_translation.log exit_code=$? if [ $exit_code -ne 0 ]; then - echo "chatqna failed, please check the logs in ${LOG_PATH}!" + echo "translation failed, please check the logs in ${LOG_PATH}!" exit 1 fi diff --git a/Translation/tests/test_gmc_on_xeon.sh b/Translation/tests/test_gmc_on_xeon.sh index 56caa54f40..9769079d83 100755 --- a/Translation/tests/test_gmc_on_xeon.sh +++ b/Translation/tests/test_gmc_on_xeon.sh @@ -37,7 +37,7 @@ function validate_translation() { kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query":"Translate this from Chinese to English:\nChinese: 我爱机器翻译。\nEnglish:"}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_translation.log exit_code=$? if [ $exit_code -ne 0 ]; then - echo "chatqna failed, please check the logs in ${LOG_PATH}!" + echo "translation failed, please check the logs in ${LOG_PATH}!" 
exit 1 fi From 8c94082cf4bc14d56a44446529bff578b3b449f5 Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 17:29:00 +0800 Subject: [PATCH 11/13] fic rocm compose parameter Signed-off-by: letonghan --- Translation/docker_compose/amd/gpu/rocm/compose.yaml | 1 + Translation/tests/test_compose_on_rocm.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/Translation/docker_compose/amd/gpu/rocm/compose.yaml b/Translation/docker_compose/amd/gpu/rocm/compose.yaml index 399c59e81b..41625fc8b3 100644 --- a/Translation/docker_compose/amd/gpu/rocm/compose.yaml +++ b/Translation/docker_compose/amd/gpu/rocm/compose.yaml @@ -49,6 +49,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: "http://translation-tgi-service" LLM_MODEL_ID: ${TRANSLATION_LLM_MODEL_ID} + LLM_COMPONENT_NAME: ${TRANSLATION_LLM_COMPONENT_NAME} HUGGINGFACEHUB_API_TOKEN: ${TRANSLATION_HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 diff --git a/Translation/tests/test_compose_on_rocm.sh b/Translation/tests/test_compose_on_rocm.sh index b88251586b..eb62321d3e 100644 --- a/Translation/tests/test_compose_on_rocm.sh +++ b/Translation/tests/test_compose_on_rocm.sh @@ -47,6 +47,7 @@ function start_services() { export TRANSLATION_HOST_IP=${ip_address} export TRANSLATION_LLM_MODEL_ID="haoranxu/ALMA-13B" export TRANSLATION_TGI_LLM_ENDPOINT="http://${TRANSLATION_HOST_IP}:${TRANSLATION_TGI_SERVICE_PORT}" + export TRANSLATION_LLM_COMPONENT_NAME="OpeaTextGenService" export TRANSLATION_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export TRANSLATION_MEGA_SERVICE_HOST_IP=${TRANSLATION_HOST_IP} export TRANSLATION_LLM_SERVICE_HOST_IP=${TRANSLATION_HOST_IP} From 32d1535c71a9afb122c64f00a234a0c3cf396e9f Mon Sep 17 00:00:00 2001 From: letonghan Date: Tue, 18 Mar 2025 17:39:32 +0800 Subject: [PATCH 12/13] fix rocm curl command Signed-off-by: letonghan --- Translation/tests/test_compose_on_rocm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/Translation/tests/test_compose_on_rocm.sh b/Translation/tests/test_compose_on_rocm.sh index eb62321d3e..53d19140b1 100644 --- a/Translation/tests/test_compose_on_rocm.sh +++ b/Translation/tests/test_compose_on_rocm.sh @@ -135,7 +135,7 @@ function validate_megaservice() { "translation" \ "translation-backend-server" \ "translation-backend-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' # test the megaservice for code translation validate_services \ @@ -151,7 +151,7 @@ function validate_megaservice() { "translation" \ "translation-nginx-server" \ "translation-nginx-server" \ - '{"language_from": "Chinese","language_to": "English","source_language": "我爱机器翻译。","translate_type":"text"}' + '{"language_from": "Chinese","language_to": "English","source_data": "我爱机器翻译。","translate_type":"text"}' } function validate_frontend() { From 3f05d405ae51c34f1c04ee20b27146c2eebef275 Mon Sep 17 00:00:00 2001 From: WenjiaoYue Date: Fri, 21 Mar 2025 18:09:29 +0800 Subject: [PATCH 13/13] update Translation UI --- Translation/ui/svelte/src/lib/shared/Network.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Translation/ui/svelte/src/lib/shared/Network.ts b/Translation/ui/svelte/src/lib/shared/Network.ts index 489550818a..cee93d0f1f 100644 --- a/Translation/ui/svelte/src/lib/shared/Network.ts +++ b/Translation/ui/svelte/src/lib/shared/Network.ts @@ -23,7 +23,8 @@ export async function fetchLanguageResponse(input: string, transform: string, tr payload = { language_from: transform, language_to: transTo, - source_language: input, + source_data: input, + translate_type: "text", }; url = `${BASE_URL}`;