Priority
P1-Stopper
OS type
Ubuntu
Hardware type
Xeon-GNR
Installation method
Deploy method
Running nodes
Single Node
What's the version?
afb46bd
Description
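https://github.com/opea-project/GenAIExamples/actions/runs/15189596329/job/42752387606

The TGI launcher fails while downloading `meta-llama/Meta-Llama-3-8B-Instruct`: the Hugging Face Hub responds with `401 Unauthorized` (`GatedRepoError`) because the model is gated and the request carries no authentication token (`token = None` in the traceback below).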

`2025-05-23T03:12:17.327895Z INFO download: text_generation_launcher: Starting check and download process for meta-llama/Meta-Llama-3-8B-Instruct
Error: DownloadError
2025-05-23T03:12:22.649807Z ERROR download: text_generation_launcher: Download encountered an error:
2025-05-23 03:12:20.499 | INFO | text_generation_server.utils.import_utils:<module>:80 - Detected system ipex
/opt/conda/lib/python3.11/site-packages/text_generation_server/utils/sgmv.py:18: UserWarning: Could not import SGMV kernel from Punica, falling back to loop.
warnings.warn("Could not import SGMV kernel from Punica, falling back to loop.")
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py:304 │
│ in hf_raise_for_status │
│ │
│ 301 │ │
│ 302 │ """ │
│ 303 │ try: │
│ ❱ 304 │ │ response.raise_for_status() │
│ 305 │ except HTTPError as e: │
│ 306 │ │ error_code = response.headers.get("X-Error-Code") │
│ 307 │ │ error_message = response.headers.get("X-Error-Message") │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ endpoint_name = None │ │
│ │ error_code = 'GatedRepo' │ │
│ │ error_message = 'Access to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have'+63 │ │
│ │ message = '401 Client Error.\n\nCannot access gated repo for url │ │
│ │ https://huggingface.co/meta-'+56 │ │
│ │ response = <Response [401]> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/requests/models.py:1024 in │
│ raise_for_status │
│ │
│ 1021 │ │ │ ) │
│ 1022 │ │ │
│ 1023 │ │ if http_error_msg: │
│ ❱ 1024 │ │ │ raise HTTPError(http_error_msg, response=self) │
│ 1025 │ │
│ 1026 │ def close(self): │
│ 1027 │ │ """Releases the connection back to the pool. Once this method │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ http_error_msg = '401 Client Error: Unauthorized for url: │ │
│ │ https://huggingface.co/meta-llama/Meta-L'+43 │ │
│ │ reason = 'Unauthorized' │ │
│ │ self = <Response [401]> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
╰──────────────────────────────────────────────────────────────────────────────╯
HTTPError: 401 Client Error: Unauthorized for url:
https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.j
son
The above exception was the direct cause of the following exception:
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /opt/conda/lib/python3.11/site-packages/text_generation_server/cli.py:197 in │
│ download_weights │
│ │
│ 194 │ │ try: │
│ 195 │ │ │ import json │
│ 196 │ │ │ │
│ ❱ 197 │ │ │ config = hf_hub_download( │
│ 198 │ │ │ │ model_id, revision=revision, filename="config.json" │
│ 199 │ │ │ ) │
│ 200 │ │ │ with open(config, "r") as f: │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ auto_convert = True │ │
│ │ extension = '.safetensors' │ │
│ │ is_local_model = False │ │
│ │ json = <module 'json' from │ │
│ │ '/opt/conda/lib/python3.11/json/__init__.py'> │ │
│ │ json_output = True │ │
│ │ logger_level = 'INFO' │ │
│ │ merge_lora = False │ │
│ │ model_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ revision = None │ │
│ │ trust_remote_code = False │ │
│ │ utils = <module 'text_generation_server.utils' from │ │
│ │ '/opt/conda/lib/python3.11/site-packages/text_gener… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py │
│ :114 in _inner_fn │
│ │
│ 111 │ │ if check_use_auth_token: │
│ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__na │
│ 113 │ │ │
│ ❱ 114 │ │ return fn(*args, **kwargs) │
│ 115 │ │
│ 116 │ return _inner_fn # type: ignore │
│ 117 │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ arg_name = 'filename' │ │
│ │ arg_value = 'config.json' │ │
│ │ args = ('meta-llama/Meta-Llama-3-8B-Instruct',) │ │
│ │ check_use_auth_token = True │ │
│ │ has_token = False │ │
│ │ kwargs = {'revision': None, 'filename': 'config.json'} │ │
│ │ signature = <Signature (repo_id: str, filename: str, *, │ │
│ │ subfolder: Optional[str] = None, repo_type: │ │
│ │ Optional[str] = None, revision: Optional[str] = │ │
│ │ None, library_name: Optional[str] = None, │ │
│ │ library_version: Optional[str] = None, cache_dir: │ │
│ │ Union[str, pathlib.Path, NoneType] = None, │ │
│ │ local_dir: Union[str, pathlib.Path, NoneType] = │ │
│ │ None, user_agent: Union[Dict, str, NoneType] = │ │
│ │ None, force_download: bool = False, proxies: │ │
│ │ Optional[Dict] = None, etag_timeout: float = 10, │ │
│ │ token: Union[bool, str, NoneType] = None, │ │
│ │ local_files_only: bool = False, headers: │ │
│ │ Optional[Dict[str, str]] = None, endpoint: │ │
│ │ Optional[str] = None, legacy_cache_layout: bool = │ │
│ │ False, resume_download: Optional[bool] = None, │ │
│ │ force_filename: Optional[str] = None, │ │
│ │ local_dir_use_symlinks: Union[bool, │ │
│ │ Literal['auto']] = 'auto') -> str> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:122 │
│ 1 in hf_hub_download │
│ │
│ 1218 │ │ │ local_files_only=local_files_only, │
│ 1219 │ │ ) │
│ 1220 │ else: │
│ ❱ 1221 │ │ return _hf_hub_download_to_cache_dir( │
│ 1222 │ │ │ # Destination │
│ 1223 │ │ │ cache_dir=cache_dir, │
│ 1224 │ │ │ # File info │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ cache_dir = '/data' │ │
│ │ endpoint = None │ │
│ │ etag_timeout = 10 │ │
│ │ filename = 'config.json' │ │
│ │ force_download = False │ │
│ │ force_filename = None │ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ legacy_cache_layout = False │ │
│ │ library_name = None │ │
│ │ library_version = None │ │
│ │ local_dir = None │ │
│ │ local_dir_use_symlinks = 'auto' │ │
│ │ local_files_only = False │ │
│ │ proxies = None │ │
│ │ repo_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ repo_type = 'model' │ │
│ │ resume_download = None │ │
│ │ revision = 'main' │ │
│ │ subfolder = None │ │
│ │ token = None │ │
│ │ user_agent = None │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:132 │
│ 5 in _hf_hub_download_to_cache_dir │
│ │
│ 1322 │ │ │ │ │ return pointer_path │
│ 1323 │ │ │
│ 1324 │ │ # Otherwise, raise appropriate error │
│ ❱ 1325 │ │ _raise_on_head_call_error(head_call_error, force_download, lo │
│ 1326 │ │
│ 1327 │ # From now on, etag, commit_hash, url and size are not None. │
│ 1328 │ assert etag is not None, "etag must have been retrieved from serv │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ cache_dir = '/data' │ │
│ │ commit_hash = None │ │
│ │ endpoint = None │ │
│ │ etag = None │ │
│ │ etag_timeout = 10 │ │
│ │ expected_size = None │ │
│ │ filename = 'config.json' │ │
│ │ force_download = False │ │
│ │ head_call_error = GatedRepoError('401 Client Error. (Request ID: │ │
│ │ Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6… │ │
│ │ access gated repo for url │ │
│ │ https://huggingface.co/meta-llama/Meta-Llama-3-8B-I… │ │
│ │ to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have access to it and be │ │
│ │ authenticated to access it. Please log in.') │ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ local_files_only = False │ │
│ │ locks_dir = '/data/.locks' │ │
│ │ proxies = None │ │
│ │ ref_path = '/data/models--meta-llama--Meta-Llama-3-8B-Instruct… │ │
│ │ relative_filename = 'config.json' │ │
│ │ repo_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ repo_type = 'model' │ │
│ │ revision = 'main' │ │
│ │ storage_folder = '/data/models--meta-llama--Meta-Llama-3-8B-Instruct' │ │
│ │ url_to_download = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:182 │
│ 3 in _raise_on_head_call_error │
│ │
│ 1820 │ │ ) │
│ 1821 │ elif isinstance(head_call_error, RepositoryNotFoundError) or isin │
│ 1822 │ │ # Repo not found or gated => let's raise the actual error │
│ ❱ 1823 │ │ raise head_call_error │
│ 1824 │ else: │
│ 1825 │ │ # Otherwise: most likely a connection issue or Hub downtime = │
│ 1826 │ │ raise LocalEntryNotFoundError( │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ force_download = False │ │
│ │ head_call_error = GatedRepoError('401 Client Error. (Request ID: │ │
│ │ Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6e… │ │
│ │ access gated repo for url │ │
│ │ https://huggingface.co/meta-llama/Meta-Llama-3-8B-In… │ │
│ │ to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have access to it and be │ │
│ │ authenticated to access it. Please log in.') │ │
│ │ local_files_only = False │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:172 │
│ 2 in _get_metadata_or_catch_error │
│ │
│ 1719 │ if not local_files_only: │
│ 1720 │ │ try: │
│ 1721 │ │ │ try: │
│ ❱ 1722 │ │ │ │ metadata = get_hf_file_metadata(url=url, proxies=prox │
│ 1723 │ │ │ except EntryNotFoundError as http_error: │
│ 1724 │ │ │ │ if storage_folder is not None and relative_filename i │
│ 1725 │ │ │ │ │ # Cache the non-existence of the file │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ commit_hash = None │ │
│ │ endpoint = None │ │
│ │ etag = None │ │
│ │ etag_timeout = 10 │ │
│ │ expected_size = None │ │
│ │ filename = 'config.json' │ │
│ │ head_error_call = GatedRepoError('401 Client Error. (Request ID: │ │
│ │ Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6… │ │
│ │ access gated repo for url │ │
│ │ https://huggingface.co/meta-llama/Meta-Llama-3-8B-I… │ │
│ │ to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have access to it and be │ │
│ │ authenticated to access it. Please log in.') │ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ local_files_only = False │ │
│ │ proxies = None │ │
│ │ relative_filename = 'config.json' │ │
│ │ repo_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ repo_type = 'model' │ │
│ │ revision = 'main' │ │
│ │ storage_folder = '/data/models--meta-llama--Meta-Llama-3-8B-Instruct' │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-… │ │
│ │ url_to_download = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py │
│ :114 in _inner_fn │
│ │
│ 111 │ │ if check_use_auth_token: │
│ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__na │
│ 113 │ │ │
│ ❱ 114 │ │ return fn(*args, **kwargs) │
│ 115 │ │
│ 116 │ return _inner_fn # type: ignore │
│ 117 │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ arg_name = 'headers' │ │
│ │ arg_value = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ args = () │ │
│ │ check_use_auth_token = True │ │
│ │ has_token = False │ │
│ │ kwargs = { │ │
│ │ │ 'url': │ │
│ │ 'https://huggingface.co/meta-llama/Meta-Llama-3-… │ │
│ │ │ 'proxies': None, │ │
│ │ │ 'timeout': 10, │ │
│ │ │ 'headers': { │ │
│ │ │ │ 'user-agent': 'unknown/None; │ │
│ │ hf_hub/0.23.5; python/3.11.10; │ │
│ │ torch/2.5.0.dev20240815+cpu' │ │
│ │ │ } │ │
│ │ } │ │
│ │ signature = <Signature (url: str, token: Union[bool, str, │ │
│ │ NoneType] = None, proxies: Optional[Dict] = None, │ │
│ │ timeout: Optional[float] = 10, library_name: │ │
│ │ Optional[str] = None, library_version: │ │
│ │ Optional[str] = None, user_agent: Union[Dict, │ │
│ │ str, NoneType] = None, headers: │ │
│ │ Optional[Dict[str, str]] = None) -> │ │
│ │ huggingface_hub.file_download.HfFileMetadata> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:164 │
│ 5 in get_hf_file_metadata │
│ │
│ 1642 │ headers["Accept-Encoding"] = "identity" # prevent any compressio │
│ 1643 │ │
│ 1644 │ # Retrieve metadata │
│ ❱ 1645 │ r = _request_wrapper( │
│ 1646 │ │ method="HEAD", │
│ 1647 │ │ url=url, │
│ 1648 │ │ headers=headers, │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu', │ │
│ │ │ 'Accept-Encoding': 'identity' │ │
│ │ } │ │
│ │ library_name = None │ │
│ │ library_version = None │ │
│ │ proxies = None │ │
│ │ timeout = 10 │ │
│ │ token = None │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-In… │ │
│ │ user_agent = None │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:372 │
│ in _request_wrapper │
│ │
│ 369 │ """ │
│ 370 │ # Recursively follow relative redirects │
│ 371 │ if follow_relative_redirects: │
│ ❱ 372 │ │ response = _request_wrapper( │
│ 373 │ │ │ method=method, │
│ 374 │ │ │ url=url, │
│ 375 │ │ │ follow_relative_redirects=False, │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ follow_relative_redirects = True │ │
│ │ method = 'HEAD' │ │
│ │ params = { │ │
│ │ │ 'headers': { │ │
│ │ │ │ 'user-agent': 'unknown/None; │ │
│ │ hf_hub/0.23.5; python/3.11.10; │ │
│ │ torch/2.5.0.dev20240815+cpu', │ │
│ │ │ │ 'Accept-Encoding': 'identity' │ │
│ │ │ }, │ │
│ │ │ 'allow_redirects': False, │ │
│ │ │ 'proxies': None, │ │
│ │ │ 'timeout': 10 │ │
│ │ } │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Lla… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:396 │
│ in _request_wrapper │
│ │
│ 393 │ │
│ 394 │ # Perform request and return if status_code is not in the retry l │
│ 395 │ response = get_session().request(method=method, url=url, **params │
│ ❱ 396 │ hf_raise_for_status(response) │
│ 397 │ return response │
│ 398 │
│ 399 │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ follow_relative_redirects = False │ │
│ │ method = 'HEAD' │ │
│ │ params = { │ │
│ │ │ 'headers': { │ │
│ │ │ │ 'user-agent': 'unknown/None; │ │
│ │ hf_hub/0.23.5; python/3.11.10; │ │
│ │ torch/2.5.0.dev20240815+cpu', │ │
│ │ │ │ 'Accept-Encoding': 'identity' │ │
│ │ │ }, │ │
│ │ │ 'allow_redirects': False, │ │
│ │ │ 'proxies': None, │ │
│ │ │ 'timeout': 10 │ │
│ │ } │ │
│ │ response = <Response [401]> │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Lla… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py:321 │
│ in hf_raise_for_status │
│ │
│ 318 │ │ │ message = ( │
│ 319 │ │ │ │ f"{response.status_code} Client Error." + "\n\n" + f"C │
│ 320 │ │ │ ) │
│ ❱ 321 │ │ │ raise GatedRepoError(message, response) from e │
│ 322 │ │ │
│ 323 │ │ elif error_message == "Access to this resource is disabled.": │
│ 324 │ │ │ message = ( │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ endpoint_name = None │ │
│ │ error_code = 'GatedRepo' │ │
│ │ error_message = 'Access to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have'+63 │ │
│ │ message = '401 Client Error.\n\nCannot access gated repo for url │ │
│ │ https://huggingface.co/meta-'+56 │ │
│ │ response = <Response [401]> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
╰──────────────────────────────────────────────────────────────────────────────╯
GatedRepoError: 401 Client Error. (Request ID:
Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6e0e-48d3-a38a-ff86ee5199e1)
Cannot access gated repo for url
https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.j
son.
Access to model meta-llama/Meta-Llama-3-8B-Instruct is restricted. You must have
access to it and be authenticated to access it. Please log in.
`
Reproduce steps
cd DocSum/tests
bash test_compose_tgi_on_xeon.sh
Raw log
Attachments
No response
Priority
P1-Stopper
OS type
Ubuntu
Hardware type
Xeon-GNR
Installation method
Deploy method
Running nodes
Single Node
What's the version?
afb46bd
Description
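https://github.com/opea-project/GenAIExamples/actions/runs/15189596329/job/42752387606

The TGI launcher fails while downloading `meta-llama/Meta-Llama-3-8B-Instruct`: the Hugging Face Hub responds with `401 Unauthorized` (`GatedRepoError`) because the model is gated and the request carries no authentication token (`token = None` in the traceback below).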

`2025-05-23T03:12:17.327895Z INFO download: text_generation_launcher: Starting check and download process for meta-llama/Meta-Llama-3-8B-Instruct
Error: DownloadError
2025-05-23T03:12:22.649807Z ERROR download: text_generation_launcher: Download encountered an error:
2025-05-23 03:12:20.499 | INFO | text_generation_server.utils.import_utils:<module>:80 - Detected system ipex
/opt/conda/lib/python3.11/site-packages/text_generation_server/utils/sgmv.py:18: UserWarning: Could not import SGMV kernel from Punica, falling back to loop.
warnings.warn("Could not import SGMV kernel from Punica, falling back to loop.")
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py:304 │
│ in hf_raise_for_status │
│ │
│ 301 │ │
│ 302 │ """ │
│ 303 │ try: │
│ ❱ 304 │ │ response.raise_for_status() │
│ 305 │ except HTTPError as e: │
│ 306 │ │ error_code = response.headers.get("X-Error-Code") │
│ 307 │ │ error_message = response.headers.get("X-Error-Message") │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ endpoint_name = None │ │
│ │ error_code = 'GatedRepo' │ │
│ │ error_message = 'Access to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have'+63 │ │
│ │ message = '401 Client Error.\n\nCannot access gated repo for url │ │
│ │ https://huggingface.co/meta-'+56 │ │
│ │ response = <Response [401]> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/requests/models.py:1024 in │
│ raise_for_status │
│ │
│ 1021 │ │ │ ) │
│ 1022 │ │ │
│ 1023 │ │ if http_error_msg: │
│ ❱ 1024 │ │ │ raise HTTPError(http_error_msg, response=self) │
│ 1025 │ │
│ 1026 │ def close(self): │
│ 1027 │ │ """Releases the connection back to the pool. Once this method │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ http_error_msg = '401 Client Error: Unauthorized for url: │ │
│ │ https://huggingface.co/meta-llama/Meta-L'+43 │ │
│ │ reason = 'Unauthorized' │ │
│ │ self = <Response [401]> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
╰──────────────────────────────────────────────────────────────────────────────╯
HTTPError: 401 Client Error: Unauthorized for url:
https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.j
son
The above exception was the direct cause of the following exception:
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /opt/conda/lib/python3.11/site-packages/text_generation_server/cli.py:197 in │
│ download_weights │
│ │
│ 194 │ │ try: │
│ 195 │ │ │ import json │
│ 196 │ │ │ │
│ ❱ 197 │ │ │ config = hf_hub_download( │
│ 198 │ │ │ │ model_id, revision=revision, filename="config.json" │
│ 199 │ │ │ ) │
│ 200 │ │ │ with open(config, "r") as f: │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ auto_convert = True │ │
│ │ extension = '.safetensors' │ │
│ │ is_local_model = False │ │
│ │ json = <module 'json' from │ │
│ │ '/opt/conda/lib/python3.11/json/__init__.py'> │ │
│ │ json_output = True │ │
│ │ logger_level = 'INFO' │ │
│ │ merge_lora = False │ │
│ │ model_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ revision = None │ │
│ │ trust_remote_code = False │ │
│ │ utils = <module 'text_generation_server.utils' from │ │
│ │ '/opt/conda/lib/python3.11/site-packages/text_gener… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py │
│ :114 in _inner_fn │
│ │
│ 111 │ │ if check_use_auth_token: │
│ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__na │
│ 113 │ │ │
│ ❱ 114 │ │ return fn(*args, **kwargs) │
│ 115 │ │
│ 116 │ return _inner_fn # type: ignore │
│ 117 │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ arg_name = 'filename' │ │
│ │ arg_value = 'config.json' │ │
│ │ args = ('meta-llama/Meta-Llama-3-8B-Instruct',) │ │
│ │ check_use_auth_token = True │ │
│ │ has_token = False │ │
│ │ kwargs = {'revision': None, 'filename': 'config.json'} │ │
│ │ signature = <Signature (repo_id: str, filename: str, *, │ │
│ │ subfolder: Optional[str] = None, repo_type: │ │
│ │ Optional[str] = None, revision: Optional[str] = │ │
│ │ None, library_name: Optional[str] = None, │ │
│ │ library_version: Optional[str] = None, cache_dir: │ │
│ │ Union[str, pathlib.Path, NoneType] = None, │ │
│ │ local_dir: Union[str, pathlib.Path, NoneType] = │ │
│ │ None, user_agent: Union[Dict, str, NoneType] = │ │
│ │ None, force_download: bool = False, proxies: │ │
│ │ Optional[Dict] = None, etag_timeout: float = 10, │ │
│ │ token: Union[bool, str, NoneType] = None, │ │
│ │ local_files_only: bool = False, headers: │ │
│ │ Optional[Dict[str, str]] = None, endpoint: │ │
│ │ Optional[str] = None, legacy_cache_layout: bool = │ │
│ │ False, resume_download: Optional[bool] = None, │ │
│ │ force_filename: Optional[str] = None, │ │
│ │ local_dir_use_symlinks: Union[bool, │ │
│ │ Literal['auto']] = 'auto') -> str> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:122 │
│ 1 in hf_hub_download │
│ │
│ 1218 │ │ │ local_files_only=local_files_only, │
│ 1219 │ │ ) │
│ 1220 │ else: │
│ ❱ 1221 │ │ return _hf_hub_download_to_cache_dir( │
│ 1222 │ │ │ # Destination │
│ 1223 │ │ │ cache_dir=cache_dir, │
│ 1224 │ │ │ # File info │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ cache_dir = '/data' │ │
│ │ endpoint = None │ │
│ │ etag_timeout = 10 │ │
│ │ filename = 'config.json' │ │
│ │ force_download = False │ │
│ │ force_filename = None │ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ legacy_cache_layout = False │ │
│ │ library_name = None │ │
│ │ library_version = None │ │
│ │ local_dir = None │ │
│ │ local_dir_use_symlinks = 'auto' │ │
│ │ local_files_only = False │ │
│ │ proxies = None │ │
│ │ repo_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ repo_type = 'model' │ │
│ │ resume_download = None │ │
│ │ revision = 'main' │ │
│ │ subfolder = None │ │
│ │ token = None │ │
│ │ user_agent = None │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:132 │
│ 5 in _hf_hub_download_to_cache_dir │
│ │
│ 1322 │ │ │ │ │ return pointer_path │
│ 1323 │ │ │
│ 1324 │ │ # Otherwise, raise appropriate error │
│ ❱ 1325 │ │ _raise_on_head_call_error(head_call_error, force_download, lo │
│ 1326 │ │
│ 1327 │ # From now on, etag, commit_hash, url and size are not None. │
│ 1328 │ assert etag is not None, "etag must have been retrieved from serv │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ cache_dir = '/data' │ │
│ │ commit_hash = None │ │
│ │ endpoint = None │ │
│ │ etag = None │ │
│ │ etag_timeout = 10 │ │
│ │ expected_size = None │ │
│ │ filename = 'config.json' │ │
│ │ force_download = False │ │
│ │ head_call_error = GatedRepoError('401 Client Error. (Request ID: │ │
│ │ Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6… │ │
│ │ access gated repo for url │ │
│ │ https://huggingface.co/meta-llama/Meta-Llama-3-8B-I… │ │
│ │ to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have access to it and be │ │
│ │ authenticated to access it. Please log in.') │ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ local_files_only = False │ │
│ │ locks_dir = '/data/.locks' │ │
│ │ proxies = None │ │
│ │ ref_path = '/data/models--meta-llama--Meta-Llama-3-8B-Instruct… │ │
│ │ relative_filename = 'config.json' │ │
│ │ repo_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ repo_type = 'model' │ │
│ │ revision = 'main' │ │
│ │ storage_folder = '/data/models--meta-llama--Meta-Llama-3-8B-Instruct' │ │
│ │ url_to_download = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:182 │
│ 3 in _raise_on_head_call_error │
│ │
│ 1820 │ │ ) │
│ 1821 │ elif isinstance(head_call_error, RepositoryNotFoundError) or isin │
│ 1822 │ │ # Repo not found or gated => let's raise the actual error │
│ ❱ 1823 │ │ raise head_call_error │
│ 1824 │ else: │
│ 1825 │ │ # Otherwise: most likely a connection issue or Hub downtime = │
│ 1826 │ │ raise LocalEntryNotFoundError( │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ force_download = False │ │
│ │ head_call_error = GatedRepoError('401 Client Error. (Request ID: │ │
│ │ Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6e… │ │
│ │ access gated repo for url │ │
│ │ https://huggingface.co/meta-llama/Meta-Llama-3-8B-In… │ │
│ │ to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have access to it and be │ │
│ │ authenticated to access it. Please log in.') │ │
│ │ local_files_only = False │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:172 │
│ 2 in _get_metadata_or_catch_error │
│ │
│ 1719 │ if not local_files_only: │
│ 1720 │ │ try: │
│ 1721 │ │ │ try: │
│ ❱ 1722 │ │ │ │ metadata = get_hf_file_metadata(url=url, proxies=prox │
│ 1723 │ │ │ except EntryNotFoundError as http_error: │
│ 1724 │ │ │ │ if storage_folder is not None and relative_filename i │
│ 1725 │ │ │ │ │ # Cache the non-existence of the file │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ commit_hash = None │ │
│ │ endpoint = None │ │
│ │ etag = None │ │
│ │ etag_timeout = 10 │ │
│ │ expected_size = None │ │
│ │ filename = 'config.json' │ │
│ │ head_error_call = GatedRepoError('401 Client Error. (Request ID: │ │
│ │ Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6… │ │
│ │ access gated repo for url │ │
│ │ https://huggingface.co/meta-llama/Meta-Llama-3-8B-I… │ │
│ │ to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have access to it and be │ │
│ │ authenticated to access it. Please log in.') │ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ local_files_only = False │ │
│ │ proxies = None │ │
│ │ relative_filename = 'config.json' │ │
│ │ repo_id = 'meta-llama/Meta-Llama-3-8B-Instruct' │ │
│ │ repo_type = 'model' │ │
│ │ revision = 'main' │ │
│ │ storage_folder = '/data/models--meta-llama--Meta-Llama-3-8B-Instruct' │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-… │ │
│ │ url_to_download = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py │
│ :114 in _inner_fn │
│ │
│ 111 │ │ if check_use_auth_token: │
│ 112 │ │ │ kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__na │
│ 113 │ │ │
│ ❱ 114 │ │ return fn(*args, **kwargs) │
│ 115 │ │
│ 116 │ return _inner_fn # type: ignore │
│ 117 │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ arg_name = 'headers' │ │
│ │ arg_value = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu' │ │
│ │ } │ │
│ │ args = () │ │
│ │ check_use_auth_token = True │ │
│ │ has_token = False │ │
│ │ kwargs = { │ │
│ │ │ 'url': │ │
│ │ 'https://huggingface.co/meta-llama/Meta-Llama-3-… │ │
│ │ │ 'proxies': None, │ │
│ │ │ 'timeout': 10, │ │
│ │ │ 'headers': { │ │
│ │ │ │ 'user-agent': 'unknown/None; │ │
│ │ hf_hub/0.23.5; python/3.11.10; │ │
│ │ torch/2.5.0.dev20240815+cpu' │ │
│ │ │ } │ │
│ │ } │ │
│ │ signature = <Signature (url: str, token: Union[bool, str, │ │
│ │ NoneType] = None, proxies: Optional[Dict] = None, │ │
│ │ timeout: Optional[float] = 10, library_name: │ │
│ │ Optional[str] = None, library_version: │ │
│ │ Optional[str] = None, user_agent: Union[Dict, │ │
│ │ str, NoneType] = None, headers: │ │
│ │ Optional[Dict[str, str]] = None) -> │ │
│ │ huggingface_hub.file_download.HfFileMetadata> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:164 │
│ 5 in get_hf_file_metadata │
│ │
│ 1642 │ headers["Accept-Encoding"] = "identity" # prevent any compressio │
│ 1643 │ │
│ 1644 │ # Retrieve metadata │
│ ❱ 1645 │ r = _request_wrapper( │
│ 1646 │ │ method="HEAD", │
│ 1647 │ │ url=url, │
│ 1648 │ │ headers=headers, │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ headers = { │ │
│ │ │ 'user-agent': 'unknown/None; hf_hub/0.23.5; │ │
│ │ python/3.11.10; torch/2.5.0.dev20240815+cpu', │ │
│ │ │ 'Accept-Encoding': 'identity' │ │
│ │ } │ │
│ │ library_name = None │ │
│ │ library_version = None │ │
│ │ proxies = None │ │
│ │ timeout = 10 │ │
│ │ token = None │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Llama-3-8B-In… │ │
│ │ user_agent = None │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:372 │
│ in _request_wrapper │
│ │
│ 369 │ """ │
│ 370 │ # Recursively follow relative redirects │
│ 371 │ if follow_relative_redirects: │
│ ❱ 372 │ │ response = _request_wrapper( │
│ 373 │ │ │ method=method, │
│ 374 │ │ │ url=url, │
│ 375 │ │ │ follow_relative_redirects=False, │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ follow_relative_redirects = True │ │
│ │ method = 'HEAD' │ │
│ │ params = { │ │
│ │ │ 'headers': { │ │
│ │ │ │ 'user-agent': 'unknown/None; │ │
│ │ hf_hub/0.23.5; python/3.11.10; │ │
│ │ torch/2.5.0.dev20240815+cpu', │ │
│ │ │ │ 'Accept-Encoding': 'identity' │ │
│ │ │ }, │ │
│ │ │ 'allow_redirects': False, │ │
│ │ │ 'proxies': None, │ │
│ │ │ 'timeout': 10 │ │
│ │ } │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Lla… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/file_download.py:396 │
│ in _request_wrapper │
│ │
│ 393 │ │
│ 394 │ # Perform request and return if status_code is not in the retry l │
│ 395 │ response = get_session().request(method=method, url=url, **params │
│ ❱ 396 │ hf_raise_for_status(response) │
│ 397 │ return response │
│ 398 │
│ 399 │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ follow_relative_redirects = False │ │
│ │ method = 'HEAD' │ │
│ │ params = { │ │
│ │ │ 'headers': { │ │
│ │ │ │ 'user-agent': 'unknown/None; │ │
│ │ hf_hub/0.23.5; python/3.11.10; │ │
│ │ torch/2.5.0.dev20240815+cpu', │ │
│ │ │ │ 'Accept-Encoding': 'identity' │ │
│ │ │ }, │ │
│ │ │ 'allow_redirects': False, │ │
│ │ │ 'proxies': None, │ │
│ │ │ 'timeout': 10 │ │
│ │ } │ │
│ │ response = <Response [401]> │ │
│ │ url = 'https://huggingface.co/meta-llama/Meta-Lla… │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
│ │
│ /opt/conda/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py:321 │
│ in hf_raise_for_status │
│ │
│ 318 │ │ │ message = ( │
│ 319 │ │ │ │ f"{response.status_code} Client Error." + "\n\n" + f"C │
│ 320 │ │ │ ) │
│ ❱ 321 │ │ │ raise GatedRepoError(message, response) from e │
│ 322 │ │ │
│ 323 │ │ elif error_message == "Access to this resource is disabled.": │
│ 324 │ │ │ message = ( │
│ │
│ ╭───────────────────────────────── locals ─────────────────────────────────╮ │
│ │ endpoint_name = None │ │
│ │ error_code = 'GatedRepo' │ │
│ │ error_message = 'Access to model meta-llama/Meta-Llama-3-8B-Instruct is │ │
│ │ restricted. You must have'+63 │ │
│ │ message = '401 Client Error.\n\nCannot access gated repo for url │ │
│ │ https://huggingface.co/meta-'+56 │ │
│ │ response = <Response [401]> │ │
│ ╰──────────────────────────────────────────────────────────────────────────╯ │
╰──────────────────────────────────────────────────────────────────────────────╯
GatedRepoError: 401 Client Error. (Request ID:
Root=1-682fe795-1915c27221f6c1a137f72700;3c121411-6e0e-48d3-a38a-ff86ee5199e1)
Cannot access gated repo for url
https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.j
son.
Access to model meta-llama/Meta-Llama-3-8B-Instruct is restricted. You must have
access to it and be authenticated to access it. Please log in.
`
Reproduce steps
cd DocSum/tests
bash test_compose_tgi_on_xeon.sh
Raw log
Attachments
No response