# GGUF Quantization with Imatrix and K-Quantization
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30762,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Introduction\n\nThis note implement the concept in https://towardsdatascience.com/gguf-quantization-with-imatrix-and-k-quantization-to-run-llms-on-your-cpu-02356b531926\n\nThis note will quantize Meta-Llama with Imatrix and K-quantization will llama.cpp\n\n# Install llama.cpp\n\n\n","metadata":{}},{"cell_type":"code","source":"!git clone https://github.com/ggerganov/llama.cpp\n%cd llama.cpp \n!export GGML_CUDA=1 make && pip install -r requirements.txt","metadata":{"execution":{"iopub.status.busy":"2024-09-14T14:37:32.357334Z","iopub.execute_input":"2024-09-14T14:37:32.358313Z","iopub.status.idle":"2024-09-14T14:38:32.143406Z","shell.execute_reply.started":"2024-09-14T14:37:32.358244Z","shell.execute_reply":"2024-09-14T14:38:32.142321Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Cloning into 'llama.cpp'...\nremote: Enumerating objects: 34161, done.\u001b[K\nremote: Counting objects: 100% (7605/7605), done.\u001b[K\nremote: Compressing objects: 100% (629/629), done.\u001b[K\nremote: Total 34161 (delta 7338), reused 7021 (delta 6972), pack-reused 26556 (from 1)\u001b[K\nReceiving objects: 100% (34161/34161), 57.84 MiB | 25.34 MiB/s, done.\nResolving deltas: 100% (24766/24766), done.\n/kaggle/working/llama.cpp\nLooking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu, https://download.pytorch.org/whl/cpu, https://download.pytorch.org/whl/cpu, https://download.pytorch.org/whl/cpu\nRequirement already satisfied: numpy~=1.26.4 in /opt/conda/lib/python3.10/site-packages (from -r ./requirements/requirements-convert_legacy_llama.txt (line 1)) (1.26.4)\nRequirement already satisfied: sentencepiece~=0.2.0 in /opt/conda/lib/python3.10/site-packages (from -r ./requirements/requirements-convert_legacy_llama.txt (line 2)) (0.2.0)\nRequirement already satisfied: transformers<5.0.0,>=4.40.1 in /opt/conda/lib/python3.10/site-packages (from -r ./requirements/requirements-convert_legacy_llama.txt (line 3)) (4.44.0)\nCollecting gguf>=0.1.0 (from -r ./requirements/requirements-convert_legacy_llama.txt (line 4))\n Downloading gguf-0.10.0-py3-none-any.whl.metadata (3.5 kB)\nCollecting protobuf<5.0.0,>=4.21.0 (from -r ./requirements/requirements-convert_legacy_llama.txt (line 5))\n Downloading protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\nCollecting torch~=2.2.1 (from -r ./requirements/requirements-convert_hf_to_gguf.txt (line 3))\n Downloading https://download.pytorch.org/whl/cpu/torch-2.2.2%2Bcpu-cp310-cp310-linux_x86_64.whl (186.8 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m186.8/186.8 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.40.1->-r ./requirements/requirements-convert_legacy_llama.txt (line 3)) (3.15.1)\nRequirement already satisfied: 
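As a quick sanity check before moving on (not part of the original notebook, and assuming the `llama-*` binary names the llama.cpp Makefile produced at this point in time), it is worth confirming that the tools used later actually built:

```python
# Hypothetical sanity check: verify the CUDA build produced the binaries
# this notebook relies on (llama-imatrix, llama-quantize, llama-cli).
!ls -l ./llama-imatrix ./llama-quantize ./llama-cli
```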
# Download model

```python
!mkdir "./quantized_model_llama-2b/"
```

```python
import os
from huggingface_hub import login, snapshot_download
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
os.environ["HF_TOKEN"] = user_secrets.get_secret("HUGGINGFACE_TOKEN")
os.environ["WANDB_API_KEY"] = user_secrets.get_secret("WANDB_API_KEY")
os.environ["MODEL_NAME"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
os.environ["DATASET"] = "HuggingFaceH4/ultrafeedback_binarized"

login(os.environ["HF_TOKEN"])

# The repo_id must include the `meta-llama/` namespace; the bare name
# "Meta-Llama-3.1-8B-Instruct" is what produced the 404 shown below.
model_name = os.environ["MODEL_NAME"]           # the model we want to quantize
methods = ["Q4_K_S", "Q4_K_M"]                  # the quantization methods to be used
base_model = "./original_model_llama-2b/"       # where the FP16 GGUF model will be stored
quantized_path = "./quantized_model_llama-2b/"  # where the quantized GGUF models will be stored

snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)
```

Output:

```
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful
```
The run captured here instead used `model_name = "Meta-Llama-3.1-8B-Instruct"` without the namespace, so `snapshot_download` failed:

```
RepositoryNotFoundError: 404 Client Error.
Repository Not Found for url: https://huggingface.co/api/models/Meta-Llama-3.1-8B-Instruct/revision/main.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
```

With the namespaced repo_id (and access granted to the gated Meta-Llama repository), the download completes and the quantization steps sketched below can run.
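The capture ends at the failed download, but the remaining steps of the article are easy to sketch. The cell below was not executed in this run and makes two assumptions: we are still inside `/kaggle/working/llama.cpp`, and `calibration.txt` is a placeholder for any representative plain-text calibration corpus. It converts the downloaded checkpoint to an FP16 GGUF with `convert_hf_to_gguf.py`, computes the importance matrix with `llama-imatrix`, then quantizes once per method with `llama-quantize`:

```python
# Sketch of the remaining pipeline (not executed in the captured run).
fp16_gguf = f"{base_model}Meta-Llama-3.1-8B-Instruct-F16.gguf"

# 1. Convert the Hugging Face checkpoint to an FP16 GGUF file.
!python convert_hf_to_gguf.py {base_model} --outtype f16 --outfile {fp16_gguf}

# 2. Compute the importance matrix over the calibration text;
#    -ngl 99 offloads all layers to the Kaggle T4 GPU to speed this up.
!./llama-imatrix -m {fp16_gguf} -f calibration.txt -o imatrix.dat -ngl 99

# 3. Quantize once per method, feeding the imatrix to the K-quants.
for method in methods:
    out_gguf = f"{quantized_path}Meta-Llama-3.1-8B-Instruct-{method}.gguf"
    !./llama-quantize --imatrix imatrix.dat {fp16_gguf} {out_gguf} {method}
```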
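Finally, a minimal smoke test (again an un-executed sketch under the same assumptions) to confirm a quantized model loads and generates on CPU:

```python
# Run a short completion with the Q4_K_M model to verify it works.
!./llama-cli -m {quantized_path}Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -p "What is GGUF quantization?" -n 64
```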