# GGUF Quantization with Imatrix and K-Quantization
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30762,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Introduction\n\nThis note implement the concept in https://towardsdatascience.com/gguf-quantization-with-imatrix-and-k-quantization-to-run-llms-on-your-cpu-02356b531926\n\nThis note will quantize Meta-Llama with Imatrix and K-quantization will llama.cpp\n\n# Install llama.cpp\n\n\n","metadata":{}},{"cell_type":"code","source":"!git clone https://github.com/ggerganov/llama.cpp\n%cd llama.cpp \n!export GGML_CUDA=1 make && pip install -r requirements.txt","metadata":{"execution":{"iopub.status.busy":"2024-09-14T14:37:32.357334Z","iopub.execute_input":"2024-09-14T14:37:32.358313Z","iopub.status.idle":"2024-09-14T14:38:32.143406Z","shell.execute_reply.started":"2024-09-14T14:37:32.358244Z","shell.execute_reply":"2024-09-14T14:38:32.142321Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Cloning into 'llama.cpp'...\nremote: Enumerating objects: 34161, done.\u001b[K\nremote: Counting objects: 100% (7605/7605), done.\u001b[K\nremote: Compressing objects: 100% (629/629), done.\u001b[K\nremote: Total 34161 (delta 7338), reused 7021 (delta 6972), pack-reused 26556 (from 1)\u001b[K\nReceiving objects: 100% (34161/34161), 57.84 MiB | 25.34 MiB/s, done.\nResolving deltas: 100% (24766/24766), done.\n/kaggle/working/llama.cpp\nLooking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu, https://download.pytorch.org/whl/cpu, https://download.pytorch.org/whl/cpu, https://download.pytorch.org/whl/cpu\nRequirement already satisfied: numpy~=1.26.4 in /opt/conda/lib/python3.10/site-packages (from -r ./requirements/requirements-convert_legacy_llama.txt (line 1)) (1.26.4)\nRequirement already satisfied: sentencepiece~=0.2.0 in /opt/conda/lib/python3.10/site-packages (from -r ./requirements/requirements-convert_legacy_llama.txt (line 2)) (0.2.0)\nRequirement already satisfied: transformers<5.0.0,>=4.40.1 in /opt/conda/lib/python3.10/site-packages (from -r ./requirements/requirements-convert_legacy_llama.txt (line 3)) (4.44.0)\nCollecting gguf>=0.1.0 (from -r ./requirements/requirements-convert_legacy_llama.txt (line 4))\n Downloading gguf-0.10.0-py3-none-any.whl.metadata (3.5 kB)\nCollecting protobuf<5.0.0,>=4.21.0 (from -r ./requirements/requirements-convert_legacy_llama.txt (line 5))\n Downloading protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\nCollecting torch~=2.2.1 (from -r ./requirements/requirements-convert_hf_to_gguf.txt (line 3))\n Downloading https://download.pytorch.org/whl/cpu/torch-2.2.2%2Bcpu-cp310-cp310-linux_x86_64.whl (186.8 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m186.8/186.8 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.40.1->-r ./requirements/requirements-convert_legacy_llama.txt (line 3)) (3.15.1)\nRequirement already satisfied: 
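As a quick sanity check before moving on (not part of the original notebook, and assuming the `llama-*` binary names the llama.cpp Makefile produced at this point in time), it is worth confirming that the tools used later actually built:

```python
# Hypothetical sanity check: verify the CUDA build produced the binaries
# this notebook relies on (llama-imatrix, llama-quantize, llama-cli).
!ls -l ./llama-imatrix ./llama-quantize ./llama-cli
```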
# Download model

```python
!mkdir "./quantized_model_llama-2b/"
```

```python
import os
from huggingface_hub import login, snapshot_download
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
os.environ["HF_TOKEN"] = user_secrets.get_secret("HUGGINGFACE_TOKEN")
os.environ["WANDB_API_KEY"] = user_secrets.get_secret("WANDB_API_KEY")
os.environ["MODEL_NAME"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
os.environ["DATASET"] = "HuggingFaceH4/ultrafeedback_binarized"

login(os.environ["HF_TOKEN"])

# The repo_id must include the `meta-llama/` namespace; the bare name
# "Meta-Llama-3.1-8B-Instruct" is what produced the 404 shown below.
model_name = os.environ["MODEL_NAME"]           # the model we want to quantize
methods = ["Q4_K_S", "Q4_K_M"]                  # the quantization methods to be used
base_model = "./original_model_llama-2b/"       # where the FP16 GGUF model will be stored
quantized_path = "./quantized_model_llama-2b/"  # where the quantized GGUF models will be stored

snapshot_download(repo_id=model_name, local_dir=base_model, local_dir_use_symlinks=False)
```

Output:

```
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful
```
The run captured here instead used `model_name = "Meta-Llama-3.1-8B-Instruct"` without the namespace, so `snapshot_download` failed:

```
RepositoryNotFoundError: 404 Client Error.
Repository Not Found for url: https://huggingface.co/api/models/Meta-Llama-3.1-8B-Instruct/revision/main.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
```

With the namespaced repo_id (and access granted to the gated Meta-Llama repository), the download completes and the quantization steps sketched below can run.
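The capture ends at the failed download, but the remaining steps of the article are easy to sketch. The cell below was not executed in this run and makes two assumptions: we are still inside `/kaggle/working/llama.cpp`, and `calibration.txt` is a placeholder for any representative plain-text calibration corpus. It converts the downloaded checkpoint to an FP16 GGUF with `convert_hf_to_gguf.py`, computes the importance matrix with `llama-imatrix`, then quantizes once per method with `llama-quantize`:

```python
# Sketch of the remaining pipeline (not executed in the captured run).
fp16_gguf = f"{base_model}Meta-Llama-3.1-8B-Instruct-F16.gguf"

# 1. Convert the Hugging Face checkpoint to an FP16 GGUF file.
!python convert_hf_to_gguf.py {base_model} --outtype f16 --outfile {fp16_gguf}

# 2. Compute the importance matrix over the calibration text;
#    -ngl 99 offloads all layers to the Kaggle T4 GPU to speed this up.
!./llama-imatrix -m {fp16_gguf} -f calibration.txt -o imatrix.dat -ngl 99

# 3. Quantize once per method, feeding the imatrix to the K-quants.
for method in methods:
    out_gguf = f"{quantized_path}Meta-Llama-3.1-8B-Instruct-{method}.gguf"
    !./llama-quantize --imatrix imatrix.dat {fp16_gguf} {out_gguf} {method}
```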
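Finally, a minimal smoke test (again an un-executed sketch under the same assumptions) to confirm a quantized model loads and generates on CPU:

```python
# Run a short completion with the Q4_K_M model to verify it works.
!./llama-cli -m {quantized_path}Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf -p "What is GGUF quantization?" -n 64
```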