-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEvaluating OLMoE
1 lines (1 loc) · 56.6 KB
/
Evaluating OLMoE
1
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30762,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Overview\n\n","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},{"cell_type":"markdown","source":"This is a sample note on how to evaluate a language model. It is copied directly from Aisuko's note \"Evaluating language models\".","metadata":{}},{"cell_type":"code","source":"# !pip install -U -q lm-eval==0.4.3","metadata":{"execution":{"iopub.status.busy":"2024-09-08T06:03:51.584992Z","iopub.execute_input":"2024-09-08T06:03:51.585980Z","iopub.status.idle":"2024-09-08T06:03:51.592349Z","shell.execute_reply.started":"2024-09-08T06:03:51.585938Z","shell.execute_reply":"2024-09-08T06:03:51.589193Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"!git clone https://github.com/EleutherAI/lm-evaluation-harness\n%cd lm-evaluation-harness\n!pip install -e .","metadata":{"execution":{"iopub.status.busy":"2024-09-08T06:03:51.594088Z","iopub.execute_input":"2024-09-08T06:03:51.594421Z","iopub.status.idle":"2024-09-08T06:04:27.631003Z","shell.execute_reply.started":"2024-09-08T06:03:51.594388Z","shell.execute_reply":"2024-09-08T06:04:27.630023Z"},"trusted":true},"execution_count":13,"outputs":[{"name":"stdout","text":"Cloning into 'lm-evaluation-harness'...\nremote: Enumerating objects: 40293, done.\u001b[K\nremote: Counting objects: 100% (622/622), done.\u001b[K\nremote: Compressing objects: 100% (398/398), done.\u001b[K\nremote: 
Total 40293 (delta 324), reused 486 (delta 223), pack-reused 39671 (from 1)\u001b[K\nReceiving objects: 100% (40293/40293), 27.00 MiB | 20.43 MiB/s, done.\nResolving deltas: 100% (28238/28238), done.\n/kaggle/working/lm-evaluation-harness/lm-evaluation-harness/lm-evaluation-harness\nObtaining file:///kaggle/working/lm-evaluation-harness/lm-evaluation-harness/lm-evaluation-harness\n Installing build dependencies ... \u001b[?25ldone\n\u001b[?25h Checking if build backend supports build_editable ... \u001b[?25ldone\n\u001b[?25h Getting requirements to build editable ... \u001b[?25ldone\n\u001b[?25h Preparing editable metadata (pyproject.toml) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: accelerate>=0.26.0 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (0.33.0)\nRequirement already satisfied: evaluate in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (0.4.2)\nRequirement already satisfied: datasets>=2.16.0 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (2.21.0)\nRequirement already satisfied: jsonlines in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (4.0.0)\nRequirement already satisfied: numexpr in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (2.10.1)\nRequirement already satisfied: peft>=0.2.0 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (0.12.0)\nRequirement already satisfied: pybind11>=2.6.2 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (2.13.4)\nRequirement already satisfied: pytablewriter in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (1.2.0)\nRequirement already satisfied: rouge-score>=0.0.4 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (0.1.2)\nRequirement already satisfied: sacrebleu>=1.5.0 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (2.4.3)\nRequirement already satisfied: scikit-learn>=0.24.1 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) 
(1.2.2)\nRequirement already satisfied: sqlitedict in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (2.1.0)\nRequirement already satisfied: torch>=1.8 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (2.4.0)\nRequirement already satisfied: tqdm-multiprocess in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (0.0.11)\nRequirement already satisfied: transformers>=4.1 in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (4.44.0)\nRequirement already satisfied: zstandard in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (0.23.0)\nRequirement already satisfied: dill in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (0.3.8)\nRequirement already satisfied: word2number in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (1.1)\nRequirement already satisfied: more-itertools in /opt/conda/lib/python3.10/site-packages (from lm_eval==0.4.4) (10.3.0)\nRequirement already satisfied: numpy<2.0.0,>=1.17 in /opt/conda/lib/python3.10/site-packages (from accelerate>=0.26.0->lm_eval==0.4.4) (1.26.4)\nRequirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from accelerate>=0.26.0->lm_eval==0.4.4) (21.3)\nRequirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from accelerate>=0.26.0->lm_eval==0.4.4) (5.9.3)\nRequirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from accelerate>=0.26.0->lm_eval==0.4.4) (6.0.2)\nRequirement already satisfied: huggingface-hub>=0.21.0 in /opt/conda/lib/python3.10/site-packages (from accelerate>=0.26.0->lm_eval==0.4.4) (0.24.6)\nRequirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.10/site-packages (from accelerate>=0.26.0->lm_eval==0.4.4) (0.4.4)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (3.15.1)\nRequirement already satisfied: pyarrow>=15.0.0 in 
/opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (16.1.0)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (2.2.2)\nRequirement already satisfied: requests>=2.32.2 in /opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (2.32.3)\nRequirement already satisfied: tqdm>=4.66.3 in /opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (4.66.4)\nRequirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (3.4.1)\nRequirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (0.70.16)\nRequirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets>=2.16.0->lm_eval==0.4.4) (2024.6.1)\nRequirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets>=2.16.0->lm_eval==0.4.4) (3.9.5)\nRequirement already satisfied: absl-py in /opt/conda/lib/python3.10/site-packages (from rouge-score>=0.0.4->lm_eval==0.4.4) (1.4.0)\nRequirement already satisfied: nltk in /opt/conda/lib/python3.10/site-packages (from rouge-score>=0.0.4->lm_eval==0.4.4) (3.2.4)\nRequirement already satisfied: six>=1.14.0 in /opt/conda/lib/python3.10/site-packages (from rouge-score>=0.0.4->lm_eval==0.4.4) (1.16.0)\nRequirement already satisfied: portalocker in /opt/conda/lib/python3.10/site-packages (from sacrebleu>=1.5.0->lm_eval==0.4.4) (2.10.1)\nRequirement already satisfied: regex in /opt/conda/lib/python3.10/site-packages (from sacrebleu>=1.5.0->lm_eval==0.4.4) (2024.5.15)\nRequirement already satisfied: tabulate>=0.8.9 in /opt/conda/lib/python3.10/site-packages (from sacrebleu>=1.5.0->lm_eval==0.4.4) (0.9.0)\nRequirement already satisfied: colorama in /opt/conda/lib/python3.10/site-packages (from 
sacrebleu>=1.5.0->lm_eval==0.4.4) (0.4.6)\nRequirement already satisfied: lxml in /opt/conda/lib/python3.10/site-packages (from sacrebleu>=1.5.0->lm_eval==0.4.4) (5.3.0)\nRequirement already satisfied: scipy>=1.3.2 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.1->lm_eval==0.4.4) (1.14.0)\nRequirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.1->lm_eval==0.4.4) (1.4.2)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.1->lm_eval==0.4.4) (3.5.0)\nRequirement already satisfied: typing-extensions>=4.8.0 in /opt/conda/lib/python3.10/site-packages (from torch>=1.8->lm_eval==0.4.4) (4.12.2)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.8->lm_eval==0.4.4) (1.13.2)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.8->lm_eval==0.4.4) (3.3)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.8->lm_eval==0.4.4) (3.1.4)\nRequirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/lib/python3.10/site-packages (from transformers>=4.1->lm_eval==0.4.4) (0.19.1)\nRequirement already satisfied: attrs>=19.2.0 in /opt/conda/lib/python3.10/site-packages (from jsonlines->lm_eval==0.4.4) (23.2.0)\nRequirement already satisfied: setuptools>=38.3.0 in /opt/conda/lib/python3.10/site-packages (from pytablewriter->lm_eval==0.4.4) (70.0.0)\nRequirement already satisfied: DataProperty<2,>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from pytablewriter->lm_eval==0.4.4) (1.0.1)\nRequirement already satisfied: mbstrdecoder<2,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from pytablewriter->lm_eval==0.4.4) (1.1.3)\nRequirement already satisfied: pathvalidate<4,>=2.3.0 in /opt/conda/lib/python3.10/site-packages (from pytablewriter->lm_eval==0.4.4) (3.2.1)\nRequirement already satisfied: 
tabledata<2,>=1.3.1 in /opt/conda/lib/python3.10/site-packages (from pytablewriter->lm_eval==0.4.4) (1.3.3)\nRequirement already satisfied: tcolorpy<1,>=0.0.5 in /opt/conda/lib/python3.10/site-packages (from pytablewriter->lm_eval==0.4.4) (0.1.6)\nRequirement already satisfied: typepy<2,>=1.3.2 in /opt/conda/lib/python3.10/site-packages (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval==0.4.4) (1.3.2)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->lm_eval==0.4.4) (1.3.1)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->lm_eval==0.4.4) (1.4.1)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->lm_eval==0.4.4) (6.0.5)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->lm_eval==0.4.4) (1.9.4)\nRequirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets>=2.16.0->lm_eval==0.4.4) (4.0.3)\nRequirement already satisfied: chardet<6,>=3.0.4 in /opt/conda/lib/python3.10/site-packages (from mbstrdecoder<2,>=1.0.0->pytablewriter->lm_eval==0.4.4) (5.2.0)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->accelerate>=0.26.0->lm_eval==0.4.4) (3.1.2)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval==0.4.4) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval==0.4.4) (3.7)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval==0.4.4) (1.26.18)\nRequirement already 
satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval==0.4.4) (2024.7.4)\nRequirement already satisfied: python-dateutil<3.0.0,>=2.8.0 in /opt/conda/lib/python3.10/site-packages (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval==0.4.4) (2.9.0.post0)\nRequirement already satisfied: pytz>=2018.9 in /opt/conda/lib/python3.10/site-packages (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval==0.4.4) (2024.1)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.8->lm_eval==0.4.4) (2.1.5)\nRequirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets>=2.16.0->lm_eval==0.4.4) (2024.1)\nRequirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.8->lm_eval==0.4.4) (1.3.0)\nBuilding wheels for collected packages: lm_eval\n Building editable for lm_eval (pyproject.toml) ... 
\u001b[?25ldone\n\u001b[?25h Created wheel for lm_eval: filename=lm_eval-0.4.4-0.editable-py3-none-any.whl size=18641 sha256=5246109c5e0329e8ccdd1612b726ac0d6b6ee5aca4e63b8cecf24f483382c570\n Stored in directory: /tmp/pip-ephem-wheel-cache-fcnqygfh/wheels/5f/e6/00/18b2187ff3e205506d570b04374442637ba58335d6028891eb\nSuccessfully built lm_eval\nInstalling collected packages: lm_eval\n Attempting uninstall: lm_eval\n Found existing installation: lm_eval 0.4.4\n Uninstalling lm_eval-0.4.4:\n Successfully uninstalled lm_eval-0.4.4\nSuccessfully installed lm_eval-0.4.4\n","output_type":"stream"}]},{"cell_type":"code","source":"import os\nfrom huggingface_hub import login\nfrom kaggle_secrets import UserSecretsClient\nuser_secrets = UserSecretsClient()\n\nos.environ[\"HF_TOKEN\"]=user_secrets.get_secret(\"HUGGINGFACE_TOKEN\")\n\nos.environ[\"WANDB_API_KEY\"]=user_secrets.get_secret(\"WANDB_API_KEY\")\nos.environ[\"WANDB_PROJECT\"] = \"Evaluating allenai OLMo-1B-0724-hf\"\nos.environ[\"WANDB_NAME\"] = \"eva-OLMo-1B-0724-hf\"\nos.environ[\"MODEL_NAME\"] = \"allenai/OLMo-1B-0724-hf\"\nos.environ[\"DATASET\"] = \"HuggingFaceH4/ultrafeedback_binarized\"\n\nlogin(os.environ[\"HF_TOKEN\"])","metadata":{"execution":{"iopub.status.busy":"2024-09-08T06:04:27.632470Z","iopub.execute_input":"2024-09-08T06:04:27.632803Z","iopub.status.idle":"2024-09-08T06:04:28.023038Z","shell.execute_reply.started":"2024-09-08T06:04:27.632768Z","shell.execute_reply":"2024-09-08T06:04:28.022149Z"},"trusted":true},"execution_count":14,"outputs":[{"name":"stdout","text":"The token has not been saved to the git credentials helper. 
Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\nToken is valid (permission: read).\nYour token has been saved to /root/.cache/huggingface/token\nLogin successful\n","output_type":"stream"}]},{"cell_type":"code","source":"# !lm_eval --tasks list","metadata":{"_kg_hide-input":true,"execution":{"iopub.status.busy":"2024-09-08T06:04:28.026499Z","iopub.execute_input":"2024-09-08T06:04:28.026814Z","iopub.status.idle":"2024-09-08T06:04:28.030815Z","shell.execute_reply.started":"2024-09-08T06:04:28.026780Z","shell.execute_reply":"2024-09-08T06:04:28.029829Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"!lm_eval --model hf \\\n --model_args pretrained=${MODEL_NAME} \\\n --tasks mmlu \\\n --device cuda:0 \\\n --num_fewshot 0 \\\n --batch_size 4 \\\n --output_path results \\\n --use_cache True\\\n --log_samples \\\n --limit 10 \\\n# --hf_hub_log_args hub_results_org=aisuko,hub_repo_name=eval-smolLM-135M,push_results_to_hub=True,push_samples_to_hub=True,public_repo=False","metadata":{"execution":{"iopub.status.busy":"2024-09-08T06:04:28.032151Z","iopub.execute_input":"2024-09-08T06:04:28.032612Z","iopub.status.idle":"2024-09-08T06:07:32.910996Z","shell.execute_reply.started":"2024-09-08T06:04:28.032568Z","shell.execute_reply":"2024-09-08T06:07:32.910022Z"},"trusted":true},"execution_count":16,"outputs":[{"name":"stdout","text":"config.json: 100%|█████████████████████████████| 609/609 [00:00<00:00, 3.07MB/s]\ntokenizer_config.json: 100%|███████████████| 5.37k/5.37k [00:00<00:00, 31.0MB/s]\ntokenizer.json: 100%|██████████████████████| 2.12M/2.12M [00:00<00:00, 27.8MB/s]\nspecial_tokens_map.json: 100%|████████████████| 65.0/65.0 [00:00<00:00, 509kB/s]\nmodel.safetensors.index.json: 100%|████████| 9.25k/9.25k [00:00<00:00, 40.0MB/s]\nDownloading shards: 0%| | 0/2 [00:00<?, ?it/s]\nmodel-00001-of-00002.safetensors: 0%| | 
0.00/4.71G [00:00<?, ?B/s]\u001b[A\nmodel-00001-of-00002.safetensors: 0%| | 10.5M/4.71G [00:00<00:55, 84.8MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 1%| | 31.5M/4.71G [00:00<00:33, 140MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 1%| | 52.4M/4.71G [00:00<00:28, 164MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 2%| | 73.4M/4.71G [00:00<00:26, 177MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 2%| | 94.4M/4.71G [00:00<00:25, 184MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 2%|▏ | 115M/4.71G [00:00<00:24, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 3%|▏ | 136M/4.71G [00:00<00:24, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 3%|▏ | 157M/4.71G [00:00<00:23, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 4%|▏ | 178M/4.71G [00:00<00:23, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 4%|▎ | 199M/4.71G [00:01<00:23, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 5%|▎ | 220M/4.71G [00:01<00:23, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 5%|▎ | 241M/4.71G [00:01<00:23, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 6%|▎ | 262M/4.71G [00:01<00:23, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 6%|▎ | 283M/4.71G [00:01<00:23, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 6%|▍ | 304M/4.71G [00:01<00:23, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 7%|▍ | 325M/4.71G [00:01<00:23, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 7%|▍ | 346M/4.71G [00:01<00:23, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 8%|▍ | 367M/4.71G [00:01<00:22, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 8%|▍ | 388M/4.71G [00:02<00:22, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 9%|▌ | 409M/4.71G [00:02<00:22, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 9%|▌ | 430M/4.71G [00:02<00:22, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 10%|▌ | 451M/4.71G [00:02<00:22, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 10%|▌ | 472M/4.71G [00:02<00:22, 
191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 10%|▋ | 493M/4.71G [00:02<00:22, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 11%|▋ | 514M/4.71G [00:02<00:22, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 11%|▋ | 535M/4.71G [00:02<00:22, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 12%|▋ | 556M/4.71G [00:02<00:22, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 12%|▋ | 577M/4.71G [00:03<00:22, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 13%|▊ | 598M/4.71G [00:03<00:21, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 13%|▊ | 619M/4.71G [00:03<00:21, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 14%|▊ | 640M/4.71G [00:03<00:21, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 14%|▊ | 661M/4.71G [00:03<00:21, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 14%|▊ | 682M/4.71G [00:03<00:21, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 15%|▉ | 703M/4.71G [00:03<00:21, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 15%|▉ | 724M/4.71G [00:03<00:21, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 16%|▉ | 744M/4.71G [00:03<00:20, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 16%|▉ | 765M/4.71G [00:04<00:20, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 17%|█ | 786M/4.71G [00:04<00:20, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 17%|█ | 807M/4.71G [00:04<00:20, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 18%|█ | 828M/4.71G [00:04<00:20, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 18%|█ | 849M/4.71G [00:04<00:19, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 18%|█ | 870M/4.71G [00:04<00:19, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 19%|█▏ | 891M/4.71G [00:04<00:19, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 19%|█▏ | 912M/4.71G [00:04<00:19, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 20%|█▏ | 933M/4.71G [00:04<00:19, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 20%|█▏ | 954M/4.71G [00:05<00:19, 
192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 21%|█▏ | 975M/4.71G [00:05<00:19, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 21%|█▎ | 996M/4.71G [00:05<00:19, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 22%|█ | 1.02G/4.71G [00:05<00:19, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 22%|█ | 1.04G/4.71G [00:05<00:19, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 22%|█ | 1.06G/4.71G [00:05<00:18, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 23%|█▏ | 1.08G/4.71G [00:05<00:18, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 23%|█▏ | 1.10G/4.71G [00:05<00:18, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 24%|█▏ | 1.12G/4.71G [00:05<00:18, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 24%|█▏ | 1.14G/4.71G [00:06<00:18, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 25%|█▏ | 1.16G/4.71G [00:06<00:18, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 25%|█▎ | 1.18G/4.71G [00:06<00:18, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 26%|█▎ | 1.21G/4.71G [00:06<00:18, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 26%|█▎ | 1.23G/4.71G [00:06<00:18, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 27%|█▎ | 1.25G/4.71G [00:06<00:18, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 27%|█▎ | 1.27G/4.71G [00:06<00:17, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 27%|█▎ | 1.29G/4.71G [00:06<00:17, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 28%|█▍ | 1.31G/4.71G [00:06<00:17, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 28%|█▍ | 1.33G/4.71G [00:07<00:17, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 29%|█▍ | 1.35G/4.71G [00:07<00:17, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 29%|█▍ | 1.37G/4.71G [00:07<00:17, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 30%|█▍ | 1.39G/4.71G [00:07<00:17, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 30%|█▌ | 1.42G/4.71G [00:07<00:17, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 31%|█▌ | 1.44G/4.71G [00:07<00:17, 
191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 31%|█▌ | 1.46G/4.71G [00:07<00:16, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 31%|█▌ | 1.48G/4.71G [00:07<00:16, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 32%|█▌ | 1.50G/4.71G [00:07<00:16, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 32%|█▌ | 1.52G/4.71G [00:08<00:16, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 33%|█▋ | 1.54G/4.71G [00:08<00:16, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 33%|█▋ | 1.56G/4.71G [00:08<00:16, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 34%|█▋ | 1.58G/4.71G [00:08<00:16, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 34%|█▋ | 1.60G/4.71G [00:08<00:16, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 35%|█▋ | 1.63G/4.71G [00:08<00:16, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 35%|█▋ | 1.65G/4.71G [00:08<00:15, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 35%|█▊ | 1.67G/4.71G [00:08<00:15, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 36%|█▊ | 1.69G/4.71G [00:08<00:15, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 36%|█▊ | 1.71G/4.71G [00:08<00:15, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 37%|█▊ | 1.73G/4.71G [00:09<00:15, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 37%|█▊ | 1.75G/4.71G [00:09<00:15, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 38%|█▉ | 1.77G/4.71G [00:09<00:15, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 38%|█▉ | 1.79G/4.71G [00:09<00:15, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 39%|█▉ | 1.81G/4.71G [00:09<00:15, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 39%|█▉ | 1.84G/4.71G [00:09<00:15, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 39%|█▉ | 1.86G/4.71G [00:09<00:15, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 40%|█▉ | 1.88G/4.71G [00:09<00:14, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 40%|██ | 1.90G/4.71G [00:09<00:14, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 41%|██ | 1.92G/4.71G 
[00:10<00:14, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 41%|██ | 1.94G/4.71G [00:10<00:14, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 42%|██ | 1.96G/4.71G [00:10<00:14, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 42%|██ | 1.98G/4.71G [00:10<00:14, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 43%|██▏ | 2.00G/4.71G [00:10<00:14, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 43%|██▏ | 2.02G/4.71G [00:10<00:13, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 43%|██▏ | 2.04G/4.71G [00:10<00:14, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 44%|██▏ | 2.07G/4.71G [00:10<00:13, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 44%|██▏ | 2.09G/4.71G [00:10<00:13, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 45%|██▏ | 2.11G/4.71G [00:11<00:13, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 45%|██▎ | 2.13G/4.71G [00:11<00:13, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 46%|██▎ | 2.15G/4.71G [00:11<00:13, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 46%|██▎ | 2.17G/4.71G [00:11<00:13, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 47%|██▎ | 2.19G/4.71G [00:11<00:13, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 47%|██▎ | 2.21G/4.71G [00:11<00:12, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 47%|██▎ | 2.23G/4.71G [00:11<00:12, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 48%|██▍ | 2.25G/4.71G [00:11<00:12, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 48%|██▍ | 2.28G/4.71G [00:11<00:12, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 49%|██▍ | 2.30G/4.71G [00:12<00:12, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 49%|██▍ | 2.32G/4.71G [00:12<00:12, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 50%|██▍ | 2.34G/4.71G [00:12<00:12, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 50%|██▌ | 2.36G/4.71G [00:12<00:12, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 51%|██▌ | 2.38G/4.71G [00:12<00:12, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 
51%|██▌ | 2.40G/4.71G [00:12<00:12, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 51%|██▌ | 2.42G/4.71G [00:12<00:11, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 52%|██▌ | 2.44G/4.71G [00:12<00:11, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 52%|██▌ | 2.46G/4.71G [00:12<00:11, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 53%|██▋ | 2.49G/4.71G [00:13<00:11, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 53%|██▋ | 2.51G/4.71G [00:13<00:11, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 54%|██▋ | 2.53G/4.71G [00:13<00:11, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 54%|██▋ | 2.55G/4.71G [00:13<00:11, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 55%|██▋ | 2.57G/4.71G [00:13<00:11, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 55%|██▊ | 2.59G/4.71G [00:13<00:17, 119MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 55%|██▊ | 2.61G/4.71G [00:13<00:15, 134MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 56%|██▊ | 2.63G/4.71G [00:14<00:14, 148MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 56%|██▊ | 2.65G/4.71G [00:14<00:12, 158MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 57%|██▊ | 2.67G/4.71G [00:14<00:12, 168MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 57%|██▊ | 2.69G/4.71G [00:14<00:11, 176MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 58%|██▉ | 2.72G/4.71G [00:14<00:10, 182MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 58%|██▉ | 2.74G/4.71G [00:14<00:10, 186MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 59%|██▉ | 2.76G/4.71G [00:14<00:10, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 59%|██▉ | 2.78G/4.71G [00:14<00:10, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 59%|██▉ | 2.80G/4.71G [00:14<00:10, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 60%|██▉ | 2.82G/4.71G [00:15<00:09, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 60%|███ | 2.84G/4.71G [00:15<00:09, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 61%|███ | 2.86G/4.71G [00:15<00:09, 
189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 61%|███ | 2.88G/4.71G [00:15<00:09, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 62%|███ | 2.90G/4.71G [00:15<00:09, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 62%|███ | 2.93G/4.71G [00:15<00:09, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 63%|███▏ | 2.95G/4.71G [00:15<00:09, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 63%|███▏ | 2.97G/4.71G [00:15<00:08, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 63%|███▏ | 2.99G/4.71G [00:15<00:08, 195MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 64%|███▏ | 3.01G/4.71G [00:15<00:08, 196MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 64%|███▏ | 3.03G/4.71G [00:16<00:08, 196MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 65%|███▏ | 3.05G/4.71G [00:16<00:08, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 65%|███▎ | 3.07G/4.71G [00:16<00:08, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 66%|███▎ | 3.09G/4.71G [00:16<00:08, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 66%|███▎ | 3.11G/4.71G [00:16<00:08, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 67%|███▎ | 3.14G/4.71G [00:16<00:08, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 67%|███▎ | 3.16G/4.71G [00:16<00:08, 185MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 67%|███▎ | 3.18G/4.71G [00:16<00:08, 183MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 68%|███▍ | 3.20G/4.71G [00:17<00:08, 183MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 68%|███▍ | 3.22G/4.71G [00:17<00:08, 183MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 69%|███▍ | 3.24G/4.71G [00:17<00:08, 183MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 69%|███▍ | 3.26G/4.71G [00:17<00:08, 180MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 70%|███▍ | 3.28G/4.71G [00:17<00:07, 182MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 70%|███▌ | 3.30G/4.71G [00:17<00:07, 182MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 71%|███▌ | 3.32G/4.71G [00:17<00:07, 
180MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 71%|███▌ | 3.34G/4.71G [00:17<00:07, 182MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 72%|███▌ | 3.37G/4.71G [00:17<00:07, 180MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 72%|███▌ | 3.39G/4.71G [00:18<00:07, 183MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 72%|███▌ | 3.41G/4.71G [00:18<00:07, 184MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 73%|███▋ | 3.43G/4.71G [00:18<00:06, 185MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 73%|███▋ | 3.45G/4.71G [00:18<00:06, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 74%|███▋ | 3.47G/4.71G [00:18<00:06, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 74%|███▋ | 3.49G/4.71G [00:18<00:06, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 75%|███▋ | 3.51G/4.71G [00:18<00:06, 186MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 75%|███▊ | 3.53G/4.71G [00:18<00:06, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 76%|███▊ | 3.55G/4.71G [00:18<00:06, 185MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 76%|███▊ | 3.58G/4.71G [00:19<00:06, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 76%|███▊ | 3.60G/4.71G [00:19<00:05, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 77%|███▊ | 3.62G/4.71G [00:19<00:05, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 77%|███▊ | 3.64G/4.71G [00:19<00:05, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 78%|███▉ | 3.66G/4.71G [00:19<00:05, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 78%|███▉ | 3.68G/4.71G [00:19<00:05, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 79%|███▉ | 3.70G/4.71G [00:19<00:05, 185MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 79%|███▉ | 3.72G/4.71G [00:19<00:05, 185MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 80%|███▉ | 3.74G/4.71G [00:19<00:05, 185MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 80%|███▉ | 3.76G/4.71G [00:20<00:05, 186MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 80%|████ | 3.79G/4.71G [00:20<00:04, 
187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 81%|████ | 3.81G/4.71G [00:20<00:04, 186MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 81%|████ | 3.83G/4.71G [00:20<00:04, 187MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 82%|████ | 3.85G/4.71G [00:20<00:04, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 82%|████ | 3.87G/4.71G [00:20<00:04, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 83%|████▏| 3.89G/4.71G [00:20<00:04, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 83%|████▏| 3.91G/4.71G [00:20<00:04, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 84%|████▏| 3.93G/4.71G [00:20<00:04, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 84%|████▏| 3.95G/4.71G [00:21<00:03, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 84%|████▏| 3.97G/4.71G [00:21<00:03, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 85%|████▏| 4.00G/4.71G [00:21<00:03, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 85%|████▎| 4.02G/4.71G [00:21<00:03, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 86%|████▎| 4.04G/4.71G [00:21<00:03, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 86%|████▎| 4.06G/4.71G [00:21<00:03, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 87%|████▎| 4.08G/4.71G [00:21<00:03, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 87%|████▎| 4.10G/4.71G [00:21<00:03, 192MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 88%|████▍| 4.12G/4.71G [00:21<00:03, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 88%|████▍| 4.14G/4.71G [00:22<00:02, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 88%|████▍| 4.16G/4.71G [00:22<00:02, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 89%|████▍| 4.18G/4.71G [00:22<00:02, 195MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 89%|████▍| 4.20G/4.71G [00:22<00:02, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 90%|████▍| 4.23G/4.71G [00:22<00:02, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 90%|████▌| 4.25G/4.71G [00:22<00:02, 
194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 91%|████▌| 4.27G/4.71G [00:22<00:02, 195MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 91%|████▌| 4.29G/4.71G [00:22<00:02, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 92%|████▌| 4.31G/4.71G [00:22<00:02, 194MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 92%|████▌| 4.33G/4.71G [00:23<00:01, 195MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 92%|████▌| 4.35G/4.71G [00:23<00:01, 193MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 93%|████▋| 4.37G/4.71G [00:23<00:01, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 93%|████▋| 4.39G/4.71G [00:23<00:01, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 94%|████▋| 4.41G/4.71G [00:23<00:01, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 94%|████▋| 4.44G/4.71G [00:23<00:01, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 95%|████▋| 4.46G/4.71G [00:23<00:01, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 95%|████▊| 4.48G/4.71G [00:23<00:01, 189MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 96%|████▊| 4.50G/4.71G [00:23<00:01, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 96%|████▊| 4.52G/4.71G [00:23<00:00, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 96%|████▊| 4.54G/4.71G [00:24<00:00, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 97%|████▊| 4.56G/4.71G [00:24<00:00, 191MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 97%|████▊| 4.58G/4.71G [00:24<00:00, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 98%|████▉| 4.60G/4.71G [00:24<00:00, 190MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 98%|████▉| 4.62G/4.71G [00:24<00:00, 188MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 99%|████▉| 4.65G/4.71G [00:24<00:00, 186MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 99%|████▉| 4.67G/4.71G [00:25<00:00, 117MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 100%|████▉| 4.69G/4.71G [00:25<00:00, 131MB/s]\u001b[A\nmodel-00001-of-00002.safetensors: 100%|█████| 4.71G/4.71G [00:25<00:00, 187MB/s]\u001b[A\nDownloading shards: 
50%|████████████▌ | 1/2 [00:25<00:25, 25.44s/it]\nmodel-00002-of-00002.safetensors: 0%| | 0.00/412M [00:00<?, ?B/s]\u001b[A\nmodel-00002-of-00002.safetensors: 3%|▏ | 10.5M/412M [00:00<00:08, 47.2MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 8%|▍ | 31.5M/412M [00:00<00:04, 89.4MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 13%|▊ | 52.4M/412M [00:00<00:03, 118MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 18%|█ | 73.4M/412M [00:00<00:02, 140MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 23%|█▎ | 94.4M/412M [00:00<00:02, 153MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 28%|█▉ | 115M/412M [00:00<00:01, 164MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 33%|██▎ | 136M/412M [00:00<00:01, 170MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 38%|██▋ | 157M/412M [00:01<00:01, 175MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 43%|███ | 178M/412M [00:01<00:01, 179MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 48%|███▍ | 199M/412M [00:01<00:01, 181MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 53%|███▋ | 220M/412M [00:01<00:01, 183MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 59%|████ | 241M/412M [00:01<00:00, 185MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 64%|████▍ | 262M/412M [00:01<00:00, 185MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 69%|████▊ | 283M/412M [00:01<00:00, 184MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 74%|█████▏ | 304M/412M [00:01<00:00, 183MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 79%|█████▌ | 325M/412M [00:01<00:00, 183MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 84%|█████▉ | 346M/412M [00:02<00:00, 183MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 89%|██████▏| 367M/412M [00:02<00:00, 185MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 94%|██████▌| 388M/412M [00:02<00:00, 185MB/s]\u001b[A\nmodel-00002-of-00002.safetensors: 100%|███████| 412M/412M [00:02<00:00, 168MB/s]\u001b[A\nDownloading shards: 100%|█████████████████████████| 2/2 [00:28<00:00, 14.02s/it]\nLoading checkpoint shards: 100%|██████████████████| 
2/2 [00:13<00:00, 6.59s/it]\ngeneration_config.json: 100%|███████████████████| 115/115 [00:00<00:00, 907kB/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 450.69it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 466.39it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 452.50it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 462.79it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 460.12it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 464.69it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 465.54it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 448.37it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 450.29it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 449.50it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 467.08it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 452.20it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 457.49it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 450.66it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 456.55it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 455.60it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 470.52it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 469.05it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 464.26it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 470.10it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 457.51it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 460.94it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 
462.57it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 454.53it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 444.63it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 439.60it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 438.05it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 445.80it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 442.90it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 453.28it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 443.37it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 451.84it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 464.87it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 452.34it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 470.57it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 456.12it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 462.12it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 461.02it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 464.92it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 456.53it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 471.23it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 466.37it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 446.73it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 453.86it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 405.30it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 434.84it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 
425.20it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 429.82it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 419.33it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 441.01it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 450.38it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 471.50it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 451.18it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 463.28it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 459.06it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 472.32it/s]\n100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 464.28it/s]\nChecking cached requests: 100%|███████████| 2280/2280 [00:00<00:00, 4497.98it/s]\nRunning loglikelihood requests: 0%| | 0/2280 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. 
Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\nRunning loglikelihood requests: 100%|███████| 2280/2280 [00:46<00:00, 48.72it/s]\nhf (pretrained=allenai/OLMo-1B-0724-hf), gen_kwargs: (None), limit: 10.0, num_fewshot: 0, batch_size: 4\n| Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr|\n|---------------------------------------|------:|------|-----:|------|---|-----:|---|-----:|\n|mmlu | 2|none | |acc |↑ |0.2895|± |0.0188|\n| - humanities | 2|none | |acc |↑ |0.2385|± |0.0371|\n| - formal_logic | 1|none | 0|acc |↑ |0.0000|± |0.0000|\n| - high_school_european_history | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - high_school_us_history | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - high_school_world_history | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - international_law | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - jurisprudence | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - logical_fallacies | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - moral_disputes | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - moral_scenarios | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - philosophy | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - prehistory | 1|none | 0|acc |↑ |0.5000|± |0.1667|\n| - professional_law | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - world_religions | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - other | 2|none | |acc |↑ |0.3000|± |0.0390|\n| - business_ethics | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - clinical_knowledge | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - college_medicine | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - global_facts | 1|none | 0|acc |↑ |0.5000|± |0.1667|\n| - human_aging | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - management | 1|none | 0|acc |↑ |0.0000|± |0.0000|\n| - marketing | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - medical_genetics | 1|none | 0|acc |↑ |0.5000|± |0.1667|\n| - miscellaneous | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - nutrition | 1|none | 0|acc |↑ |0.4000|± 
|0.1633|\n| - professional_accounting | 1|none | 0|acc |↑ |0.5000|± |0.1667|\n| - professional_medicine | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - virology | 1|none | 0|acc |↑ |0.0000|± |0.0000|\n| - social sciences | 2|none | |acc |↑ |0.3167|± |0.0423|\n| - econometrics | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - high_school_geography | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - high_school_government_and_politics| 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - high_school_macroeconomics | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - high_school_microeconomics | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - high_school_psychology | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - human_sexuality | 1|none | 0|acc |↑ |0.6000|± |0.1633|\n| - professional_psychology | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - public_relations | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - security_studies | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - sociology | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - us_foreign_policy | 1|none | 0|acc |↑ |0.6000|± |0.1633|\n| - stem | 2|none | |acc |↑ |0.3000|± |0.0331|\n| - abstract_algebra | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - anatomy | 1|none | 0|acc |↑ |0.0000|± |0.0000|\n| - astronomy | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - college_biology | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - college_chemistry | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - college_computer_science | 1|none | 0|acc |↑ |0.4000|± |0.1633|\n| - college_mathematics | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - college_physics | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - computer_security | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - conceptual_physics | 1|none | 0|acc |↑ |0.1000|± |0.1000|\n| - electrical_engineering | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - elementary_mathematics | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - high_school_biology | 1|none | 0|acc |↑ |0.6000|± |0.1633|\n| - high_school_chemistry | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - high_school_computer_science | 1|none | 
0|acc |↑ |0.5000|± |0.1667|\n| - high_school_mathematics | 1|none | 0|acc |↑ |0.6000|± |0.1633|\n| - high_school_physics | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n| - high_school_statistics | 1|none | 0|acc |↑ |0.2000|± |0.1333|\n| - machine_learning | 1|none | 0|acc |↑ |0.3000|± |0.1528|\n\n| Groups |Version|Filter|n-shot|Metric| |Value | |Stderr|\n|------------------|------:|------|------|------|---|-----:|---|-----:|\n|mmlu | 2|none | |acc |↑ |0.2895|± |0.0188|\n| - humanities | 2|none | |acc |↑ |0.2385|± |0.0371|\n| - other | 2|none | |acc |↑ |0.3000|± |0.0390|\n| - social sciences| 2|none | |acc |↑ |0.3167|± |0.0423|\n| - stem | 2|none | |acc |↑ |0.3000|± |0.0331|\n\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Related issues\n\n* https://github.com/EleutherAI/lm-evaluation-harness/issues/2263","metadata":{}}]}