-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: Evaluate model smollm-360M-instruct-v0.2-Q8_0-GGUF
1 lines (1 loc) · 4.09 KB
/
Evaluate model smollm-360M-instruct-v0.2-Q8_0-GGUF
1
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[],"dockerImageVersionId":30761,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Overview\n\nEvaluate model HuggingFaceTB/smollm-360M-instruct-v0.2-Q8_0-GGUF by using https://github.com/EleutherAI/lm-evaluation-harness\n","metadata":{}},{"cell_type":"markdown","source":"**Status:** this notebook is not complete yet. The approach below serves the GGUF model with llama.cpp's `llama-server` and points lm-evaluation-harness's `gguf` model type at it.","metadata":{}},{"cell_type":"code","source":"# !pip install -U -q lm-eval==0.4.3","metadata":{"execution":{"iopub.status.busy":"2024-09-04T16:22:50.462265Z","iopub.execute_input":"2024-09-04T16:22:50.463469Z","iopub.status.idle":"2024-09-04T16:22:50.477606Z","shell.execute_reply.started":"2024-09-04T16:22:50.463410Z","shell.execute_reply":"2024-09-04T16:22:50.476246Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"!git clone https://github.com/EleutherAI/lm-evaluation-harness\n%cd lm-evaluation-harness\n!pip install -e .","metadata":{"execution":{"iopub.status.busy":"2024-09-04T16:22:50.478986Z","iopub.execute_input":"2024-09-04T16:22:50.479400Z","iopub.status.idle":"2024-09-04T16:23:51.981985Z","shell.execute_reply.started":"2024-09-04T16:22:50.479360Z","shell.execute_reply":"2024-09-04T16:23:51.980453Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import os\nfrom huggingface_hub import login\nfrom kaggle_secrets import UserSecretsClient\nuser_secrets = UserSecretsClient()\n\nos.environ[\"HF_TOKEN\"]=user_secrets.get_secret(\"HUGGINGFACE_TOKEN\")\n\nos.environ[\"WANDB_API_KEY\"]=user_secrets.get_secret(\"WANDB_API_KEY\")\nos.environ[\"WANDB_PROJECT\"] = \"Evaluating HuggingFace SmolLM-135M-Instruct\"\nos.environ[\"WANDB_NAME\"] = \"eva-smollm-135M-instruct\"\nos.environ[\"MODEL_NAME\"] = \"smollm-360M-instruct-v0.2-Q8_0-GGUF\"\n\n\nlogin(os.environ[\"HF_TOKEN\"])","metadata":{"execution":{"iopub.status.busy":"2024-09-04T16:23:51.985646Z","iopub.execute_input":"2024-09-04T16:23:51.986230Z","iopub.status.idle":"2024-09-04T16:23:52.811485Z","shell.execute_reply.started":"2024-09-04T16:23:51.986155Z","shell.execute_reply":"2024-09-04T16:23:52.810245Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# !lm_eval --tasks list","metadata":{"execution":{"iopub.status.busy":"2024-09-04T16:23:52.813211Z","iopub.execute_input":"2024-09-04T16:23:52.813640Z","iopub.status.idle":"2024-09-04T16:23:52.818757Z","shell.execute_reply.started":"2024-09-04T16:23:52.813598Z","shell.execute_reply":"2024-09-04T16:23:52.817497Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"%cd\n!pip install cmake\n!git clone https://github.com/ggerganov/llama.cpp.git\n%cd llama.cpp/\n!cmake -B build\n!cmake --build build --config Release\n%cd build/bin/\n!wget https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-v0.2-Q8_0-GGUF/resolve/main/smollm-360m-instruct-add-basics-q8_0.gguf\n# Run the server in the BACKGROUND so this cell returns and the evaluation\n# cell below can execute; server output goes to server.log.\n!nohup ./llama-server -m smollm-360m-instruct-add-basics-q8_0.gguf --port 8080 > server.log 2>&1 &","metadata":{"execution":{"iopub.status.busy":"2024-09-04T16:23:52.820081Z","iopub.execute_input":"2024-09-04T16:23:52.820490Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Evaluate via the local llama.cpp server using lm-eval's `gguf` model type.\n# - base_url port (8080) must match the `llama-server --port` in the cell above;\n#   the gguf adapter expects the server base URL, not a /v1/chat/completions path.\n# - `--device` is omitted: inference happens in the server process, and this\n#   Kaggle session has no GPU.\n# - `--use_cache` takes a path to a sqlite cache, not a boolean.\n!lm_eval --model gguf \\\n --model_args base_url=http://127.0.0.1:8080 \\\n --tasks mmlu \\\n --num_fewshot 0 \\\n --batch_size 4 \\\n --output_path results \\\n --use_cache lm_cache \\\n --log_samples \\\n --limit 10\n #--hf_hub_log_args hub_results_org=micost,hub_repo_name=eval-smolLM-135M,push_results_to_hub=True,push_samples_to_hub=True,public_repo=False","metadata":{"trusted":true},"execution_count":null,"outputs":[]}]}