-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: Small Language Model is What We Need
1 lines (1 loc) · 10.1 KB
/
Small Language Model is What We Need
1
{"metadata": {"kernelspec": {"language": "python", "display_name": "Python 3", "name": "python3"}, "language_info": {"name": "python", "version": "3.10.13", "mimetype": "text/x-python", "codemirror_mode": {"name": "ipython", "version": 3}, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py"}, "kaggle": {"accelerator": "gpu", "dataSources": [], "dockerImageVersionId": 30699, "isInternetEnabled": true, "language": "python", "sourceType": "notebook", "isGpuEnabled": true}}, "nbformat_minor": 4, "nbformat": 4, "cells": [
{"cell_type": "markdown", "source": "# Overview\n\nOpenELM is an open-source model. It is open from head to toe: not only the weights, but also the training data, hyperparameters and code are released. This 270M-parameter variant is the smallest modern GPT-style model with a long context (2048 tokens) that is actually usable.\n\nAccording to Apple, it uses a layer-wise scaling strategy to efficiently allocate parameters within each layer of the transformer model. This concept comes from the DeLighT: Deep and Light-weight Transformer paper.", "metadata": {"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},
{"cell_type": "code", "source": "# %pip (rather than !pip) guarantees the install targets this kernel's environment.\n%pip install -U -q transformers==4.39.3 accelerate==0.28.0", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "import os\n\nimport torch\nfrom huggingface_hub import login\nfrom kaggle_secrets import UserSecretsClient\n\n# The Llama-2 tokenizer repo is gated on the Hub, so authenticate first.\n# NOTE: requires a Kaggle user secret named HUGGINGFACE_TOKEN attached to this kernel;\n# without it, get_secret() raises a BackendError.\nuser_secrets = UserSecretsClient()\nlogin(token=user_secrets.get_secret(\"HUGGINGFACE_TOKEN\"))\n\nos.environ[\"TOKENIZER\"] = \"meta-llama/Llama-2-7b-hf\"\nos.environ[\"MODEL\"] = \"apple/OpenELM-270M-Instruct\"\n\n# Fall back to CPU so the notebook still runs on a kernel without a GPU.\nDEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "from transformers import AutoTokenizer\n\n# Load the tokenizer configured above (Llama-2, as used in Apple's OpenELM examples).\ntokenizer = AutoTokenizer.from_pretrained(os.getenv(\"TOKENIZER\"))", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "from transformers import AutoModelForCausalLM\n\n# trust_remote_code lets the custom model code published with OpenELM on the Hub run.\nmodel = AutoModelForCausalLM.from_pretrained(os.getenv(\"MODEL\"), trust_remote_code=True)\nmodel.eval()\nmodel.to(DEVICE)\nmodel.device", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "from transformers import TextStreamer\n\n# Streams decoded tokens to stdout as they are generated.\n# https://huggingface.co/docs/transformers/main/en/internal/generation_utils#transformers.TextStreamer\nstreamer = TextStreamer(tokenizer)", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "prompt = \"The weather in Melbourne is pretty good today\"\n\n# Tokenize and move the input tensors to the same device as the model.\ntokenized_prompt = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "# Low temperature keeps the continuation focused; repetition_penalty discourages loops.\n_ = model.generate(\n    **tokenized_prompt,\n    streamer=streamer,\n    max_new_tokens=200,\n    pad_token_id=0,\n    repetition_penalty=1.2,\n    do_sample=True,\n    temperature=0.1,\n    top_k=50,\n)", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "markdown", "source": "# Acknowledgements\n\n* https://medium.com/the-ai-explorer/omg-270-millions-small-apple-to-do-what-1db9eb0601f1", "metadata": {}},
{"cell_type": "markdown", "source": "# References\n\n* https://github.com/apple/corenet/tree/main/mlx_examples/open_elm", "metadata": {}}
]}