-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: Small Language Model is What We Need
1 lines (1 loc) · 10.1 KB
/
Small Language Model is What We Need
1
{"metadata": {"kernelspec": {"language": "python", "display_name": "Python 3", "name": "python3"}, "language_info": {"name": "python", "version": "3.10.13", "mimetype": "text/x-python", "codemirror_mode": {"name": "ipython", "version": 3}, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py"}, "kaggle": {"accelerator": "gpu", "dataSources": [], "dockerImageVersionId": 30699, "isInternetEnabled": true, "language": "python", "sourceType": "notebook", "isGpuEnabled": true}}, "nbformat_minor": 4, "nbformat": 4, "cells": [
{"cell_type": "markdown", "source": "# Overview\n\nOpenELM is an open-source model. It is open from head to toe: not only the weights, but also the training data, hyperparameters and code are released. This 270M-parameter variant is the smallest modern GPT-style model with a long context (2048 tokens) that is actually usable.\n\nAccording to Apple, it uses a layer-wise scaling strategy to efficiently allocate parameters within each layer of the transformer model. This concept comes from the DeLighT: Deep and Light-weight Transformer paper.", "metadata": {"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19"}},
{"cell_type": "code", "source": "# %pip (rather than !pip) guarantees the install targets this kernel's environment.\n%pip install -U -q transformers==4.39.3 accelerate==0.28.0", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "import os\n\nimport torch\nfrom huggingface_hub import login\nfrom kaggle_secrets import UserSecretsClient\n\n# The Llama-2 tokenizer repo is gated on the Hub, so authenticate first.\n# NOTE: requires a Kaggle user secret named HUGGINGFACE_TOKEN attached to this kernel;\n# without it, get_secret() raises a BackendError.\nuser_secrets = UserSecretsClient()\nlogin(token=user_secrets.get_secret(\"HUGGINGFACE_TOKEN\"))\n\nos.environ[\"TOKENIZER\"] = \"meta-llama/Llama-2-7b-hf\"\nos.environ[\"MODEL\"] = \"apple/OpenELM-270M-Instruct\"\n\n# Fall back to CPU so the notebook still runs on a kernel without a GPU.\nDEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "from transformers import AutoTokenizer\n\n# Load the tokenizer configured above (Llama-2, as used in Apple's OpenELM examples).\ntokenizer = AutoTokenizer.from_pretrained(os.getenv(\"TOKENIZER\"))", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "from transformers import AutoModelForCausalLM\n\n# trust_remote_code lets the custom model code published with OpenELM on the Hub run.\nmodel = AutoModelForCausalLM.from_pretrained(os.getenv(\"MODEL\"), trust_remote_code=True)\nmodel.eval()\nmodel.to(DEVICE)\nmodel.device", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "from transformers import TextStreamer\n\n# Streams decoded tokens to stdout as they are generated.\n# https://huggingface.co/docs/transformers/main/en/internal/generation_utils#transformers.TextStreamer\nstreamer = TextStreamer(tokenizer)", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "prompt = \"The weather in Melbourne is pretty good today\"\n\n# Tokenize and move the input tensors to the same device as the model.\ntokenized_prompt = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "code", "source": "# Low temperature keeps the continuation focused; repetition_penalty discourages loops.\n_ = model.generate(\n    **tokenized_prompt,\n    streamer=streamer,\n    max_new_tokens=200,\n    pad_token_id=0,\n    repetition_penalty=1.2,\n    do_sample=True,\n    temperature=0.1,\n    top_k=50,\n)", "metadata": {"trusted": true}, "execution_count": null, "outputs": []},
{"cell_type": "markdown", "source": "# Acknowledgements\n\n* https://medium.com/the-ai-explorer/omg-270-millions-small-apple-to-do-what-1db9eb0601f1", "metadata": {}},
{"cell_type": "markdown", "source": "# References\n\n* https://github.com/apple/corenet/tree/main/mlx_examples/open_elm", "metadata": {}}
]}