Skip to content

Commit

Permalink
python notebook for easy testing of new LLMs & design prompts
Browse files Browse the repository at this point in the history
  • Loading branch information
icppWorld committed Jan 22, 2025
1 parent 9993bb6 commit 37d333b
Show file tree
Hide file tree
Showing 2 changed files with 246 additions and 0 deletions.
244 changes: 244 additions & 0 deletions scripts/prompt-design.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prompt design for llama.cpp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verify we're in the Conda environment"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/opt/miniconda3/envs/llama_cpp_canister/bin/python\n"
]
}
],
"source": [
"import sys\n",
"\n",
"print(sys.executable)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import python packages"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import json\n",
"import base64\n",
"import io\n",
"from dotenv import load_dotenv\n",
"import requests\n",
"import pprint\n",
"from pathlib import Path\n",
"import subprocess\n",
"import jupyter_black\n",
"import textwrap\n",
"\n",
"# Activate the jupyter_black extension, which reformats code cells with black\n",
"# https://github.com/n8henrie/jupyter-black\n",
"jupyter_black.load()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Command:\n",
" ../../ggerganov_llama_latest.cpp/build/bin/llama-cli -m ../models/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf --no-display-prompt -n 1024 --seed 42 --temp 0.7 -p '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\ngive me a short introduction to LLMs.<|im_end|>\\n<|im_start|>assistant\\n'\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 91\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;66;03m# top_k = 50\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;66;03m# top_p = 0.95\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;66;03m# min_p = 0.05\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[38;5;66;03m# mirostat_lr = 0.1\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;66;03m# mirostat_ent = 5.0\u001b[39;00m\n\u001b[1;32m 90\u001b[0m prompt \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m<|im_start|>system\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mYou are a helpful assistant.<|im_end|>\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m<|im_start|>user\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mgive me a short introduction to LLMs.<|im_end|>\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m<|im_start|>assistant\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 91\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mrun_llama_cpp\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 93\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 94\u001b[0m \u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mtemp\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# top_k,\u001b[39;49;00m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# top_p,\u001b[39;49;00m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# min_p,\u001b[39;49;00m\n\u001b[1;32m 99\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# tfs,\u001b[39;49;00m\n\u001b[1;32m 100\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# typical,\u001b[39;49;00m\n\u001b[1;32m 101\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# mirostat,\u001b[39;49;00m\n\u001b[1;32m 102\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# mirostat_lr,\u001b[39;49;00m\n\u001b[1;32m 103\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# mirostat_ent,\u001b[39;49;00m\n\u001b[1;32m 104\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtextwrap\u001b[39;00m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mprompt:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, textwrap\u001b[38;5;241m.\u001b[39mfill(prompt, width\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m80\u001b[39m))\n",
"Cell \u001b[0;32mIn[4], line 71\u001b[0m, in \u001b[0;36mrun_llama_cpp\u001b[0;34m(prompt, num_tokens, seed, temp)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[1;32m 64\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mCommand:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mLLAMA_CLI_PATH\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m -m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mMODEL\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m --no-display-prompt -n \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_tokens\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m --seed \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mseed\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m --temp \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtemp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m -p \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mprompt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mreplace(\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mn\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 67\u001b[0m ),\n\u001b[1;32m 68\u001b[0m )\n\u001b[1;32m 70\u001b[0m \u001b[38;5;66;03m# Run the command and capture the output\u001b[39;00m\n\u001b[0;32m---> 71\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommand\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstdout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPIPE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstderr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPIPE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 73\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 74\u001b[0m output \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mstdout\n\u001b[1;32m 75\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output\n",
"File \u001b[0;32m/opt/miniconda3/envs/llama_cpp_canister/lib/python3.11/subprocess.py:550\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 548\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Popen(\u001b[38;5;241m*\u001b[39mpopenargs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;28;01mas\u001b[39;00m process:\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 550\u001b[0m stdout, stderr \u001b[38;5;241m=\u001b[39m \u001b[43mprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcommunicate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 551\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m TimeoutExpired \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 552\u001b[0m process\u001b[38;5;241m.\u001b[39mkill()\n",
"File \u001b[0;32m/opt/miniconda3/envs/llama_cpp_canister/lib/python3.11/subprocess.py:1209\u001b[0m, in \u001b[0;36mPopen.communicate\u001b[0;34m(self, input, timeout)\u001b[0m\n\u001b[1;32m 1206\u001b[0m endtime \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1209\u001b[0m stdout, stderr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_communicate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mendtime\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1210\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m:\n\u001b[1;32m 1211\u001b[0m \u001b[38;5;66;03m# https://bugs.python.org/issue25942\u001b[39;00m\n\u001b[1;32m 1212\u001b[0m \u001b[38;5;66;03m# See the detailed comment in .wait().\u001b[39;00m\n\u001b[1;32m 1213\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m/opt/miniconda3/envs/llama_cpp_canister/lib/python3.11/subprocess.py:2115\u001b[0m, in \u001b[0;36mPopen._communicate\u001b[0;34m(self, input, endtime, orig_timeout)\u001b[0m\n\u001b[1;32m 2108\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_timeout(endtime, orig_timeout,\n\u001b[1;32m 2109\u001b[0m stdout, stderr,\n\u001b[1;32m 2110\u001b[0m skip_check_and_raise\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 2111\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m( \u001b[38;5;66;03m# Impossible :)\u001b[39;00m\n\u001b[1;32m 2112\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_check_timeout(..., skip_check_and_raise=True) \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 2113\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfailed to raise TimeoutExpired.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 2115\u001b[0m ready \u001b[38;5;241m=\u001b[39m \u001b[43mselector\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mselect\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2116\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_timeout(endtime, orig_timeout, stdout, stderr)\n\u001b[1;32m 2118\u001b[0m \u001b[38;5;66;03m# XXX Rewrite these to use non-blocking I/O on the file\u001b[39;00m\n\u001b[1;32m 2119\u001b[0m \u001b[38;5;66;03m# objects; they are no longer using C stdio!\u001b[39;00m\n",
"File \u001b[0;32m/opt/miniconda3/envs/llama_cpp_canister/lib/python3.11/selectors.py:415\u001b[0m, in \u001b[0;36m_PollLikeSelector.select\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 413\u001b[0m ready \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 415\u001b[0m fd_event_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_selector\u001b[38;5;241m.\u001b[39mpoll(timeout)\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mInterruptedError\u001b[39;00m:\n\u001b[1;32m 417\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ready\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# Define where the llama-cli is located, relative to this notebook\n",
"LLAMA_CLI_PATH = \"../../ggerganov_llama_b841d0.cpp/llama-cli\" # Current llama_cpp_canister version\n",
"# LLAMA_CLI_PATH = \"../../ggerganov_llama_latest.cpp/build/bin/llama-cli\"\n",
"\n",
"# Select a model to use\n",
"MODEL = \"../models/Qwen/Qwen2.5-0.5B-Instruct-GGUF/qwen2.5-0.5b-instruct-q8_0.gguf\"\n",
"# MODEL = \"../models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q8_0.gguf\"\n",
"# MODEL = (\n",
"# \"../models/tensorblock/SmolLM2-135M-Instruct-GGUF/SmolLM2-135M-Instruct-Q4_K_M.gguf\"\n",
"# )\n",
"# MODEL = \"../models/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf\"\n",
"# MODEL = \"../models/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q2_K.gguf\"\n",
"\n",
"def run_llama_cpp(\n",
" prompt,\n",
" num_tokens,\n",
" seed,\n",
" temp,\n",
" # top_k,\n",
" # top_p,\n",
" # min_p,\n",
" # tfs,\n",
" # typical,\n",
" # mirostat,\n",
" # mirostat_lr,\n",
" # mirostat_ent,\n",
"):\n",
"\n",
" command = [\n",
" LLAMA_CLI_PATH,\n",
" \"-m\",\n",
" MODEL,\n",
" # \"--simple-io\",\n",
" \"--no-display-prompt\", # only return the generated text, without special characters\n",
" # \"-sp\", # output special tokens\n",
" \"-n\",\n",
" f\"{num_tokens}\",\n",
" \"--seed\",\n",
" f\"{seed}\",\n",
" \"--temp\",\n",
" f\"{temp}\",\n",
" # \"--top-k\",\n",
" # f\"{top_k}\",\n",
" # \"--top-p\",\n",
" # f\"{top_p}\",\n",
" # \"--min-p\",\n",
" # f\"{min_p}\",\n",
" # \"--tfs\",\n",
" # f\"{tfs}\",\n",
" # \"--typical\",\n",
" # f\"{typical}\",\n",
" # \"--mirostat\",\n",
" # f\"{mirostat}\",\n",
" # \"--mirostat-lr\",\n",
" # f\"{mirostat_lr}\",\n",
" # \"--mirostat-ent\",\n",
" # f\"{mirostat_ent}\",\n",
" \"-p\",\n",
" prompt,\n",
" ]\n",
"\n",
" # Print the command on a single line for terminal use, preserving \\n\n",
" print(\n",
" \"\\nCommand:\\n\",\n",
" f\"{LLAMA_CLI_PATH} -m {MODEL} --no-display-prompt -n {num_tokens} --seed {seed} --temp {temp} -p '{prompt}'\".replace(\n",
" \"\\n\", \"\\\\n\"\n",
" ),\n",
" )\n",
"\n",
" # Run the command and capture the output\n",
" result = subprocess.run(\n",
" command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True\n",
" )\n",
" output = result.stdout\n",
" return output\n",
"\n",
"\n",
"seed = 42\n",
"num_tokens = 1024\n",
"temp = 0.7\n",
"# top_k = 50\n",
"# top_p = 0.95\n",
"# min_p = 0.05\n",
"# tfs = 0.9\n",
"# typical = 0.9\n",
"# mirostat = 2\n",
"# mirostat_lr = 0.1\n",
"# mirostat_ent = 5.0\n",
"\n",
"prompt = f\"<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\ngive me a short introduction to LLMs.<|im_end|>\\n<|im_start|>assistant\\n\"\n",
"response = run_llama_cpp(\n",
" prompt,\n",
" num_tokens,\n",
" seed,\n",
" temp,\n",
" # top_k,\n",
" # top_p,\n",
" # min_p,\n",
" # tfs,\n",
" # typical,\n",
" # mirostat,\n",
" # mirostat_lr,\n",
" # mirostat_ent,\n",
")\n",
"\n",
"import textwrap\n",
"\n",
"print(\"\\nprompt:\\n\", textwrap.fill(prompt, width=80))\n",
"print(\"\\nresponse:\\n\", textwrap.fill(response, width=80))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llama_cpp_canister",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
ipykernel
jupyter_black
requests
python-dotenv
black
Expand Down

0 comments on commit 37d333b

Please sign in to comment.