diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 331317a16..ba009624a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -35,15 +35,13 @@ jobs: - uses: actions/checkout@v3 - name: Install Poetry uses: snok/install-poetry@v1 - with: - version: 1.4.0 - - name: Poetry config - run: poetry self add 'poethepoet[poetry_plugin]' - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.9' + python-version: '3.11' cache: 'poetry' + - name: Poetry config + run: poetry self add 'poethepoet[poetry_plugin]' - name: Install dependencies run: poetry install --with dev - name: Set the version diff --git a/demos/Colab_Compatibility.ipynb b/demos/Colab_Compatibility.ipynb index f81cb2be3..11ad6dc5c 100644 --- a/demos/Colab_Compatibility.ipynb +++ b/demos/Colab_Compatibility.ipynb @@ -16,9 +16,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/m3/z6c6rcdj1rbb2jh9vqpgvxg40000gn/T/ipykernel_57027/2944939757.py:18: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", + "/var/folders/pr/77j77_bs2gl2stxyrvr14x3c0000gn/T/ipykernel_60991/3507779555.py:18: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", " ipython.magic(\"load_ext autoreload\")\n", - "/var/folders/m3/z6c6rcdj1rbb2jh9vqpgvxg40000gn/T/ipykernel_57027/2944939757.py:19: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", + "/var/folders/pr/77j77_bs2gl2stxyrvr14x3c0000gn/T/ipykernel_60991/3507779555.py:19: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", " ipython.magic(\"autoreload 2\")\n" ] } @@ -43,7 +43,7 @@ " # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n", " ipython.magic(\"load_ext autoreload\")\n", " ipython.magic(\"autoreload 2\")\n", - " \n", + "\n", "\n", "\n", "if IN_COLAB or IN_GITHUB:\n", @@ -58,14 +58,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "TransformerLens currently supports 207 models out of the box.\n" + "TransformerLens currently supports 208 models out of the box.\n" ] } ], @@ -96,7 +96,7 @@ "def mark_models_as_tested(model_set: List[str]) -> None:\n", " for model in model_set:\n", " untested_models.remove(model)\n", - " \n", + "\n", "\n", "def run_set(model_set: List[str], device=\"cuda\") -> None:\n", " for model in model_set:\n", @@ -112,12 +112,12 @@ "def run_llama_set(model_set: List[str], weight_root: str, device=\"cuda\") -> None:\n", " for model in model_set:\n", " print(\"Testing \" + model)\n", - " # to run this, make sure weight root is the root that contains all models with the \n", + " # to run this, make sure weight root is the root that contains all models with the\n", " # sub directories sharing the same name as the model in the list of models\n", " tokenizer = LlamaTokenizer.from_pretrained(weight_root + model)\n", " hf_model = LlamaForCausalLM.from_pretrained(weight_root + model, low_cpu_mem_usage=True)\n", " tl_model = HookedTransformer.from_pretrained_no_processing(\n", - " model, \n", + " model,\n", " hf_model=hf_model,\n", " device=device,\n", " fold_ln=False,\n", @@ -309,7 +309,7 @@ "\n", "if IN_COLAB:\n", " run_set(free_compatible)\n", - " \n", + "\n", "mark_models_as_tested(free_compatible)" ] }, @@ -357,6 +357,7 @@ " \"mistralai/Mistral-7B-Instruct-v0.1\",\n", " \"mistralai/Mistral-7B-v0.1\",\n", " \"mistralai/Mistral-Nemo-Base-2407\",\n", + " \"mistralai/Mistral-Small-24B-Base-2501\",\n", " \"Qwen/Qwen-7B\",\n", " \"Qwen/Qwen-7B-Chat\",\n", " \"Qwen/Qwen1.5-4B\",\n", @@ -377,7 +378,7 @@ "\n", "if IN_COLAB:\n", " run_set(paid_gpu_models)\n", - " \n", + "\n", "mark_models_as_tested(paid_gpu_models)" ] }, @@ -410,7 +411,7 @@ "\n", "if IN_COLAB:\n", " run_set(paid_cpu_models, \"cpu\")\n", - " \n", + "\n", "mark_models_as_tested(paid_cpu_models)" ] }, @@ -528,7 +529,7 @@ "# Any models listed in the cell below have not been tested. This should always remain blank. If your\n", "# PR fails due to this notebook, most likely you need to check any new model changes to ensure that\n", "# this notebook is up to date.\n", - "print(*untested_models, sep = '\\n')" + "print(*untested_models, sep=\"\\n\")" ] } ], @@ -548,7 +549,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index 129ef5eb3..b8a4a540b 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -182,6 +182,7 @@ "stabilityai/stablelm-tuned-alpha-7b", "mistralai/Mistral-7B-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mistral-Small-24B-Base-2501", "mistralai/Mistral-Nemo-Base-2407", "mistralai/Mixtral-8x7B-v0.1", "mistralai/Mixtral-8x7B-Instruct-v0.1", @@ -979,7 +980,7 @@ def convert_hf_model_config(model_name: str, **kwargs): "normalization_type": "RMS", "positional_embedding_type": "rotary", "rotary_adjacent_pairs": False, - "rotary_dim": 32, + "rotary_dim": 128, "final_rms": True, "gated_mlp": True, "rotary_base": 500000.0,