diff --git a/demos/Colab_Compatibility.ipynb b/demos/Colab_Compatibility.ipynb index f1a567b5f..5d8638a26 100644 --- a/demos/Colab_Compatibility.ipynb +++ b/demos/Colab_Compatibility.ipynb @@ -65,7 +65,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "TransformerLens currently supports 216 models out of the box.\n" + "TransformerLens currently supports 217 models out of the box.\n" ] } ], @@ -289,6 +289,7 @@ " \"Qwen/Qwen2.5-1.5B\",\n", " \"Qwen/Qwen2.5-1.5B-Instruct\",\n", " \"Qwen/Qwen3-0.6B\",\n", + " \"Qwen/Qwen3-0.6B-Base\",\n", " \"Qwen/Qwen3-1.7B\",\n", " \"roneneldan/TinyStories-1Layer-21M\",\n", " \"roneneldan/TinyStories-1M\",\n", diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index 8bfb6315d..3bc901dba 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -236,6 +236,7 @@ "Qwen/Qwen2.5-72B-Instruct", "Qwen/QwQ-32B-Preview", "Qwen/Qwen3-0.6B", + "Qwen/Qwen3-0.6B-Base", "Qwen/Qwen3-1.7B", "Qwen/Qwen3-4B", "Qwen/Qwen3-8B", @@ -692,6 +693,7 @@ "Qwen/Qwen2.5-72B-Instruct": ["qwen2.5-72b-instruct"], "Qwen/QwQ-32B-Preview": ["qwen-32b-preview"], "Qwen/Qwen3-0.6B": ["qwen3-0.6b"], + "Qwen/Qwen3-0.6B-Base": ["qwen3-0.6b-base"], "Qwen/Qwen3-1.7B": ["qwen3-1.7b"], "Qwen/Qwen3-4B": ["qwen3-4b"], "Qwen/Qwen3-8B": ["qwen3-8b"],