diff --git a/unsloth/chat_templates.py b/unsloth/chat_templates.py
index 7b6da3e449..ee4235c74e 100644
--- a/unsloth/chat_templates.py
+++ b/unsloth/chat_templates.py
@@ -23,7 +23,6 @@
     "apply_chat_template",
 
     "test_construct_chat_template",
-    "create_ollama_modelfile",
 ]
 
 from transformers import StoppingCriteria, StoppingCriteriaList
@@ -1079,14 +1078,29 @@ def construct_chat_template( \
         )
     pass
 
+    # Llama-3 special case: instruct checkpoints get <|eot_id|> as an extra EOS, base ones only warn
+    tokenizer_name = tokenizer.name_or_path.lower()
+    if tokenizer_name.startswith(("unsloth/llama-3-8b-instruct", "unsloth/llama-3-70b-instruct")):
+        # Add <|eot_id|>
+        extra_eos_tokens.append("<|eot_id|>")
+    elif ("<|eot_id|>" in extra_eos_tokens or "<|eot_id|>" in chat_template) and \
+        tokenizer_name.startswith(("unsloth/llama-3-8b", "unsloth/llama-3-70b")):
+        # Warn
+        logger.warning(
+            "Unsloth: Base llama-3 models did not train <|eot_id|>.\n"\
+            "Please use the instruct version or use <|end_of_text|>"
+        )
+    pass
+    extra_eos_tokens = list(dict.fromkeys(extra_eos_tokens)) # De-duplicate but keep order so [0] below is deterministic
+
     count_eos = 0
     for eos in extra_eos_tokens:
-        count_eos += len(re.findall(r"{OUTPUT}" + eos.encode("unicode-escape").decode("utf-8"), chat_template))
+        count_eos += len(re.findall(r"{OUTPUT}" + re.escape(eos), chat_template))
     pass
     if count_eos == 0:
         logger.warning("Unsloth: We automatically added an EOS token to stop endless generations.")
         eos = extra_eos_tokens[0]
-        chat_template = re.sub(r"{OUTPUT}", r"{OUTPUT}" + eos.encode("unicode-escape").decode("utf-8"), chat_template)
+        chat_template = chat_template.replace("{OUTPUT}", "{OUTPUT}" + eos) # Literal replace: eos must not be parsed as a regex replacement template
     pass
 
     # O(N^2) search finding 2 repeatted pieces of text
@@ -1151,7 +1165,9 @@ def construct_chat_template( \
     # Check bos_token is in system prompt
     ollama_system = system_part
     has_bos_token = False
+    always_bos_token = False
     if tokenizer("A").input_ids[0] == getattr(tokenizer, "bos_token_id", None):
+        always_bos_token = True
         if ollama_system.startswith(tokenizer.bos_token):
             has_bos_token = True
             ollama_system = ollama_system[len(tokenizer.bos_token):]
@@ -1166,11 +1182,6 @@ def construct_chat_template( \
     input_modelfile  = "{{ if .Prompt }}" + input_part .replace("{INPUT}",  "{{ .Prompt }}") + "{{ end }}"
     output_modelfile = output_part.replace("{OUTPUT}", "{{ .Response }}")
 
-    # Check if EOS token is at the end of the output
-    if not output_modelfile.endswith(tuple(extra_eos_tokens)):
-        output_modelfile += "{__EOS_TOKEN__}"
-    pass
-
     # Ollama EOS
     ollama_eos = get_ollama_eos_tokens(tokenizer, extra_eos_tokens)
     ollama_eos = '\n'.join(f'PARAMETER stop "{eos}"' for eos in ollama_eos)
@@ -1215,10 +1226,7 @@ def process(part, which, content = "message['content']"):
         partial_system = process(system_part, "{SYSTEM}", "messages[0]['content']")
         partial_system = partial_system.replace("{SYSTEM}", "")
 
-        # If {SYSTEM} is non existent, simply just use the content
-        if "{SYSTEM}" not in partial_system:
-            partial_system = "messages[0]['content']"
-        else:
+        if "{SYSTEM}" in partial_system:
             if default_system_message is None:
                 raise RuntimeError("Unsloth: Please specify a default system message!")
         pass
@@ -1226,21 +1234,22 @@ def process(part, which, content = "message['content']"):
         # Separate the BOS
         if has_bos_token:
             partial_system = partial_system.replace(tokenizer.bos_token, "", 1)
+            system_part    = system_part   .replace(tokenizer.bos_token, "", 1)
         pass
-
+        
         partial_system = \
             "{% if messages[0]['role'] == 'system' %}"\
                 "{{ " + partial_system + " }}"\
                 "{% set loop_messages = messages[1:] %}"
         if default_system_message is not None:
             full_system = system_part.replace("{SYSTEM}", default_system_message)
+            if "{SYSTEM}" in system_part:
+                modelfile += '\nSYSTEM "' + default_system_message + '"' # Ollama's instruction is `SYSTEM "..."`, with no colon
+            pass
             partial_system += "{% else %}"\
                 "{{ '" + full_system + "' }}"\
                 "{% set loop_messages = messages %}"\
             "{% endif %}"
-
-            # Add to modelfile
-            modelfile += '\nSYSTEM "' + full_system + '"'
         else:
             partial_system += "{% endif %}"
         pass
@@ -1251,6 +1260,22 @@ def process(part, which, content = "message['content']"):
             jinja_template = "{{ bos_token }}" + jinja_template
     pass
 
+    # Check if system part is the same!
+    jinja_template = re.sub(
+        r"\{\% if messages\[0\]\['role'\] \=\= 'system' \%\}\{\{ '(.+?)' \}\}"\
+        r"\{\% set loop\_messages \= messages\[1\:\] \%\}"\
+        r"\{\% else \%\}\{\{ '\1' \}\}\{\% set loop\_messages \= messages \%\}\{\% endif \%\}"\
+        r"\{\% for message in loop\_messages \%\}",
+        r"{{ '\1' }}{% for message in messages %}",
+        jinja_template, flags = re.MULTILINE | re.DOTALL,
+    )
+
+    # Check jinja template for bos
+    if always_bos_token:
+        if not jinja_template.startswith("{{ bos_token }}"):
+            jinja_template = "{{ bos_token }}" + jinja_template
+    pass
+    
     return modelfile, jinja_template
 pass
 
@@ -1260,7 +1285,7 @@ def test_construct_chat_template():
     from transformers import AutoTokenizer
     tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", token = token)
 
-    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+    chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 
 {SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>
 
@@ -1277,7 +1302,11 @@ def test_construct_chat_template():
       
     extra_eos_tokens = None
 
-    modelfile, jinja_template = construct_chat_template(template, default_system_message, extra_eos_tokens)
+    modelfile, jinja_template = construct_chat_template(
+        tokenizer = tokenizer,
+        chat_template = chat_template,
+        extra_eos_tokens = extra_eos_tokens,
+    )
 
     messages = [
         {"role": "system", "content": "You are an assistant"},
@@ -1291,7 +1320,6 @@ def test_construct_chat_template():
 
     tokenizer.chat_template = jinja_template
     new_output = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
-
     assert(correct_output == new_output)
     pass
 pass
@@ -1344,43 +1372,6 @@ def formatting_prompts_func(examples):
 pass
 
 
-def create_ollama_modelfile(tokenizer, gguf_location):
-    """
-        Creates an Ollama Modelfile.
-        Use ollama.create(model = "new_ollama_model", modelfile = modelfile)
-    """
-    modelfile = getattr(tokenizer, "_ollama_modelfile", None)
-    if modelfile is None:
-        raise RuntimeError(
-            "Unsloth: Tokenizer does not have a `ollama_modelfile` attribute.\n"\
-            "Please use get_chat_template(...)."
-        )
-    pass
-
-    system_message = getattr(tokenizer, "_system_message", None)
-    if system_message is None:
-        __SYSTEM_MESSAGE__ = ""
-    else:
-        __SYSTEM_MESSAGE__ = f'SYSTEM """{system_message}"""'
-    pass
-
-    modelfile = modelfile\
-        .replace("{{", "⚫@✅#🦥")\
-        .replace("}}", "⚡@🦥#⛵")\
-        .format(
-            __FILE_LOCATION__  = gguf_location,
-            __SYSTEM_MESSAGE__ = __SYSTEM_MESSAGE__,
-            __EOS_TOKEN__      = tokenizer.eos_token,
-        )\
-        .replace("⚫@✅#🦥", "{{")\
-        .replace("⚡@🦥#⛵", "}}")\
-        .rstrip()
-    pass
-
-    return modelfile
-pass
-
-
 def create_stopping_criteria(tokenizer, stop_word = "eos_token"):
     class StoppingCriteriaSub(StoppingCriteria):
         __slots__ = "stop_token", "single_match", "length",
diff --git a/unsloth/models/mapper.py b/unsloth/models/mapper.py
index 5ef7583975..4b40065083 100644
--- a/unsloth/models/mapper.py
+++ b/unsloth/models/mapper.py
@@ -47,9 +47,11 @@
         "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     ),
     "unsloth/mistral-7b-instruct-v0.1-bnb-4bit" : (
+        "unsloth/mistral-7b-instruct-v0.1",
         "mistralai/Mistral-7B-Instruct-v0.1",
     ),
     "unsloth/mistral-7b-instruct-v0.2-bnb-4bit" : (
+        "unsloth/mistral-7b-instruct-v0.2",
         "mistralai/Mistral-7B-Instruct-v0.2",
     ),
     "unsloth/llama-2-7b-chat-bnb-4bit" : (
diff --git a/unsloth/save.py b/unsloth/save.py
index f8f884a9d3..9163c6d38d 100644
--- a/unsloth/save.py
+++ b/unsloth/save.py
@@ -891,10 +891,10 @@ def save_to_gguf(
     # Map quant methods
     new_quantization_method = []
     for quant_method in quantization_method:
-        if   quant_method == "not_quantized":  quantization_method = model_dtype
-        elif quant_method == "fast_quantized": quantization_method = "q8_0"
-        elif quant_method == "quantized":      quantization_method = "q4_k_m"
-        elif quant_method is None:             quantization_method = "q8_0"
+        if   quant_method == "not_quantized":  quant_method = model_dtype
+        elif quant_method == "fast_quantized": quant_method = "q8_0"
+        elif quant_method == "quantized":      quant_method = "q4_k_m"
+        elif quant_method is None:             quant_method = "q8_0"
 
         # Check if wrong method
         if quant_method not in ALLOWED_QUANTS.keys():
@@ -978,6 +978,11 @@ def save_to_gguf(
         pass
     pass
 
+    # If only q8_0:
+    if len(quantization_method) == 1 and quantization_method[0] == "q8_0":
+        strength = 0
+    pass
+
     if   strength >= 3: first_conversion = "f32"
     elif strength >= 2: first_conversion = "f16"
     elif strength >= 1: first_conversion = "bf16"
@@ -1008,7 +1013,7 @@ def save_to_gguf(
     n_cpus *= 2
     # Concurrency from https://rentry.org/llama-cpp-conversions#merging-loras-into-a-model
     
-    final_location = f"./{model_directory}-unsloth.{first_conversion.upper()}.gguf"
+    final_location = f"./{model_directory}/unsloth.{first_conversion.upper()}.gguf"
 
     print(f"Unsloth: [1] Converting model at {model_directory} into {first_conversion} GGUF format.\n"\
           f"The output location will be {final_location}\n"\
@@ -1072,12 +1077,12 @@ def save_to_gguf(
 
     full_precision_location = final_location
 
-    all_saved_locations = []
+    all_saved_locations = [full_precision_location,]
     # Convert each type!
     for quant_method in quantization_method:
         if quant_method != first_conversion:
             print(f"Unsloth: [2] Converting GGUF 16bit into {quant_method}. This will take 20 minutes...")
-            final_location = f"./{model_directory}-unsloth.{quant_method.upper()}.gguf"
+            final_location = f"./{model_directory}/unsloth.{quant_method.upper()}.gguf"
 
             command = f"./{quantize_location} {full_precision_location} "\
                 f"{final_location} {quant_method} {n_cpus}"
@@ -1365,6 +1370,29 @@ def fix_tokenizer_bos_token(tokenizer):
 pass
 
 
+def create_ollama_modelfile(tokenizer, gguf_location):
+    """
+        Creates an Ollama Modelfile from the tokenizer's `_ollama_modelfile` template, or returns None if it has none.
+        Use ollama.create(model = "new_ollama_model", modelfile = modelfile)
+    """
+    modelfile = getattr(tokenizer, "_ollama_modelfile", None)
+    if modelfile is None: return None # No template is attached to this tokenizer
+
+    modelfile = modelfile\
+        .replace("{{", "⚫@✅#🦥")\
+        .replace("}}", "⚡@🦥#⛵")\
+        .format( # "{{" / "}}" were swapped for sentinels above so str.format leaves them intact; they are restored below
+            __FILE_LOCATION__  = gguf_location,
+        )\
+        .replace("⚫@✅#🦥", "{{")\
+        .replace("⚡@🦥#⛵", "}}")\
+        .rstrip()
+    pass
+
+    return modelfile
+pass
+
+
 def unsloth_save_pretrained_gguf(
     self,
     save_directory       : Union[str, os.PathLike],
@@ -1500,10 +1528,21 @@ def unsloth_save_pretrained_gguf(
         new_save_directory, quantization_method, first_conversion, makefile,
     )
 
+    # Save Ollama modelfile next to the GGUF files (skipped when the tokenizer has no template)
+    modelfile = create_ollama_modelfile(tokenizer, all_file_locations[0])
+    modelfile_location = None
+    if modelfile is not None:
+        modelfile_location = os.path.join(new_save_directory, "Modelfile")
+        with open(modelfile_location, "w", encoding = "utf-8") as file: # Do not depend on the platform's default encoding
+            file.write(modelfile)
+        pass
+        print(f"Unsloth: Saved Ollama Modelfile to {modelfile_location}")
+    pass
+
     if fix_bos_token:
         logger.warning(
             f"Unsloth: ##### The current model auto adds a BOS token.\n"\
-            "Unsloth: ##### We removed in GGUF's chat template for you."
+            "Unsloth: ##### We removed it in GGUF's chat template for you."
         )
     pass
 
@@ -1520,6 +1559,15 @@ def unsloth_save_pretrained_gguf(
                 new_save_directory.lstrip('/.')
             print(f"Saved GGUF to https://huggingface.co/{link}")
         pass
+
+        # Save modelfile
+        if modelfile_location is not None:
+            username = upload_to_huggingface(
+                self, save_directory, token,
+                "GGUF converted", "gguf", modelfile_location, old_username, private,
+            )
+            print(f"Saved Ollama Modelfile to https://huggingface.co/{link}")
+        pass
     pass
 pass
 
@@ -1654,6 +1702,17 @@ def unsloth_push_to_hub_gguf(
         new_save_directory, quantization_method, first_conversion, makefile,
     )
 
+    # Save Ollama modelfile next to the GGUF files (skipped when the tokenizer has no template)
+    modelfile = create_ollama_modelfile(tokenizer, all_file_locations[0])
+    modelfile_location = None
+    if modelfile is not None:
+        modelfile_location = os.path.join(new_save_directory, "Modelfile")
+        with open(modelfile_location, "w", encoding = "utf-8") as file: # Do not depend on the platform's default encoding
+            file.write(modelfile)
+        pass
+        print(f"Unsloth: Saved Ollama Modelfile to {modelfile_location}")
+    pass
+
     for file_location in all_file_locations:
         print("Unsloth: Uploading GGUF to Huggingface Hub...")
         username = upload_to_huggingface(
@@ -1667,10 +1726,19 @@ def unsloth_push_to_hub_gguf(
         print(f"Saved GGUF to https://huggingface.co/{link}")
     pass
 
+    # Save modelfile
+    if modelfile_location is not None:
+        username = upload_to_huggingface(
+            self, repo_id, token,
+            "GGUF converted", "gguf", modelfile_location, old_username, private,
+        )
+        print(f"Saved Ollama Modelfile to https://huggingface.co/{link}")
+    pass
+
     if fix_bos_token:
         logger.warning(
             f"Unsloth: ##### The current model auto adds a BOS token.\n"\
-            "Unsloth: ##### We removed in GGUF's chat template for you."
+            "Unsloth: ##### We removed it in GGUF's chat template for you."
         )
     pass
 pass