From 2ba09765eb86ce6de2f802fce962a2ec7a63dfad Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 3 Apr 2026 20:30:47 +0000 Subject: [PATCH] Add Gemma 4 model sampling defaults Add per-model YAML configs and MODEL_NAME_MAPPING entries for all 8 Gemma 4 models (4 instruct + 4 base): - gemma-4-31B-it / gemma-4-31B - gemma-4-26B-A4B-it / gemma-4-26B-A4B - gemma-4-E2B-it / gemma-4-E2B - gemma-4-E4B-it / gemma-4-E4B GGUF variants (only for -it models) resolve via the gemma-4 family entry in inference_defaults.json. Sampling defaults: temperature=1.0, top_p=0.95, top_k=64, min_p=0.0, no repetition or presence penalty. Matches gemma-3n and gemma-3. --- .../assets/configs/inference_defaults.json | 10 +++- .../gemma/unsloth_gemma-4-26B-A4B-it.yaml | 47 +++++++++++++++++++ .../gemma/unsloth_gemma-4-26B-A4B.yaml | 47 +++++++++++++++++++ .../gemma/unsloth_gemma-4-31B-it.yaml | 47 +++++++++++++++++++ .../gemma/unsloth_gemma-4-31B.yaml | 47 +++++++++++++++++++ .../gemma/unsloth_gemma-4-E2B-it.yaml | 47 +++++++++++++++++++ .../gemma/unsloth_gemma-4-E2B.yaml | 47 +++++++++++++++++++ .../gemma/unsloth_gemma-4-E4B-it.yaml | 47 +++++++++++++++++++ .../gemma/unsloth_gemma-4-E4B.yaml | 47 +++++++++++++++++++ studio/backend/utils/models/model_config.py | 32 +++++++++++++ 10 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B-it.yaml create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B.yaml create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B-it.yaml create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B.yaml create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B-it.yaml create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B.yaml create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B-it.yaml create mode 100644 studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B.yaml diff --git a/studio/backend/assets/configs/inference_defaults.json b/studio/backend/assets/configs/inference_defaults.json index f520041add..1b4b5381e8 100644 --- a/studio/backend/assets/configs/inference_defaults.json +++ b/studio/backend/assets/configs/inference_defaults.json @@ -93,6 +93,14 @@ "min_p": 0.0, "repetition_penalty": 1.0 }, + "gemma-4": { + "temperature": 1.0, + "top_p": 0.95, + "top_k": 64, + "min_p": 0.0, + "repetition_penalty": 1.0, + "presence_penalty": 0.0 + }, "gemma-3n": { "temperature": 1.0, "top_p": 0.95, @@ -366,7 +374,7 @@ "qwen2.5-coder", "qwen2.5-vl", "qwen2.5-omni", "qwen2.5-math", "qwen2.5", "qwen2-vl", "qwen2", "qwq", - "gemma-3n", "gemma-3", "medgemma", "gemma-2", + "gemma-4", "gemma-3n", "gemma-3", "medgemma", "gemma-2", "llama-4", "llama-3.3", "llama-3.2", "llama-3.1", "llama-3", "phi-4", "phi-3", "mistral-nemo", "mistral-small", "mistral-large", "magistral", "ministral", diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B-it.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B-it.yaml new file mode 100644 index 0000000000..c80506d9f5 --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B-it.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-26B-A4B-it +# Also applies to: google/gemma-4-26B-A4B-it, unsloth/gemma-4-26B-A4B-it-GGUF + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B.yaml new file mode 100644 index 0000000000..9e579be503 --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-26B-A4B.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-26B-A4B (base/pretrained) +# Also applies to: google/gemma-4-26B-A4B + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B-it.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B-it.yaml new file mode 100644 index 0000000000..cec4ea95e1 --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B-it.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-31B-it +# Also applies to: google/gemma-4-31B-it, unsloth/gemma-4-31B-it-GGUF + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B.yaml new file mode 100644 index 0000000000..717cdd5e63 --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-31B.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-31B (base/pretrained) +# Also applies to: google/gemma-4-31B + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B-it.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B-it.yaml new file mode 100644 index 0000000000..43e3d78a23 --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B-it.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-E2B-it +# Also applies to: google/gemma-4-E2B-it, unsloth/gemma-4-E2B-it-GGUF + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B.yaml new file mode 100644 index 0000000000..bd86cef751 --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E2B.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-E2B (base/pretrained) +# Also applies to: google/gemma-4-E2B + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B-it.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B-it.yaml new file mode 100644 index 0000000000..a8ef51836b --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B-it.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-E4B-it +# Also applies to: google/gemma-4-E4B-it, unsloth/gemma-4-E4B-it-GGUF + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B.yaml b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B.yaml new file mode 100644 index 0000000000..740cc99df5 --- /dev/null +++ b/studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-4-E4B.yaml @@ -0,0 +1,47 @@ +# Model defaults for unsloth/gemma-4-E4B (base/pretrained) +# Also applies to: google/gemma-4-E4B + +training: + trust_remote_code: false + max_seq_length: 2048 + num_epochs: 0 + learning_rate: 2e-4 + batch_size: 2 + gradient_accumulation_steps: 4 + warmup_steps: 5 + max_steps: 30 + save_steps: 30 + weight_decay: 0.001 + random_seed: 3407 + packing: false + train_on_completions: true + gradient_checkpointing: "unsloth" + optim: "adamw_8bit" + lr_scheduler_type: "linear" + +lora: + lora_r: 8 + lora_alpha: 8 + lora_dropout: 0.0 + target_modules: + - "all-linear" + use_rslora: false + use_loftq: false + finetune_vision_layers: true + finetune_language_layers: true + finetune_attention_modules: true + finetune_mlp_modules: true + +logging: + enable_wandb: false + wandb_project: "llm-finetuning" + enable_tensorboard: false + tensorboard_dir: "runs" + log_frequency: 10 + +inference: + trust_remote_code: false + temperature: 1.0 + top_p: 0.95 + top_k: 64 + min_p: 0.0 diff --git a/studio/backend/utils/models/model_config.py b/studio/backend/utils/models/model_config.py index 8c25da4f43..f97ea993eb 100644 --- a/studio/backend/utils/models/model_config.py +++ b/studio/backend/utils/models/model_config.py @@ -159,6 +159,38 @@ def extract_model_size_b(model_id: str) -> float | None: "unsloth/gemma-3n-E4B-unsloth-bnb-4bit", "google/gemma-3n-E4B", ], + "unsloth_gemma-4-31B-it.yaml": [ + "unsloth/gemma-4-31B-it", + "google/gemma-4-31B-it", + ], + "unsloth_gemma-4-26B-A4B-it.yaml": [ + "unsloth/gemma-4-26B-A4B-it", + "google/gemma-4-26B-A4B-it", + ], + "unsloth_gemma-4-E2B-it.yaml": [ + "unsloth/gemma-4-E2B-it", + "google/gemma-4-E2B-it", + ], + "unsloth_gemma-4-E4B-it.yaml": [ + "unsloth/gemma-4-E4B-it", + "google/gemma-4-E4B-it", + ], + "unsloth_gemma-4-31B.yaml": [ + "unsloth/gemma-4-31B", + "google/gemma-4-31B", + ], + "unsloth_gemma-4-26B-A4B.yaml": [ + "unsloth/gemma-4-26B-A4B", + "google/gemma-4-26B-A4B", + ], + "unsloth_gemma-4-E2B.yaml": [ + "unsloth/gemma-4-E2B", + "google/gemma-4-E2B", + ], + "unsloth_gemma-4-E4B.yaml": [ + "unsloth/gemma-4-E4B", + "google/gemma-4-E4B", + ], "unsloth_gpt-oss-20b.yaml": [ "openai/gpt-oss-20b", "unsloth/gpt-oss-20b-unsloth-bnb-4bit",