From 55e3cc8deb857ed6720194bb8da81b03081b1f27 Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Fri, 13 Dec 2024 16:15:32 -0500
Subject: [PATCH 1/9] Improved code legibility

---
 spectrum.py | 100 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 27 deletions(-)

diff --git a/spectrum.py b/spectrum.py
index 38ae27c..14ab2dd 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -1,15 +1,18 @@
 # spectrum.py
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
-import numpy as np
-import json
-from prompt_toolkit.shortcuts import checkboxlist_dialog, input_dialog
 import argparse
-from tqdm import tqdm
+import json
 import os
 import time
 
+import numpy as np
+import torch
+from prompt_toolkit.shortcuts import checkboxlist_dialog, input_dialog
+from tqdm import tqdm
+from transformers import AutoConfig, AutoModelForCausalLM
+
+
 class ModelModifier:
+
     def __init__(self, model_name=None, top_percent=50, batch_size=1):
         self.model_name = model_name
         self.top_percent = top_percent
@@ -18,15 +21,16 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1):
         if model_name:
             try:
                 self.model = AutoModelForCausalLM.from_pretrained(
-                    model_name, 
-                    torch_dtype=torch.float32, 
-                    low_cpu_mem_usage=True, 
-                    trust_remote_code=True, 
+                    model_name,
+                    torch_dtype=torch.float32,
+                    low_cpu_mem_usage=True,
+                    trust_remote_code=True,
                     device_map="auto"
                 )
             except KeyError as e:
                 print(f"Error loading model: {e}")
                 print("Attempting to load with custom configuration...")
+
                 config = AutoConfig.from_pretrained(model_name)
                 config.rope_scaling = {"type": "linear", "factor": 1.0}
                 self.model = AutoModelForCausalLM.from_pretrained(
@@ -37,7 +41,7 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1):
                     trust_remote_code=True,
                     device_map="auto"
                 )
-            
+
             # Check if the model config has rope_scaling
             if not hasattr(self.model.config, 'rope_scaling'):
                 self.model.config.rope_scaling = {'type': 'linear'}
@@ -48,40 +52,51 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1):
         else:
             self.model = None
 
-        self.layer_snr = {}
-        self.layer_types = []
+        self.layer_snr = dict()
+        self.layer_types = list()
+
 
     def get_weight_types(self):
         weight_types = set()
+
         for name, module in self.model.named_modules():
             parts = name.split('.')
+
             if any(hasattr(module, attr) for attr in ['weight', 'bias','inv_freq']):
                 layer_index = next((i for i, part in enumerate(parts) if part.isdigit()), -1)
                 weight_type = '.'.join(parts[layer_index + 1:]) if layer_index != -1 else name
                 weight_types.add(weight_type)
+
         return list(weight_types)
 
+
     def interactive_select_weights(self):
         weight_types = self.get_weight_types()
         sorted_weight_types = self.sort_weight_types(weight_types)
         selected_types = checkboxlist_dialog(
-            title="Select Weight Types", 
+            title="Select Weight Types",
             text="Deselect the weight types you do not want to scan for SNR:",
             values=[(wt, wt) for wt in sorted_weight_types],
             default_values=sorted_weight_types
         ).run()
         self.layer_types = selected_types
+
         return selected_types
 
+
     def sort_weight_types(self, weight_types):
-        categories = {}
+        categories = dict()
+
         for wt in weight_types:
             category = wt.split('.')[0]
-            categories.setdefault(category, []).append(wt)
+            categories.setdefault(category, list()).append(wt)
+
         sorted_categories = {k: sorted(v) for k, v in sorted(categories.items(), key=lambda item: item[0])}
         sorted_weight_types = [wt for sublist in sorted_categories.values() for wt in sublist]
+
         return sorted_weight_types
 
+
     def calculate_snr_for_layer(self, layer_type):
         layers = [(name, module) for name, module in self.model.named_modules() if layer_type in name and hasattr(module, 'weight')]
         num_batches = (len(layers) + self.batch_size - 1) // self.batch_size
@@ -89,10 +104,13 @@ def calculate_snr_for_layer(self, layer_type):
         with tqdm(total=num_batches, unit='batch', desc=f'Calculating SNR for {layer_type}') as progress_bar:
             for i in range(0, len(layers), self.batch_size):
                 batch_layers = layers[i:i + self.batch_size]
+
                 for name, module in batch_layers:
                     weights = module.weight.detach()
+
                     if weights.ndim < 2:
                         weights = weights.unsqueeze(0)
+
                     S = torch.linalg.svdvals(weights)
                     max_singular_value = S[0]
                     sigma_estimated = self.estimate_sigma_with_full_iqr(S)
@@ -103,14 +121,17 @@ def calculate_snr_for_layer(self, layer_type):
                     snr = signal / noise if noise != 0 else float('inf')
                     snr_ratio = snr / max_singular_value
                     self.layer_snr[name] = {'type': layer_type, 'snr': snr_ratio.item()}
+
                 progress_bar.update(1)
 
+
     @staticmethod
     def marchenko_pastur_threshold(sigma, n, m):
         beta = n / m if n < m else m / n
         threshold = sigma * np.sqrt((1 + np.sqrt(beta)) ** 2)
         return threshold
 
+
     @staticmethod
     def estimate_sigma_with_full_iqr(S):
         q75 = torch.quantile(S, 0.75)
@@ -119,8 +140,9 @@ def estimate_sigma_with_full_iqr(S):
         sigma_estimated = iqr / 1.349
         return sigma_estimated
 
+
     def assess_layers_snr(self, selected_weight_types):
-        total_layers = sum(1 for name, module in self.model.named_modules() if any(layer_type in name for layer_type in selected_weight_types) and hasattr(module, 'weight'))
+        # total_layers = sum(1 for name, module in self.model.named_modules() if any(layer_type in name for layer_type in selected_weight_types) and hasattr(module, 'weight'))
         start_time = time.time()
 
         with tqdm(total=len(selected_weight_types), unit='type', desc='Calculating SNR for types') as progress_bar:
@@ -132,65 +154,85 @@ def assess_layers_snr(self, selected_weight_types):
         total_time = end_time - start_time
         print(f"Total time taken: {total_time:.2f} seconds")
 
+
     def save_snr_to_json(self):
         model_name_slug = self.model_name.replace('/', '-').replace('_', '-')
         directory = 'model_snr_results'
         filename = os.path.join(directory, f'snr_results_{model_name_slug}.json')
-        
+
         # Ensure the directory exists
         if not os.path.exists(directory):
             os.makedirs(directory)
-        
-        serializable_data = {}
+
+        serializable_data = dict()
+
         for layer_name, info in self.layer_snr.items():
             snr_value = info['snr'].item() if isinstance(info['snr'], torch.Tensor) else info['snr']
             layer_type = str(info['type'])
             serializable_data[layer_name] = {'snr': snr_value, 'type': layer_type}
-        
+
         with open(filename, 'w') as file:
             json.dump(serializable_data, file, indent=4)
-        
+
         print(f"Results saved to {filename}")
         self.save_top_snr_ratios_to_json(filename)
         self.generate_unfrozen_params_yaml(filename)
 
+
     def generate_unfrozen_params_yaml(self, json_filename, top_percent=None):
         top_percent = top_percent if top_percent is not None else self.top_percent
+
         with open(json_filename, 'r') as file:
             snr_data = json.load(file)
-        unfrozen_parameters = {}
+
+        unfrozen_parameters = dict()
+
         for layer_name, info in snr_data.items():
             layer_type = info['type']
+
             if layer_type not in unfrozen_parameters:
-                unfrozen_parameters[layer_type] = []
+                unfrozen_parameters[layer_type] = list()
+
             unfrozen_parameters[layer_type].append((layer_name, info['snr']))
-        top_layers_by_type = {}
+
+        top_layers_by_type = dict()
+
         for layer_type, layers in unfrozen_parameters.items():
             layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True)
             num_top_layers = int(len(layers) * top_percent / 100)
             top_layers_by_type[layer_type] = [layer[0] for layer in layers_sorted[:num_top_layers]]
+
         # Modify the yaml_filename to include the input json name and top_percent
         json_file_base = os.path.splitext(os.path.basename(json_filename))[0]
         yaml_filename = f"{json_file_base}_unfrozenparameters_{top_percent}percent.yaml"
+
         with open(yaml_filename, 'w') as file:
             file.write("unfrozen_parameters:\n")
             file.write("- ^lm_head.weight$\n")
             file.write("- ^model.embed_tokens.weight$\n")
+
             for layer_type, layer_names in top_layers_by_type.items():
                 file.write(f"# {layer_type} layers\n")
+
                 for layer_name in layer_names:
                     file.write(f"- {layer_name}\n")
+
         print(f"Top {top_percent}% SNR layers saved to {yaml_filename}")
 
     def save_top_snr_ratios_to_json(self, json_filename, filename=None):
         with open(json_filename, 'r') as file:
             snr_data = json.load(file)
-        all_snr_layers = {}
+
+        all_snr_layers = dict()
+
         for layer_name, info in snr_data.items():
             layer_type = info['type']
+
             if layer_type not in all_snr_layers:
-                all_snr_layers[layer_type] = []
+                all_snr_layers[layer_type] = list()
+
             all_snr_layers[layer_type].append((layer_name, info['snr']))
+
         for layer_type, layers in all_snr_layers.items():
             layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True)
             all_snr_layers[layer_type] = {layer[0]: layer[1] for layer in layers_sorted}
@@ -200,8 +242,10 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None):
 
         with open(filename, 'w') as file:
             json.dump(all_snr_layers, file, indent=4)
+
         print(f"All SNR layers sorted and saved to {filename}")
 
+
 def main():
     # Handle command-line arguments
     parser = argparse.ArgumentParser(description="Process SNR data for layers.")
@@ -223,6 +267,7 @@ def main():
         batch_size = int(batch_size) if batch_size else 1
         modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size)
         selected_weight_types = modifier.interactive_select_weights()
+
         if selected_weight_types:
             modifier.assess_layers_snr(selected_weight_types)
             modifier.save_snr_to_json()
@@ -230,5 +275,6 @@ def main():
         else:
             print("No weight types selected.")
 
+
 if __name__ == "__main__":
     main()

From d27eb4542d27132c95503f8dda2357001471e842 Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Fri, 13 Dec 2024 16:17:51 -0500
Subject: [PATCH 2/9] Fixed model loading on consumer systems by falling back to the CPU on RuntimeError

---
 spectrum.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/spectrum.py b/spectrum.py
index 14ab2dd..ec3edc0 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -41,6 +41,16 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1):
                     trust_remote_code=True,
                     device_map="auto"
                 )
+            except RuntimeError as e:
+                print(f"Error loading model: {e}")
+                print("Attempting to load on the CPU...")
+
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float32,
+                    trust_remote_code=True,
+                    device_map="cpu"
+                )
 
             # Check if the model config has rope_scaling
             if not hasattr(self.model.config, 'rope_scaling'):

From 4611f0fb3f5b78082f0eb22d516e8c6e60f555b4 Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Wed, 18 Dec 2024 13:59:30 -0500
Subject: [PATCH 3/9] Added a cuda param to toggle between GPU and CPU model loading

---
 spectrum.py | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/spectrum.py b/spectrum.py
index ec3edc0..b2ec229 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -13,7 +13,7 @@
 
 class ModelModifier:
 
-    def __init__(self, model_name=None, top_percent=50, batch_size=1):
+    def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False):
         self.model_name = model_name
         self.top_percent = top_percent
         self.batch_size = batch_size
@@ -26,6 +26,11 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1):
                     low_cpu_mem_usage=True,
                     trust_remote_code=True,
                     device_map="auto"
+                ) if cuda else AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float32,
+                    trust_remote_code=True,
+                    device_map="cpu"
                 )
             except KeyError as e:
                 print(f"Error loading model: {e}")
@@ -41,16 +46,9 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1):
                     trust_remote_code=True,
                     device_map="auto"
                 )
-            except RuntimeError as e:
+            except (RuntimeError or NotImplementedError) as e:
                 print(f"Error loading model: {e}")
-                print("Attempting to load on the CPU...")
-
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float32,
-                    trust_remote_code=True,
-                    device_map="cpu"
-                )
+                print("Try passing --cuda=False!")
 
             # Check if the model config has rope_scaling
             if not hasattr(self.model.config, 'rope_scaling'):
@@ -166,13 +164,11 @@ def assess_layers_snr(self, selected_weight_types):
 
 
     def save_snr_to_json(self):
-        model_name_slug = self.model_name.replace('/', '-').replace('_', '-')
+        model_name_slug = os.path.basename(self.model_name) if os.path.exists(self.model_name) else self.model_name.replace('/', '-').replace('_', '-')
         directory = 'model_snr_results'
         filename = os.path.join(directory, f'snr_results_{model_name_slug}.json')
 
-        # Ensure the directory exists
-        if not os.path.exists(directory):
-            os.makedirs(directory)
+        os.makedirs(directory, exist_ok=True)
 
         serializable_data = dict()
 
@@ -229,6 +225,7 @@ def generate_unfrozen_params_yaml(self, json_filename, top_percent=None):
 
         print(f"Top {top_percent}% SNR layers saved to {yaml_filename}")
 
+
     def save_top_snr_ratios_to_json(self, json_filename, filename=None):
         with open(json_filename, 'r') as file:
             snr_data = json.load(file)
@@ -260,7 +257,8 @@ def main():
     # Handle command-line arguments
     parser = argparse.ArgumentParser(description="Process SNR data for layers.")
     parser.add_argument('--model-name', type=str, required=True, help='Model name or path to the model')
-    parser.add_argument('--top-percent', type=int, default=None, help='Top percentage of layers to select, overriding the default')
+    parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select, overriding the default')
+    parser.add_argument('--cuda', type=bool, default=False, help='Whether to use the GPU')
     args = parser.parse_args()
 
     # Check for existing SNR results file
@@ -269,13 +267,13 @@ def main():
 
     if os.path.exists(snr_file_path):
         print(f"Found existing SNR results file for {args.model_name}")
-        modifier = ModelModifier(top_percent=args.top_percent)
+        modifier = ModelModifier(top_percent=args.top_percent, cuda=args.cuda)
         modifier.generate_unfrozen_params_yaml(snr_file_path, args.top_percent)
     else:
         print(f"No existing SNR results file found for {args.model_name}. Proceeding with SNR calculation.")
         batch_size = input_dialog(title="Batch Size", text="Enter the batch size:").run()
         batch_size = int(batch_size) if batch_size else 1
-        modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size)
+        modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size, cuda=args.cuda)
         selected_weight_types = modifier.interactive_select_weights()
 
         if selected_weight_types:

From be3298a157bd36fb3a45de285fac35a5b3d3e416 Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Sun, 22 Dec 2024 11:15:47 -0500
Subject: [PATCH 4/9] Calculate SNR for layers that get non-Inf results

---
 spectrum.py | 57 +++++++++++++++++++++++------------------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

diff --git a/spectrum.py b/spectrum.py
index b2ec229..12b9a4a 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 import torch
-from prompt_toolkit.shortcuts import checkboxlist_dialog, input_dialog
+from prompt_toolkit.shortcuts import input_dialog
 from tqdm import tqdm
 from transformers import AutoConfig, AutoModelForCausalLM
 
@@ -20,18 +20,22 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False):
 
         if model_name:
             try:
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float32,
-                    low_cpu_mem_usage=True,
-                    trust_remote_code=True,
-                    device_map="auto"
-                ) if cuda else AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float32,
-                    trust_remote_code=True,
-                    device_map="cpu"
-                )
+                if cuda:
+                    torch.cuda.empty_cache()
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float32,
+                        low_cpu_mem_usage=True,
+                        trust_remote_code=True,
+                        device_map="auto"
+                    )
+                else:
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float32,
+                        trust_remote_code=True,
+                        device_map="cpu"
+                    )
             except KeyError as e:
                 print(f"Error loading model: {e}")
                 print("Attempting to load with custom configuration...")
@@ -61,7 +65,6 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False):
             self.model = None
 
         self.layer_snr = dict()
-        self.layer_types = list()
 
 
     def get_weight_types(self):
@@ -70,7 +73,7 @@ def get_weight_types(self):
         for name, module in self.model.named_modules():
             parts = name.split('.')
 
-            if any(hasattr(module, attr) for attr in ['weight', 'bias','inv_freq']):
+            if any(hasattr(module, attr) for attr in ['weight', 'bias', 'inv_freq']):
                 layer_index = next((i for i, part in enumerate(parts) if part.isdigit()), -1)
                 weight_type = '.'.join(parts[layer_index + 1:]) if layer_index != -1 else name
                 weight_types.add(weight_type)
@@ -78,26 +81,16 @@ def get_weight_types(self):
         return list(weight_types)
 
 
-    def interactive_select_weights(self):
+    def select_weights(self):
         weight_types = self.get_weight_types()
-        sorted_weight_types = self.sort_weight_types(weight_types)
-        selected_types = checkboxlist_dialog(
-            title="Select Weight Types",
-            text="Deselect the weight types you do not want to scan for SNR:",
-            values=[(wt, wt) for wt in sorted_weight_types],
-            default_values=sorted_weight_types
-        ).run()
-        self.layer_types = selected_types
-
-        return selected_types
-
-
-    def sort_weight_types(self, weight_types):
         categories = dict()
 
         for wt in weight_types:
-            category = wt.split('.')[0]
-            categories.setdefault(category, list()).append(wt)
+            print(wt)
+            category, layer = wt.split('.')
+
+            if category in ["mlp", "self_attn"] and layer != "rotary_emb":
+                categories.setdefault(category, list()).append(wt)
 
         sorted_categories = {k: sorted(v) for k, v in sorted(categories.items(), key=lambda item: item[0])}
         sorted_weight_types = [wt for sublist in sorted_categories.values() for wt in sublist]
@@ -274,7 +267,7 @@ def main():
         batch_size = input_dialog(title="Batch Size", text="Enter the batch size:").run()
         batch_size = int(batch_size) if batch_size else 1
         modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size, cuda=args.cuda)
-        selected_weight_types = modifier.interactive_select_weights()
+        selected_weight_types = modifier.select_weights()
 
         if selected_weight_types:
             modifier.assess_layers_snr(selected_weight_types)

From 10d43fa50bec34015e8bd60ef18ae6a4e88d77a8 Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Sun, 22 Dec 2024 11:21:13 -0500
Subject: [PATCH 5/9] Set the batch size using a flag

---
 spectrum.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/spectrum.py b/spectrum.py
index 12b9a4a..9ead475 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -6,7 +6,6 @@
 
 import numpy as np
 import torch
-from prompt_toolkit.shortcuts import input_dialog
 from tqdm import tqdm
 from transformers import AutoConfig, AutoModelForCausalLM
 
@@ -250,7 +249,8 @@ def main():
     # Handle command-line arguments
     parser = argparse.ArgumentParser(description="Process SNR data for layers.")
     parser.add_argument('--model-name', type=str, required=True, help='Model name or path to the model')
-    parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select, overriding the default')
+    parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select')
+    parser.add_argument('--batch-size', type=int, default=1, help='Job batch size')
     parser.add_argument('--cuda', type=bool, default=False, help='Whether to use the GPU')
     args = parser.parse_args()
 
@@ -264,15 +264,13 @@ def main():
         modifier.generate_unfrozen_params_yaml(snr_file_path, args.top_percent)
     else:
         print(f"No existing SNR results file found for {args.model_name}. Proceeding with SNR calculation.")
-        batch_size = input_dialog(title="Batch Size", text="Enter the batch size:").run()
-        batch_size = int(batch_size) if batch_size else 1
-        modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size, cuda=args.cuda)
+        modifier = ModelModifier(model_name=args.model_name, batch_size=args.batch_size, cuda=args.cuda)
         selected_weight_types = modifier.select_weights()
 
         if selected_weight_types:
             modifier.assess_layers_snr(selected_weight_types)
             modifier.save_snr_to_json()
-            print("Finished SNR scanning and data saved.")
+            print("Finished SNR rating.")
         else:
             print("No weight types selected.")
 

From bda98e64b1c2777b3d3a21d3359a7e94be575679 Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Sun, 22 Dec 2024 11:24:44 -0500
Subject: [PATCH 6/9] Code formatting (chore)

---
 spectrum.py | 127 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/spectrum.py b/spectrum.py
index 9ead475..f31ff31 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -11,7 +11,6 @@
 
 
 class ModelModifier:
-
     def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False):
         self.model_name = model_name
         self.top_percent = top_percent
@@ -26,14 +25,14 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False):
                         torch_dtype=torch.float32,
                         low_cpu_mem_usage=True,
                         trust_remote_code=True,
-                        device_map="auto"
+                        device_map="auto",
                     )
                 else:
                     self.model = AutoModelForCausalLM.from_pretrained(
                         model_name,
                         torch_dtype=torch.float32,
                         trust_remote_code=True,
-                        device_map="cpu"
+                        device_map="cpu",
                     )
             except KeyError as e:
                 print(f"Error loading model: {e}")
@@ -47,63 +46,73 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False):
                     torch_dtype=torch.float32,
                     low_cpu_mem_usage=True,
                     trust_remote_code=True,
-                    device_map="auto"
+                    device_map="auto",
                 )
-            except (RuntimeError or NotImplementedError) as e:
+            except RuntimeError or NotImplementedError as e:
                 print(f"Error loading model: {e}")
                 print("Try passing --cuda=False!")
 
             # Check if the model config has rope_scaling
-            if not hasattr(self.model.config, 'rope_scaling'):
-                self.model.config.rope_scaling = {'type': 'linear'}
+            if not hasattr(self.model.config, "rope_scaling"):
+                self.model.config.rope_scaling = {"type": "linear"}
             elif not isinstance(self.model.config.rope_scaling, dict):
-                self.model.config.rope_scaling = {'type': 'linear'}
-            elif 'type' not in self.model.config.rope_scaling:
-                self.model.config.rope_scaling['type'] = 'linear'
+                self.model.config.rope_scaling = {"type": "linear"}
+            elif "type" not in self.model.config.rope_scaling:
+                self.model.config.rope_scaling["type"] = "linear"
         else:
             self.model = None
 
         self.layer_snr = dict()
 
-
     def get_weight_types(self):
         weight_types = set()
 
         for name, module in self.model.named_modules():
-            parts = name.split('.')
+            parts = name.split(".")
 
-            if any(hasattr(module, attr) for attr in ['weight', 'bias', 'inv_freq']):
-                layer_index = next((i for i, part in enumerate(parts) if part.isdigit()), -1)
-                weight_type = '.'.join(parts[layer_index + 1:]) if layer_index != -1 else name
+            if any(hasattr(module, attr) for attr in ["weight", "bias", "inv_freq"]):
+                layer_index = next(
+                    (i for i, part in enumerate(parts) if part.isdigit()), -1
+                )
+                weight_type = (".".join(parts[layer_index + 1 :]) if layer_index != -1 else name)
                 weight_types.add(weight_type)
 
         return list(weight_types)
 
-
     def select_weights(self):
         weight_types = self.get_weight_types()
         categories = dict()
 
         for wt in weight_types:
             print(wt)
-            category, layer = wt.split('.')
+            category, layer = wt.split(".")
 
             if category in ["mlp", "self_attn"] and layer != "rotary_emb":
                 categories.setdefault(category, list()).append(wt)
 
-        sorted_categories = {k: sorted(v) for k, v in sorted(categories.items(), key=lambda item: item[0])}
-        sorted_weight_types = [wt for sublist in sorted_categories.values() for wt in sublist]
+        sorted_categories = {
+            k: sorted(v)
+            for k, v in sorted(categories.items(), key=lambda item: item[0])
+        }
+        sorted_weight_types = [
+            wt for sublist in sorted_categories.values() for wt in sublist
+        ]
 
         return sorted_weight_types
 
-
     def calculate_snr_for_layer(self, layer_type):
-        layers = [(name, module) for name, module in self.model.named_modules() if layer_type in name and hasattr(module, 'weight')]
+        layers = [
+            (name, module)
+            for name, module in self.model.named_modules()
+            if layer_type in name and hasattr(module, "weight")
+        ]
         num_batches = (len(layers) + self.batch_size - 1) // self.batch_size
 
-        with tqdm(total=num_batches, unit='batch', desc=f'Calculating SNR for {layer_type}') as progress_bar:
+        with tqdm(
+            total=num_batches, unit="batch", desc=f"Calculating SNR for {layer_type}"
+        ) as progress_bar:
             for i in range(0, len(layers), self.batch_size):
-                batch_layers = layers[i:i + self.batch_size]
+                batch_layers = layers[i : i + self.batch_size]
 
                 for name, module in batch_layers:
                     weights = module.weight.detach()
@@ -118,20 +127,18 @@ def calculate_snr_for_layer(self, layer_type):
                     mp_threshold = self.marchenko_pastur_threshold(sigma_estimated, n, m)
                     signal = S[S > mp_threshold].sum()
                     noise = S[S <= mp_threshold].sum()
-                    snr = signal / noise if noise != 0 else float('inf')
+                    snr = signal / noise if noise != 0 else float("inf")
                     snr_ratio = snr / max_singular_value
-                    self.layer_snr[name] = {'type': layer_type, 'snr': snr_ratio.item()}
+                    self.layer_snr[name] = {"type": layer_type, "snr": snr_ratio.item()}
 
                 progress_bar.update(1)
 
-
     @staticmethod
     def marchenko_pastur_threshold(sigma, n, m):
         beta = n / m if n < m else m / n
         threshold = sigma * np.sqrt((1 + np.sqrt(beta)) ** 2)
         return threshold
 
-
     @staticmethod
     def estimate_sigma_with_full_iqr(S):
         q75 = torch.quantile(S, 0.75)
@@ -140,12 +147,15 @@ def estimate_sigma_with_full_iqr(S):
         sigma_estimated = iqr / 1.349
         return sigma_estimated
 
-
     def assess_layers_snr(self, selected_weight_types):
         # total_layers = sum(1 for name, module in self.model.named_modules() if any(layer_type in name for layer_type in selected_weight_types) and hasattr(module, 'weight'))
         start_time = time.time()
 
-        with tqdm(total=len(selected_weight_types), unit='type', desc='Calculating SNR for types') as progress_bar:
+        with tqdm(
+            total=len(selected_weight_types),
+            unit="type",
+            desc="Calculating SNR for types",
+        ) as progress_bar:
             for layer_type in selected_weight_types:
                 self.calculate_snr_for_layer(layer_type)
                 progress_bar.update(1)
@@ -154,57 +164,65 @@ def assess_layers_snr(self, selected_weight_types):
         total_time = end_time - start_time
         print(f"Total time taken: {total_time:.2f} seconds")
 
-
     def save_snr_to_json(self):
-        model_name_slug = os.path.basename(self.model_name) if os.path.exists(self.model_name) else self.model_name.replace('/', '-').replace('_', '-')
-        directory = 'model_snr_results'
-        filename = os.path.join(directory, f'snr_results_{model_name_slug}.json')
+        model_name_slug = (
+            os.path.basename(self.model_name)
+            if os.path.exists(self.model_name)
+            else self.model_name.replace("/", "-").replace("_", "-")
+        )
+        directory = "model_snr_results"
+        filename = os.path.join(directory, f"snr_results_{model_name_slug}.json")
 
         os.makedirs(directory, exist_ok=True)
 
         serializable_data = dict()
 
         for layer_name, info in self.layer_snr.items():
-            snr_value = info['snr'].item() if isinstance(info['snr'], torch.Tensor) else info['snr']
-            layer_type = str(info['type'])
-            serializable_data[layer_name] = {'snr': snr_value, 'type': layer_type}
-
-        with open(filename, 'w') as file:
+            snr_value = (
+                info["snr"].item()
+                if isinstance(info["snr"], torch.Tensor)
+                else info["snr"]
+            )
+            layer_type = str(info["type"])
+            serializable_data[layer_name] = {"snr": snr_value, "type": layer_type}
+
+        with open(filename, "w") as file:
             json.dump(serializable_data, file, indent=4)
 
         print(f"Results saved to {filename}")
         self.save_top_snr_ratios_to_json(filename)
         self.generate_unfrozen_params_yaml(filename)
 
-
     def generate_unfrozen_params_yaml(self, json_filename, top_percent=None):
         top_percent = top_percent if top_percent is not None else self.top_percent
 
-        with open(json_filename, 'r') as file:
+        with open(json_filename, "r") as file:
             snr_data = json.load(file)
 
         unfrozen_parameters = dict()
 
         for layer_name, info in snr_data.items():
-            layer_type = info['type']
+            layer_type = info["type"]
 
             if layer_type not in unfrozen_parameters:
                 unfrozen_parameters[layer_type] = list()
 
-            unfrozen_parameters[layer_type].append((layer_name, info['snr']))
+            unfrozen_parameters[layer_type].append((layer_name, info["snr"]))
 
         top_layers_by_type = dict()
 
         for layer_type, layers in unfrozen_parameters.items():
             layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True)
             num_top_layers = int(len(layers) * top_percent / 100)
-            top_layers_by_type[layer_type] = [layer[0] for layer in layers_sorted[:num_top_layers]]
+            top_layers_by_type[layer_type] = [
+                layer[0] for layer in layers_sorted[:num_top_layers]
+            ]
 
         # Modify the yaml_filename to include the input json name and top_percent
         json_file_base = os.path.splitext(os.path.basename(json_filename))[0]
         yaml_filename = f"{json_file_base}_unfrozenparameters_{top_percent}percent.yaml"
 
-        with open(yaml_filename, 'w') as file:
+        with open(yaml_filename, "w") as file:
             file.write("unfrozen_parameters:\n")
             file.write("- ^lm_head.weight$\n")
             file.write("- ^model.embed_tokens.weight$\n")
@@ -217,20 +235,19 @@ def generate_unfrozen_params_yaml(self, json_filename, top_percent=None):
 
         print(f"Top {top_percent}% SNR layers saved to {yaml_filename}")
 
-
     def save_top_snr_ratios_to_json(self, json_filename, filename=None):
-        with open(json_filename, 'r') as file:
+        with open(json_filename, "r") as file:
             snr_data = json.load(file)
 
         all_snr_layers = dict()
 
         for layer_name, info in snr_data.items():
-            layer_type = info['type']
+            layer_type = info["type"]
 
             if layer_type not in all_snr_layers:
                 all_snr_layers[layer_type] = list()
 
-            all_snr_layers[layer_type].append((layer_name, info['snr']))
+            all_snr_layers[layer_type].append((layer_name, info["snr"]))
 
         for layer_type, layers in all_snr_layers.items():
             layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True)
@@ -239,7 +256,7 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None):
         json_file_base = os.path.splitext(os.path.basename(json_filename))[0]
         filename = f"{json_file_base}_sorted.json" if filename is None else filename
 
-        with open(filename, 'w') as file:
+        with open(filename, "w") as file:
             json.dump(all_snr_layers, file, indent=4)
 
         print(f"All SNR layers sorted and saved to {filename}")
@@ -248,15 +265,15 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None):
 def main():
     # Handle command-line arguments
     parser = argparse.ArgumentParser(description="Process SNR data for layers.")
-    parser.add_argument('--model-name', type=str, required=True, help='Model name or path to the model')
-    parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select')
-    parser.add_argument('--batch-size', type=int, default=1, help='Job batch size')
-    parser.add_argument('--cuda', type=bool, default=False, help='Whether to use the GPU')
+    parser.add_argument("--model-name", type=str, required=True, help="Model name or path to the model")
+    parser.add_argument("--top-percent", type=int, default=50, help="Top percentage of layers to select")
+    parser.add_argument("--batch-size", type=int, default=1, help="Job batch size")
+    parser.add_argument("--cuda", type=bool, default=False, help="Whether to use the GPU")
     args = parser.parse_args()
 
     # Check for existing SNR results file
-    model_name_slug = args.model_name.replace('/', '-').replace('_', '-')
-    snr_file_path = os.path.join('model_snr_results', f'snr_results_{model_name_slug}.json')
+    model_name_slug = args.model_name.replace("/", "-").replace("_", "-")
+    snr_file_path = os.path.join("model_snr_results", f"snr_results_{model_name_slug}.json")
 
     if os.path.exists(snr_file_path):
         print(f"Found existing SNR results file for {args.model_name}")

From ffa19d971995a3895176b9e80cee07e370873baf Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Sun, 22 Dec 2024 11:33:09 -0500
Subject: [PATCH 7/9] Cleaned up the requirements

---
 requirements.txt | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index cb9dc99..cff1e9c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,4 @@
-auto_mix_prep
-datasets
-icecream
 numpy
-pandas
-prompt_toolkit
-pydantic
 torch
 tqdm
 transformers
-accelerate
\ No newline at end of file

From 6e25e2c0f4d1a5a0a6fdd4d563ab095093c6628d Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Sun, 22 Dec 2024 11:44:27 -0500
Subject: [PATCH 8/9] Moved main logic to a helper function

---
 spectrum.py | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/spectrum.py b/spectrum.py
index f31ff31..e13ddf1 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -166,7 +166,7 @@ def assess_layers_snr(self, selected_weight_types):
 
     def save_snr_to_json(self):
         model_name_slug = (
-            os.path.basename(self.model_name)
+            os.path.basename(self.model_name).replace("_", "-")
             if os.path.exists(self.model_name)
             else self.model_name.replace("/", "-").replace("_", "-")
         )
@@ -262,26 +262,21 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None):
         print(f"All SNR layers sorted and saved to {filename}")
 
 
-def main():
-    # Handle command-line arguments
-    parser = argparse.ArgumentParser(description="Process SNR data for layers.")
-    parser.add_argument("--model-name", type=str, required=True, help="Model name or path to the model")
-    parser.add_argument("--top-percent", type=int, default=50, help="Top percentage of layers to select")
-    parser.add_argument("--batch-size", type=int, default=1, help="Job batch size")
-    parser.add_argument("--cuda", type=bool, default=False, help="Whether to use the GPU")
-    args = parser.parse_args()
-
-    # Check for existing SNR results file
-    model_name_slug = args.model_name.replace("/", "-").replace("_", "-")
+def calculate_model_snr(model: str, top_percent: int, batch_size: int = 1, cuda: bool = False):
+    model_name_slug = (
+        os.path.basename(model).replace("_", "-")
+        if os.path.exists(model)
+        else model.replace("/", "-").replace("_", "-")
+    )
     snr_file_path = os.path.join("model_snr_results", f"snr_results_{model_name_slug}.json")
 
     if os.path.exists(snr_file_path):
-        print(f"Found existing SNR results file for {args.model_name}")
-        modifier = ModelModifier(top_percent=args.top_percent, cuda=args.cuda)
-        modifier.generate_unfrozen_params_yaml(snr_file_path, args.top_percent)
+        print(f"Found existing SNR results file for {model}")
+        modifier = ModelModifier(top_percent=top_percent, cuda=cuda)
+        modifier.generate_unfrozen_params_yaml(snr_file_path, top_percent)
     else:
-        print(f"No existing SNR results file found for {args.model_name}. Proceeding with SNR calculation.")
-        modifier = ModelModifier(model_name=args.model_name, batch_size=args.batch_size, cuda=args.cuda)
+        print(f"No existing SNR results file found for {model}. Proceeding with SNR calculation.")
+        modifier = ModelModifier(model_name=model, batch_size=batch_size, cuda=cuda)
         selected_weight_types = modifier.select_weights()
 
         if selected_weight_types:
@@ -292,5 +287,17 @@ def main():
             print("No weight types selected.")
 
 
+def main():
+    # Handle command-line arguments
+    parser = argparse.ArgumentParser(description="Process SNR data for layers.")
+    parser.add_argument("--model-name", type=str, required=True, help="Model name or path to the model")
+    parser.add_argument("--top-percent", type=int, default=50, help="Top percentage of layers to select")
+    parser.add_argument("--batch-size", type=int, default=1, help="Job batch size")
+    parser.add_argument("--cuda", type=bool, default=False, help="Whether to use the GPU")
+    args = parser.parse_args()
+
+    calculate_model_snr(args.model_name, args.top_percent, args.batch_size, args.cuda)
+
+
 if __name__ == "__main__":
     main()

From 34b74011106d4668176ebd0335b78af3d0d77f48 Mon Sep 17 00:00:00 2001
From: T145 <T145@protonmail.com>
Date: Sun, 22 Dec 2024 15:57:04 -0500
Subject: [PATCH 9/9] Runtime fix

---
 __init__.py      |  0
 requirements.txt |  1 +
 spectrum.py      | 11 +++++++++--
 3 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 __init__.py

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
index cff1e9c..1433d7d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ numpy
 torch
 tqdm
 transformers
+accelerate
\ No newline at end of file
diff --git a/spectrum.py b/spectrum.py
index e13ddf1..02d855f 100644
--- a/spectrum.py
+++ b/spectrum.py
@@ -83,9 +83,16 @@ def select_weights(self):
         weight_types = self.get_weight_types()
         categories = dict()
 
+        def get_layer(weight: list):
+            try:
+                return weight[1]
+            except IndexError:
+                return None
+
         for wt in weight_types:
-            print(wt)
-            category, layer = wt.split(".")
+            arr = wt.split(".")
+            category = arr[0]
+            layer = get_layer(arr)
 
             if category in ["mlp", "self_attn"] and layer != "rotary_emb":
                 categories.setdefault(category, list()).append(wt)