From 55e3cc8deb857ed6720194bb8da81b03081b1f27 Mon Sep 17 00:00:00 2001 From: T145 Date: Fri, 13 Dec 2024 16:15:32 -0500 Subject: [PATCH 1/9] Improved code legibility --- spectrum.py | 100 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 27 deletions(-) diff --git a/spectrum.py b/spectrum.py index 38ae27c..14ab2dd 100644 --- a/spectrum.py +++ b/spectrum.py @@ -1,15 +1,18 @@ # spectrum.py -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig -import numpy as np -import json -from prompt_toolkit.shortcuts import checkboxlist_dialog, input_dialog import argparse -from tqdm import tqdm +import json import os import time +import numpy as np +import torch +from prompt_toolkit.shortcuts import checkboxlist_dialog, input_dialog +from tqdm import tqdm +from transformers import AutoConfig, AutoModelForCausalLM + + class ModelModifier: + def __init__(self, model_name=None, top_percent=50, batch_size=1): self.model_name = model_name self.top_percent = top_percent @@ -18,15 +21,16 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1): if model_name: try: self.model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float32, - low_cpu_mem_usage=True, - trust_remote_code=True, + model_name, + torch_dtype=torch.float32, + low_cpu_mem_usage=True, + trust_remote_code=True, device_map="auto" ) except KeyError as e: print(f"Error loading model: {e}") print("Attempting to load with custom configuration...") + config = AutoConfig.from_pretrained(model_name) config.rope_scaling = {"type": "linear", "factor": 1.0} self.model = AutoModelForCausalLM.from_pretrained( @@ -37,7 +41,7 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1): trust_remote_code=True, device_map="auto" ) - + # Check if the model config has rope_scaling if not hasattr(self.model.config, 'rope_scaling'): self.model.config.rope_scaling = {'type': 'linear'} @@ -48,40 +52,51 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1): else: self.model = None - self.layer_snr = {} - self.layer_types = [] + self.layer_snr = dict() + self.layer_types = list() + def get_weight_types(self): weight_types = set() + for name, module in self.model.named_modules(): parts = name.split('.') + if any(hasattr(module, attr) for attr in ['weight', 'bias','inv_freq']): layer_index = next((i for i, part in enumerate(parts) if part.isdigit()), -1) weight_type = '.'.join(parts[layer_index + 1:]) if layer_index != -1 else name weight_types.add(weight_type) + return list(weight_types) + def interactive_select_weights(self): weight_types = self.get_weight_types() sorted_weight_types = self.sort_weight_types(weight_types) selected_types = checkboxlist_dialog( - title="Select Weight Types", + title="Select Weight Types", text="Deselect the weight types you do not want to scan for SNR:", values=[(wt, wt) for wt in sorted_weight_types], default_values=sorted_weight_types ).run() self.layer_types = selected_types + return selected_types + def sort_weight_types(self, weight_types): - categories = {} + categories = dict() + for wt in weight_types: category = wt.split('.')[0] - categories.setdefault(category, []).append(wt) + categories.setdefault(category, list()).append(wt) + sorted_categories = {k: sorted(v) for k, v in sorted(categories.items(), key=lambda item: item[0])} sorted_weight_types = [wt for sublist in sorted_categories.values() for wt in sublist] + return sorted_weight_types + def calculate_snr_for_layer(self, layer_type): layers = [(name, module) for name, module in self.model.named_modules() if layer_type in name and hasattr(module, 'weight')] num_batches = (len(layers) + self.batch_size - 1) // self.batch_size @@ -89,10 +104,13 @@ def calculate_snr_for_layer(self, layer_type): with tqdm(total=num_batches, unit='batch', desc=f'Calculating SNR for {layer_type}') as progress_bar: for i in range(0, len(layers), self.batch_size): batch_layers = layers[i:i + self.batch_size] + for name, module in batch_layers: weights = module.weight.detach() + if weights.ndim < 2: weights = weights.unsqueeze(0) + S = torch.linalg.svdvals(weights) max_singular_value = S[0] sigma_estimated = self.estimate_sigma_with_full_iqr(S) @@ -103,14 +121,17 @@ def calculate_snr_for_layer(self, layer_type): snr = signal / noise if noise != 0 else float('inf') snr_ratio = snr / max_singular_value self.layer_snr[name] = {'type': layer_type, 'snr': snr_ratio.item()} + progress_bar.update(1) + @staticmethod def marchenko_pastur_threshold(sigma, n, m): beta = n / m if n < m else m / n threshold = sigma * np.sqrt((1 + np.sqrt(beta)) ** 2) return threshold + @staticmethod def estimate_sigma_with_full_iqr(S): q75 = torch.quantile(S, 0.75) @@ -119,8 +140,9 @@ def estimate_sigma_with_full_iqr(S): sigma_estimated = iqr / 1.349 return sigma_estimated + def assess_layers_snr(self, selected_weight_types): - total_layers = sum(1 for name, module in self.model.named_modules() if any(layer_type in name for layer_type in selected_weight_types) and hasattr(module, 'weight')) + # total_layers = sum(1 for name, module in self.model.named_modules() if any(layer_type in name for layer_type in selected_weight_types) and hasattr(module, 'weight')) start_time = time.time() with tqdm(total=len(selected_weight_types), unit='type', desc='Calculating SNR for types') as progress_bar: @@ -132,65 +154,85 @@ def assess_layers_snr(self, selected_weight_types): total_time = end_time - start_time print(f"Total time taken: {total_time:.2f} seconds") + def save_snr_to_json(self): model_name_slug = self.model_name.replace('/', '-').replace('_', '-') directory = 'model_snr_results' filename = os.path.join(directory, f'snr_results_{model_name_slug}.json') - + # Ensure the directory exists if not os.path.exists(directory): os.makedirs(directory) - - serializable_data = {} + + serializable_data = dict() + for layer_name, info in self.layer_snr.items(): snr_value = info['snr'].item() if isinstance(info['snr'], torch.Tensor) else info['snr'] layer_type = str(info['type']) serializable_data[layer_name] = {'snr': snr_value, 'type': layer_type} - + with open(filename, 'w') as file: json.dump(serializable_data, file, indent=4) - + print(f"Results saved to {filename}") self.save_top_snr_ratios_to_json(filename) self.generate_unfrozen_params_yaml(filename) + def generate_unfrozen_params_yaml(self, json_filename, top_percent=None): top_percent = top_percent if top_percent is not None else self.top_percent + with open(json_filename, 'r') as file: snr_data = json.load(file) - unfrozen_parameters = {} + + unfrozen_parameters = dict() + for layer_name, info in snr_data.items(): layer_type = info['type'] + if layer_type not in unfrozen_parameters: - unfrozen_parameters[layer_type] = [] + unfrozen_parameters[layer_type] = list() + unfrozen_parameters[layer_type].append((layer_name, info['snr'])) - top_layers_by_type = {} + + top_layers_by_type = dict() + for layer_type, layers in unfrozen_parameters.items(): layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True) num_top_layers = int(len(layers) * top_percent / 100) top_layers_by_type[layer_type] = [layer[0] for layer in layers_sorted[:num_top_layers]] + # Modify the yaml_filename to include the input json name and top_percent json_file_base = os.path.splitext(os.path.basename(json_filename))[0] yaml_filename = f"{json_file_base}_unfrozenparameters_{top_percent}percent.yaml" + with open(yaml_filename, 'w') as file: file.write("unfrozen_parameters:\n") file.write("- ^lm_head.weight$\n") file.write("- ^model.embed_tokens.weight$\n") + for layer_type, layer_names in top_layers_by_type.items(): file.write(f"# {layer_type} layers\n") + for layer_name in layer_names: file.write(f"- {layer_name}\n") + print(f"Top {top_percent}% SNR layers saved to {yaml_filename}") def save_top_snr_ratios_to_json(self, json_filename, filename=None): with open(json_filename, 'r') as file: snr_data = json.load(file) - all_snr_layers = {} + + all_snr_layers = dict() + for layer_name, info in snr_data.items(): layer_type = info['type'] + if layer_type not in all_snr_layers: - all_snr_layers[layer_type] = [] + all_snr_layers[layer_type] = list() + all_snr_layers[layer_type].append((layer_name, info['snr'])) + for layer_type, layers in all_snr_layers.items(): layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True) all_snr_layers[layer_type] = {layer[0]: layer[1] for layer in layers_sorted} @@ -200,8 +242,10 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None): with open(filename, 'w') as file: json.dump(all_snr_layers, file, indent=4) + print(f"All SNR layers sorted and saved to {filename}") + def main(): # Handle command-line arguments parser = argparse.ArgumentParser(description="Process SNR data for layers.") @@ -223,6 +267,7 @@ def main(): batch_size = int(batch_size) if batch_size else 1 modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size) selected_weight_types = modifier.interactive_select_weights() + if selected_weight_types: modifier.assess_layers_snr(selected_weight_types) modifier.save_snr_to_json() @@ -230,5 +275,6 @@ def main(): else: print("No weight types selected.") + if __name__ == "__main__": main() From d27eb4542d27132c95503f8dda2357001471e842 Mon Sep 17 00:00:00 2001 From: T145 Date: Fri, 13 Dec 2024 16:17:51 -0500 Subject: [PATCH 2/9] Fixed runtime on consumer systems --- spectrum.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spectrum.py b/spectrum.py index 14ab2dd..ec3edc0 100644 --- a/spectrum.py +++ b/spectrum.py @@ -41,6 +41,16 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1): trust_remote_code=True, device_map="auto" ) + except RuntimeError as e: + print(f"Error loading model: {e}") + print("Attempting to load on the CPU...") + + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float32, + trust_remote_code=True, + device_map="cpu" + ) # Check if the model config has rope_scaling if not hasattr(self.model.config, 'rope_scaling'): From 4611f0fb3f5b78082f0eb22d516e8c6e60f555b4 Mon Sep 17 00:00:00 2001 From: T145 Date: Wed, 18 Dec 2024 13:59:30 -0500 Subject: [PATCH 3/9] Added a cuda param to toggle CPU usage --- spectrum.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/spectrum.py b/spectrum.py index ec3edc0..b2ec229 100644 --- a/spectrum.py +++ b/spectrum.py @@ -13,7 +13,7 @@ class ModelModifier: - def __init__(self, model_name=None, top_percent=50, batch_size=1): + def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False): self.model_name = model_name self.top_percent = top_percent self.batch_size = batch_size @@ -26,6 +26,11 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1): low_cpu_mem_usage=True, trust_remote_code=True, device_map="auto" + ) if cuda else AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float32, + trust_remote_code=True, + device_map="cpu" ) except KeyError as e: print(f"Error loading model: {e}") @@ -41,16 +46,9 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1): trust_remote_code=True, device_map="auto" ) - except RuntimeError as e: + except (RuntimeError or NotImplementedError) as e: print(f"Error loading model: {e}") - print("Attempting to load on the CPU...") - - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float32, - trust_remote_code=True, - device_map="cpu" - ) + print("Try passing --cuda=False!") # Check if the model config has rope_scaling if not hasattr(self.model.config, 'rope_scaling'): @@ -166,13 +164,11 @@ def assess_layers_snr(self, selected_weight_types): def save_snr_to_json(self): - model_name_slug = self.model_name.replace('/', '-').replace('_', '-') + model_name_slug = os.path.basename(self.model_name) if os.path.exists(self.model_name) else self.model_name.replace('/', '-').replace('_', '-') directory = 'model_snr_results' filename = os.path.join(directory, f'snr_results_{model_name_slug}.json') - # Ensure the directory exists - if not os.path.exists(directory): - os.makedirs(directory) + os.makedirs(directory, exist_ok=True) serializable_data = dict() @@ -229,6 +225,7 @@ def generate_unfrozen_params_yaml(self, json_filename, top_percent=None): print(f"Top {top_percent}% SNR layers saved to {yaml_filename}") + def save_top_snr_ratios_to_json(self, json_filename, filename=None): with open(json_filename, 'r') as file: snr_data = json.load(file) @@ -260,7 +257,8 @@ def main(): # Handle command-line arguments parser = argparse.ArgumentParser(description="Process SNR data for layers.") parser.add_argument('--model-name', type=str, required=True, help='Model name or path to the model') - parser.add_argument('--top-percent', type=int, default=None, help='Top percentage of layers to select, overriding the default') + parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select, overriding the default') + parser.add_argument('--cuda', type=bool, default=False, help='Whether to use the GPU') args = parser.parse_args() # Check for existing SNR results file @@ -269,13 +267,13 @@ def main(): if os.path.exists(snr_file_path): print(f"Found existing SNR results file for {args.model_name}") - modifier = ModelModifier(top_percent=args.top_percent) + modifier = ModelModifier(top_percent=args.top_percent, cuda=args.cuda) modifier.generate_unfrozen_params_yaml(snr_file_path, args.top_percent) else: print(f"No existing SNR results file found for {args.model_name}. Proceeding with SNR calculation.") batch_size = input_dialog(title="Batch Size", text="Enter the batch size:").run() batch_size = int(batch_size) if batch_size else 1 - modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size) + modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size, cuda=args.cuda) selected_weight_types = modifier.interactive_select_weights() if selected_weight_types: From be3298a157bd36fb3a45de285fac35a5b3d3e416 Mon Sep 17 00:00:00 2001 From: T145 Date: Sun, 22 Dec 2024 11:15:47 -0500 Subject: [PATCH 4/9] Calculate SNR for layers that get non-Inf results --- spectrum.py | 57 +++++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/spectrum.py b/spectrum.py index b2ec229..12b9a4a 100644 --- a/spectrum.py +++ b/spectrum.py @@ -6,7 +6,7 @@ import numpy as np import torch -from prompt_toolkit.shortcuts import checkboxlist_dialog, input_dialog +from prompt_toolkit.shortcuts import input_dialog from tqdm import tqdm from transformers import AutoConfig, AutoModelForCausalLM @@ -20,18 +20,22 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False): if model_name: try: - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float32, - low_cpu_mem_usage=True, - trust_remote_code=True, - device_map="auto" - ) if cuda else AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype=torch.float32, - trust_remote_code=True, - device_map="cpu" - ) + if cuda: + torch.cuda.empty_cache() + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float32, + low_cpu_mem_usage=True, + trust_remote_code=True, + device_map="auto" + ) + else: + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float32, + trust_remote_code=True, + device_map="cpu" + ) except KeyError as e: print(f"Error loading model: {e}") print("Attempting to load with custom configuration...") @@ -61,7 +65,6 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False): self.model = None self.layer_snr = dict() - self.layer_types = list() def get_weight_types(self): @@ -70,7 +73,7 @@ def get_weight_types(self): for name, module in self.model.named_modules(): parts = name.split('.') - if any(hasattr(module, attr) for attr in ['weight', 'bias','inv_freq']): + if any(hasattr(module, attr) for attr in ['weight', 'bias', 'inv_freq']): layer_index = next((i for i, part in enumerate(parts) if part.isdigit()), -1) weight_type = '.'.join(parts[layer_index + 1:]) if layer_index != -1 else name weight_types.add(weight_type) @@ -78,26 +81,16 @@ def get_weight_types(self): return list(weight_types) - def interactive_select_weights(self): + def select_weights(self): weight_types = self.get_weight_types() - sorted_weight_types = self.sort_weight_types(weight_types) - selected_types = checkboxlist_dialog( - title="Select Weight Types", - text="Deselect the weight types you do not want to scan for SNR:", - values=[(wt, wt) for wt in sorted_weight_types], - default_values=sorted_weight_types - ).run() - self.layer_types = selected_types - - return selected_types - - - def sort_weight_types(self, weight_types): categories = dict() for wt in weight_types: - category = wt.split('.')[0] - categories.setdefault(category, list()).append(wt) + print(wt) + category, layer = wt.split('.') + + if category in ["mlp", "self_attn"] and layer != "rotary_emb": + categories.setdefault(category, list()).append(wt) sorted_categories = {k: sorted(v) for k, v in sorted(categories.items(), key=lambda item: item[0])} sorted_weight_types = [wt for sublist in sorted_categories.values() for wt in sublist] @@ -274,7 +267,7 @@ def main(): batch_size = input_dialog(title="Batch Size", text="Enter the batch size:").run() batch_size = int(batch_size) if batch_size else 1 modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size, cuda=args.cuda) - selected_weight_types = modifier.interactive_select_weights() + selected_weight_types = modifier.select_weights() if selected_weight_types: modifier.assess_layers_snr(selected_weight_types) From 10d43fa50bec34015e8bd60ef18ae6a4e88d77a8 Mon Sep 17 00:00:00 2001 From: T145 Date: Sun, 22 Dec 2024 11:21:13 -0500 Subject: [PATCH 5/9] Set the batch size using a flag --- spectrum.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/spectrum.py b/spectrum.py index 12b9a4a..9ead475 100644 --- a/spectrum.py +++ b/spectrum.py @@ -6,7 +6,6 @@ import numpy as np import torch -from prompt_toolkit.shortcuts import input_dialog from tqdm import tqdm from transformers import AutoConfig, AutoModelForCausalLM @@ -250,7 +249,8 @@ def main(): # Handle command-line arguments parser = argparse.ArgumentParser(description="Process SNR data for layers.") parser.add_argument('--model-name', type=str, required=True, help='Model name or path to the model') - parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select, overriding the default') + parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select') + parser.add_argument('--batch-size', type=int, default=1, help='Job batch size') parser.add_argument('--cuda', type=bool, default=False, help='Whether to use the GPU') args = parser.parse_args() @@ -264,15 +264,13 @@ def main(): modifier.generate_unfrozen_params_yaml(snr_file_path, args.top_percent) else: print(f"No existing SNR results file found for {args.model_name}. Proceeding with SNR calculation.") - batch_size = input_dialog(title="Batch Size", text="Enter the batch size:").run() - batch_size = int(batch_size) if batch_size else 1 - modifier = ModelModifier(model_name=args.model_name, batch_size=batch_size, cuda=args.cuda) + modifier = ModelModifier(model_name=args.model_name, batch_size=args.batch_size, cuda=args.cuda) selected_weight_types = modifier.select_weights() if selected_weight_types: modifier.assess_layers_snr(selected_weight_types) modifier.save_snr_to_json() - print("Finished SNR scanning and data saved.") + print("Finished SNR rating.") else: print("No weight types selected.") From bda98e64b1c2777b3d3a21d3359a7e94be575679 Mon Sep 17 00:00:00 2001 From: T145 Date: Sun, 22 Dec 2024 11:24:44 -0500 Subject: [PATCH 6/9] Code formatting (chore) --- spectrum.py | 127 +++++++++++++++++++++++++++++----------------------- 1 file changed, 72 insertions(+), 55 deletions(-) diff --git a/spectrum.py b/spectrum.py index 9ead475..f31ff31 100644 --- a/spectrum.py +++ b/spectrum.py @@ -11,7 +11,6 @@ class ModelModifier: - def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False): self.model_name = model_name self.top_percent = top_percent @@ -26,14 +25,14 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False): torch_dtype=torch.float32, low_cpu_mem_usage=True, trust_remote_code=True, - device_map="auto" + device_map="auto", ) else: self.model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float32, trust_remote_code=True, - device_map="cpu" + device_map="cpu", ) except KeyError as e: print(f"Error loading model: {e}") @@ -47,63 +46,73 @@ def __init__(self, model_name=None, top_percent=50, batch_size=1, cuda=False): torch_dtype=torch.float32, low_cpu_mem_usage=True, trust_remote_code=True, - device_map="auto" + device_map="auto", ) - except (RuntimeError or NotImplementedError) as e: + except RuntimeError or NotImplementedError as e: print(f"Error loading model: {e}") print("Try passing --cuda=False!") # Check if the model config has rope_scaling - if not hasattr(self.model.config, 'rope_scaling'): - self.model.config.rope_scaling = {'type': 'linear'} + if not hasattr(self.model.config, "rope_scaling"): + self.model.config.rope_scaling = {"type": "linear"} elif not isinstance(self.model.config.rope_scaling, dict): - self.model.config.rope_scaling = {'type': 'linear'} - elif 'type' not in self.model.config.rope_scaling: - self.model.config.rope_scaling['type'] = 'linear' + self.model.config.rope_scaling = {"type": "linear"} + elif "type" not in self.model.config.rope_scaling: + self.model.config.rope_scaling["type"] = "linear" else: self.model = None self.layer_snr = dict() - def get_weight_types(self): weight_types = set() for name, module in self.model.named_modules(): - parts = name.split('.') + parts = name.split(".") - if any(hasattr(module, attr) for attr in ['weight', 'bias', 'inv_freq']): - layer_index = next((i for i, part in enumerate(parts) if part.isdigit()), -1) - weight_type = '.'.join(parts[layer_index + 1:]) if layer_index != -1 else name + if any(hasattr(module, attr) for attr in ["weight", "bias", "inv_freq"]): + layer_index = next( + (i for i, part in enumerate(parts) if part.isdigit()), -1 + ) + weight_type = (".".join(parts[layer_index + 1 :]) if layer_index != -1 else name) weight_types.add(weight_type) return list(weight_types) - def select_weights(self): weight_types = self.get_weight_types() categories = dict() for wt in weight_types: print(wt) - category, layer = wt.split('.') + category, layer = wt.split(".") if category in ["mlp", "self_attn"] and layer != "rotary_emb": categories.setdefault(category, list()).append(wt) - sorted_categories = {k: sorted(v) for k, v in sorted(categories.items(), key=lambda item: item[0])} - sorted_weight_types = [wt for sublist in sorted_categories.values() for wt in sublist] + sorted_categories = { + k: sorted(v) + for k, v in sorted(categories.items(), key=lambda item: item[0]) + } + sorted_weight_types = [ + wt for sublist in sorted_categories.values() for wt in sublist + ] return sorted_weight_types - def calculate_snr_for_layer(self, layer_type): - layers = [(name, module) for name, module in self.model.named_modules() if layer_type in name and hasattr(module, 'weight')] + layers = [ + (name, module) + for name, module in self.model.named_modules() + if layer_type in name and hasattr(module, "weight") + ] num_batches = (len(layers) + self.batch_size - 1) // self.batch_size - with tqdm(total=num_batches, unit='batch', desc=f'Calculating SNR for {layer_type}') as progress_bar: + with tqdm( + total=num_batches, unit="batch", desc=f"Calculating SNR for {layer_type}" + ) as progress_bar: for i in range(0, len(layers), self.batch_size): - batch_layers = layers[i:i + self.batch_size] + batch_layers = layers[i : i + self.batch_size] for name, module in batch_layers: weights = module.weight.detach() @@ -118,20 +127,18 @@ def calculate_snr_for_layer(self, layer_type): mp_threshold = self.marchenko_pastur_threshold(sigma_estimated, n, m) signal = S[S > mp_threshold].sum() noise = S[S <= mp_threshold].sum() - snr = signal / noise if noise != 0 else float('inf') + snr = signal / noise if noise != 0 else float("inf") snr_ratio = snr / max_singular_value - self.layer_snr[name] = {'type': layer_type, 'snr': snr_ratio.item()} + self.layer_snr[name] = {"type": layer_type, "snr": snr_ratio.item()} progress_bar.update(1) - @staticmethod def marchenko_pastur_threshold(sigma, n, m): beta = n / m if n < m else m / n threshold = sigma * np.sqrt((1 + np.sqrt(beta)) ** 2) return threshold - @staticmethod def estimate_sigma_with_full_iqr(S): q75 = torch.quantile(S, 0.75) @@ -140,12 +147,15 @@ def estimate_sigma_with_full_iqr(S): sigma_estimated = iqr / 1.349 return sigma_estimated - def assess_layers_snr(self, selected_weight_types): # total_layers = sum(1 for name, module in self.model.named_modules() if any(layer_type in name for layer_type in selected_weight_types) and hasattr(module, 'weight')) start_time = time.time() - with tqdm(total=len(selected_weight_types), unit='type', desc='Calculating SNR for types') as progress_bar: + with tqdm( + total=len(selected_weight_types), + unit="type", + desc="Calculating SNR for types", + ) as progress_bar: for layer_type in selected_weight_types: self.calculate_snr_for_layer(layer_type) progress_bar.update(1) @@ -154,57 +164,65 @@ def assess_layers_snr(self, selected_weight_types): total_time = end_time - start_time print(f"Total time taken: {total_time:.2f} seconds") - def save_snr_to_json(self): - model_name_slug = os.path.basename(self.model_name) if os.path.exists(self.model_name) else self.model_name.replace('/', '-').replace('_', '-') - directory = 'model_snr_results' - filename = os.path.join(directory, f'snr_results_{model_name_slug}.json') + model_name_slug = ( + os.path.basename(self.model_name) + if os.path.exists(self.model_name) + else self.model_name.replace("/", "-").replace("_", "-") + ) + directory = "model_snr_results" + filename = os.path.join(directory, f"snr_results_{model_name_slug}.json") os.makedirs(directory, exist_ok=True) serializable_data = dict() for layer_name, info in self.layer_snr.items(): - snr_value = info['snr'].item() if isinstance(info['snr'], torch.Tensor) else info['snr'] - layer_type = str(info['type']) - serializable_data[layer_name] = {'snr': snr_value, 'type': layer_type} - - with open(filename, 'w') as file: + snr_value = ( + info["snr"].item() + if isinstance(info["snr"], torch.Tensor) + else info["snr"] + ) + layer_type = str(info["type"]) + serializable_data[layer_name] = {"snr": snr_value, "type": layer_type} + + with open(filename, "w") as file: json.dump(serializable_data, file, indent=4) print(f"Results saved to {filename}") self.save_top_snr_ratios_to_json(filename) self.generate_unfrozen_params_yaml(filename) - def generate_unfrozen_params_yaml(self, json_filename, top_percent=None): top_percent = top_percent if top_percent is not None else self.top_percent - with open(json_filename, 'r') as file: + with open(json_filename, "r") as file: snr_data = json.load(file) unfrozen_parameters = dict() for layer_name, info in snr_data.items(): - layer_type = info['type'] + layer_type = info["type"] if layer_type not in unfrozen_parameters: unfrozen_parameters[layer_type] = list() - unfrozen_parameters[layer_type].append((layer_name, info['snr'])) + unfrozen_parameters[layer_type].append((layer_name, info["snr"])) top_layers_by_type = dict() for layer_type, layers in unfrozen_parameters.items(): layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True) num_top_layers = int(len(layers) * top_percent / 100) - top_layers_by_type[layer_type] = [layer[0] for layer in layers_sorted[:num_top_layers]] + top_layers_by_type[layer_type] = [ + layer[0] for layer in layers_sorted[:num_top_layers] + ] # Modify the yaml_filename to include the input json name and top_percent json_file_base = os.path.splitext(os.path.basename(json_filename))[0] yaml_filename = f"{json_file_base}_unfrozenparameters_{top_percent}percent.yaml" - with open(yaml_filename, 'w') as file: + with open(yaml_filename, "w") as file: file.write("unfrozen_parameters:\n") file.write("- ^lm_head.weight$\n") file.write("- ^model.embed_tokens.weight$\n") @@ -217,20 +235,19 @@ def generate_unfrozen_params_yaml(self, json_filename, top_percent=None): print(f"Top {top_percent}% SNR layers saved to {yaml_filename}") - def save_top_snr_ratios_to_json(self, json_filename, filename=None): - with open(json_filename, 'r') as file: + with open(json_filename, "r") as file: snr_data = json.load(file) all_snr_layers = dict() for layer_name, info in snr_data.items(): - layer_type = info['type'] + layer_type = info["type"] if layer_type not in all_snr_layers: all_snr_layers[layer_type] = list() - all_snr_layers[layer_type].append((layer_name, info['snr'])) + all_snr_layers[layer_type].append((layer_name, info["snr"])) for layer_type, layers in all_snr_layers.items(): layers_sorted = sorted(layers, key=lambda x: x[1], reverse=True) @@ -239,7 +256,7 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None): json_file_base = os.path.splitext(os.path.basename(json_filename))[0] filename = f"{json_file_base}_sorted.json" if filename is None else filename - with open(filename, 'w') as file: + with open(filename, "w") as file: json.dump(all_snr_layers, file, indent=4) print(f"All SNR layers sorted and saved to {filename}") @@ -248,15 +265,15 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None): def main(): # Handle command-line arguments parser = argparse.ArgumentParser(description="Process SNR data for layers.") - parser.add_argument('--model-name', type=str, required=True, help='Model name or path to the model') - parser.add_argument('--top-percent', type=int, default=50, help='Top percentage of layers to select') - parser.add_argument('--batch-size', type=int, default=1, help='Job batch size') - parser.add_argument('--cuda', type=bool, default=False, help='Whether to use the GPU') + parser.add_argument("--model-name", type=str, required=True, help="Model name or path to the model") + parser.add_argument("--top-percent", type=int, default=50, help="Top percentage of layers to select") + parser.add_argument("--batch-size", type=int, default=1, help="Job batch size") + parser.add_argument("--cuda", type=bool, default=False, help="Whether to use the GPU") args = parser.parse_args() # Check for existing SNR results file - model_name_slug = args.model_name.replace('/', '-').replace('_', '-') - snr_file_path = os.path.join('model_snr_results', f'snr_results_{model_name_slug}.json') + model_name_slug = args.model_name.replace("/", "-").replace("_", "-") + snr_file_path = os.path.join("model_snr_results", f"snr_results_{model_name_slug}.json") if os.path.exists(snr_file_path): print(f"Found existing SNR results file for {args.model_name}") From ffa19d971995a3895176b9e80cee07e370873baf Mon Sep 17 00:00:00 2001 From: T145 Date: Sun, 22 Dec 2024 11:33:09 -0500 Subject: [PATCH 7/9] Cleaned up the requirements --- requirements.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index cb9dc99..cff1e9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,4 @@ -auto_mix_prep -datasets -icecream numpy -pandas -prompt_toolkit -pydantic torch tqdm transformers -accelerate \ No newline at end of file From 6e25e2c0f4d1a5a0a6fdd4d563ab095093c6628d Mon Sep 17 00:00:00 2001 From: T145 Date: Sun, 22 Dec 2024 11:44:27 -0500 Subject: [PATCH 8/9] Moved main logic to a helper ftn --- spectrum.py | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/spectrum.py b/spectrum.py index f31ff31..e13ddf1 100644 --- a/spectrum.py +++ b/spectrum.py @@ -166,7 +166,7 @@ def assess_layers_snr(self, selected_weight_types): def save_snr_to_json(self): model_name_slug = ( - os.path.basename(self.model_name) + os.path.basename(self.model_name).replace("_", "-") if os.path.exists(self.model_name) else self.model_name.replace("/", "-").replace("_", "-") ) @@ -262,26 +262,21 @@ def save_top_snr_ratios_to_json(self, json_filename, filename=None): print(f"All SNR layers sorted and saved to {filename}") -def main(): - # Handle command-line arguments - parser = argparse.ArgumentParser(description="Process SNR data for layers.") - parser.add_argument("--model-name", type=str, required=True, help="Model name or path to the model") - parser.add_argument("--top-percent", type=int, default=50, help="Top percentage of layers to select") - parser.add_argument("--batch-size", type=int, default=1, help="Job batch size") - parser.add_argument("--cuda", type=bool, default=False, help="Whether to use the GPU") - args = parser.parse_args() - - # Check for existing SNR results file - model_name_slug = args.model_name.replace("/", "-").replace("_", "-") +def calculate_model_snr(model: str, top_percent: int, batch_size: int = 1, cuda: bool = False): + model_name_slug = ( + os.path.basename(model).replace("_", "-") + if os.path.exists(model) + else model.replace("/", "-").replace("_", "-") + ) snr_file_path = os.path.join("model_snr_results", f"snr_results_{model_name_slug}.json") if os.path.exists(snr_file_path): - print(f"Found existing SNR results file for {args.model_name}") - modifier = ModelModifier(top_percent=args.top_percent, cuda=args.cuda) - modifier.generate_unfrozen_params_yaml(snr_file_path, args.top_percent) + print(f"Found existing SNR results file for {model}") + modifier = ModelModifier(top_percent=top_percent, cuda=cuda) + modifier.generate_unfrozen_params_yaml(snr_file_path, top_percent) else: - print(f"No existing SNR results file found for {args.model_name}. Proceeding with SNR calculation.") - modifier = ModelModifier(model_name=args.model_name, batch_size=args.batch_size, cuda=args.cuda) + print(f"No existing SNR results file found for {model}. Proceeding with SNR calculation.") + modifier = ModelModifier(model_name=model, batch_size=batch_size, cuda=cuda) selected_weight_types = modifier.select_weights() if selected_weight_types: @@ -292,5 +287,17 @@ def main(): print("No weight types selected.") +def main(): + # Handle command-line arguments + parser = argparse.ArgumentParser(description="Process SNR data for layers.") + parser.add_argument("--model-name", type=str, required=True, help="Model name or path to the model") + parser.add_argument("--top-percent", type=int, default=50, help="Top percentage of layers to select") + parser.add_argument("--batch-size", type=int, default=1, help="Job batch size") + parser.add_argument("--cuda", type=bool, default=False, help="Whether to use the GPU") + args = parser.parse_args() + + calculate_model_snr(args.model_name, args.top_percent, args.batch_size, args.cuda) + + if __name__ == "__main__": main() From 34b74011106d4668176ebd0335b78af3d0d77f48 Mon Sep 17 00:00:00 2001 From: T145 Date: Sun, 22 Dec 2024 15:57:04 -0500 Subject: [PATCH 9/9] Runtime fix --- __init__.py | 0 requirements.txt | 1 + spectrum.py | 11 +++++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 __init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt index cff1e9c..1433d7d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ numpy torch tqdm transformers +accelerate \ No newline at end of file diff --git a/spectrum.py b/spectrum.py index e13ddf1..02d855f 100644 --- a/spectrum.py +++ b/spectrum.py @@ -83,9 +83,16 @@ def select_weights(self): weight_types = self.get_weight_types() categories = dict() + def get_layer(weight: list): + try: + return weight[1] + except IndexError: + return None + for wt in weight_types: - print(wt) - category, layer = wt.split(".") + arr = wt.split(".") + category = arr[0] + layer = get_layer(arr) if category in ["mlp", "self_attn"] and layer != "rotary_emb": categories.setdefault(category, list()).append(wt)