From f95702c8332bcdca73a5e4e418628361d3f31fe1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 4 Feb 2024 20:21:08 -0800 Subject: [PATCH 01/16] Add sampler order support --- modules/sampler_hijack.py | 47 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 59b90b02ce..be607d5ab5 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -209,6 +209,7 @@ class MirostatLogitsWarper(LogitsWarper): def __init__(self, mirostat_mode: int, mirostat_tau: float, mirostat_eta: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1): if mirostat_mode not in [2]: raise ValueError(f"`mirostat` has to be a an integer 2, but is {mirostat_mode}") + self.mirostat_mode = mirostat_mode self.mirostat_eta = mirostat_eta self.mirostat_tau = mirostat_tau @@ -301,6 +302,7 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to def get_logits_warper_patch(self, generation_config): + # Make sure that temperature is float and not int if isinstance(generation_config.temperature, int): generation_config.temperature = float(generation_config.temperature) @@ -346,7 +348,44 @@ def get_logits_warper_patch(self, generation_config): normalize = None warpers += warpers_to_add - if generation_config.temperature_last: + + # Custom sampler order + if generation_config.sampler_priority != "": + + # Create a dictionary to map class names to their nicknames + class_name_to_nickname = { + 'EpsilonLogitsWarper': 'epsilon_cutoff', + 'EtaLogitsWarper': 'eta_cutoff', + 'MinPLogitsWarper': 'min_p', + 'MirostatLogitsWarper': 'mirostat', + 'ModifiedTemperatureLogitsWarper': 'temperature', + 'TailFreeLogitsWarper': 'tfs', + 'TopALogitsWarper': 'top_a', + 'TopKLogitsWarper': 'top_k', + 'TopPLogitsWarper': 'top_p', + 'TypicalLogitsWarper': 'typical_p' + } + + # Assumed to be passed as a 
comma-separated list of parameters + # Example: eta_cutoff,typical_p,temperature,min_p + sampler_priority = [x.strip() for x in generation_config.sampler_priority.split(',')] + + def custom_sort_key(obj): + class_name = obj.__class__.__name__ + + # Return a large value if class name is not mapped or if the mapped nickname is not in priority + if class_name not in class_name_to_nickname or class_name_to_nickname[class_name] not in sampler_priority: + return float('inf') + + # Return the index of the nickname in the priority list for sorting + return sampler_priority.index(class_name_to_nickname[class_name]) + + # Sort the list using the custom key function + warpers = sorted(warpers, key=custom_sort_key) + + # Move temperature to the end if temperature_last is set + # and sampler_priority is not set + elif generation_config.temperature_last: temperature_idx = None for i in range(len(warpers)): if warpers[i].__class__.__name__ in ['TemperatureLogitsWarper', 'ModifiedTemperatureLogitsWarper']: @@ -361,8 +400,9 @@ def get_logits_warper_patch(self, generation_config): warpers.append(SpyLogitsWarper()) warpers = LogitsProcessorList(warpers) - # for i in range(len(warpers)): - # print(warpers[i].__class__.__name__) + for i in range(len(warpers)): + print(warpers[i].__class__.__name__) + return warpers @@ -402,6 +442,7 @@ def generation_config_init_patch(self, **kwargs): self.presence_penalty = kwargs.pop("presence_penalty", 0) self.frequency_penalty = kwargs.pop("frequency_penalty", 0) self.temperature_last = kwargs.pop("temperature_last", False) + self.sampler_priority = kwargs.pop("sampler_priority", "") def hijack_samplers(): From ed92c80a2ba41607cbb2e24e11d4d32b8b98435e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:06:09 -0800 Subject: [PATCH 02/16] Better handle temperature --- modules/sampler_hijack.py | 285 +++++++++++++++++++++----------------- 1 file changed, 159 insertions(+), 126 deletions(-) 
diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index be607d5ab5..61efe55c50 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -7,7 +7,6 @@ LogitNormalization, LogitsProcessor, LogitsProcessorList, - TemperatureLogitsWarper ) from modules import shared @@ -17,10 +16,10 @@ class ModifiedTemperatureLogitsWarper(LogitsWarper): ''' - Based on the original Transformers temperature logits warper, this - adds support for dynamic temperature and quadratic sampling. + A copy of the original Transformers temperature logits warper. ''' - def __init__(self, temperature: float, dynamic_temperature: bool, dynatemp_low: float, dynatemp_high: float, dynatemp_exponent: float, smoothing_factor: float): + + def __init__(self, temperature: float): if not isinstance(temperature, float) or not (temperature > 0): except_msg = ( f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token " @@ -32,81 +31,90 @@ def __init__(self, temperature: float, dynamic_temperature: bool, dynatemp_low: raise ValueError(except_msg) self.temperature = temperature - self.dynamic_temperature = dynamic_temperature + + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + scores = scores / self.temperature + return scores + + +class DynamicTemperatureLogitsWarper(LogitsWarper): + ''' + Dynamic temperature. 
+ ''' + + def __init__(self, dynatemp_low: float, dynatemp_high: float, dynatemp_exponent: float): self.dynatemp_low = dynatemp_low self.dynatemp_high = dynatemp_high self.dynatemp_exponent = dynatemp_exponent - self.smoothing_factor = smoothing_factor def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + min_temp = self.dynatemp_low + max_temp = self.dynatemp_high + exponent_val = self.dynatemp_exponent - # Quadratic sampling - if self.smoothing_factor > 0: + # Convert logits to probabilities + probs = torch.softmax(scores, dim=-1) - # Compute the maximum logit value - max_logit = scores.max() + # Calculate entropy of the softmax probabilities + entropy = -1.0 * torch.where(probs > 0, probs * torch.log(probs), torch.zeros_like(probs)).sum() - # Apply the quadratic transformation - transformed_logits = -(self.smoothing_factor * (scores - max_logit)**2) + max_logit + # Guard against future possible division by zero + entropy = max(entropy, torch.tensor(1e-10)) # Ensures entropy is slightly greater than 0 - # No need to print the top 5 logits since this is not required - # print("Original top 5 logits: ", torch.topk(scores, 5)) - # print("New top 5 logits: ", torch.topk(transformed_logits, 5)) + # Any logits which are not -Infinity will be considered for calculating max entropy. 
+ num_valid_tokens = torch.sum(scores > -float('inf')).item() - return transformed_logits + # Now, calculate the max entropy by using only the valid tokens' count + max_entropy = math.log(num_valid_tokens) - # Dynamic temperature - elif self.dynamic_temperature: - min_temp = self.dynatemp_low - max_temp = self.dynatemp_high - exponent_val = self.dynatemp_exponent + # Guard against future possible division by zero + max_entropy = max_entropy if max_entropy > 0.0 else 1e-10 - # Convert logits to probabilities - probs = torch.softmax(scores, dim=-1) + # Normalize the entropy + normalized_entropy = entropy / max_entropy - # Calculate entropy of the softmax probabilities - entropy = -1.0 * torch.where(probs > 0, probs * torch.log(probs), torch.zeros_like(probs)).sum() + # Map the normalized entropy to the desired temperature range using the power function + dyn_temp = min_temp + (max_temp - min_temp) * (normalized_entropy.pow(exponent_val)) - # Guard against future possible division by zero - entropy = max(entropy, torch.tensor(1e-10)) # Ensures entropy is slightly greater than 0 + # Apply the dynamically calculated temperature scaling + scores = scores / dyn_temp - # Any logits which are not -Infinity will be considered for calculating max entropy. 
- num_valid_tokens = torch.sum(scores > -float('inf')).item() + # print("----------------------\nTemperature from generation_config:", self.temperature) + # print("min_temp:", min_temp) + # print("max_temp:", max_temp) + # print("Entropy:", entropy.item()) + # print("Max Possible Entropy considering valid tokens only:", max_entropy) + # print("Normalized Entropy:", normalized_entropy.item()) + # print("Dynamic Temperature (dyn_temp):", dyn_temp.item()) + # print("----------------------") - # Now, calculate the max entropy by using only the valid tokens' count - max_entropy = math.log(num_valid_tokens) + # max_prob_token_id = torch.argmax(scores, dim=-1) # Get the token ID with the highest probability + # max_prob_token = shared.tokenizer.convert_ids_to_tokens(int(max_prob_token_id)) # Convert ID to token + # print("--- T=", float(dyn_temp), "token=", max_prob_token, "min=", min_temp, "max=", max_temp, "exponent=", exponent_val) - # Guard against future possible division by zero - max_entropy = max_entropy if max_entropy > 0.0 else 1e-10 + return scores - # Normalize the entropy - normalized_entropy = entropy / max_entropy - # Map the normalized entropy to the desired temperature range using the power function - dyn_temp = min_temp + (max_temp - min_temp) * (normalized_entropy.pow(exponent_val)) +class QuadraticSamplingLogitsWarper(LogitsWarper): + ''' + Quadratic sampling. 
+ ''' - # Apply the dynamically calculated temperature scaling - scores = scores / dyn_temp + def __init__(self, smoothing_factor: float): + self.smoothing_factor = smoothing_factor - # print("----------------------\nTemperature from generation_config:", self.temperature) - # print("min_temp:", min_temp) - # print("max_temp:", max_temp) - # print("Entropy:", entropy.item()) - # print("Max Possible Entropy considering valid tokens only:", max_entropy) - # print("Normalized Entropy:", normalized_entropy.item()) - # print("Dynamic Temperature (dyn_temp):", dyn_temp.item()) - # print("----------------------") + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + # Compute the maximum logit value + max_logit = scores.max() - # max_prob_token_id = torch.argmax(scores, dim=-1) # Get the token ID with the highest probability - # max_prob_token = shared.tokenizer.convert_ids_to_tokens(int(max_prob_token_id)) # Convert ID to token - # print("--- T=", float(dyn_temp), "token=", max_prob_token, "min=", min_temp, "max=", max_temp, "exponent=", exponent_val) + # Apply the quadratic transformation + transformed_logits = -(self.smoothing_factor * (scores - max_logit)**2) + max_logit - return scores + # No need to print the top 5 logits since this is not required + # print("Original top 5 logits: ", torch.topk(scores, 5)) + # print("New top 5 logits: ", torch.topk(transformed_logits, 5)) - # Regular temperature - else: - scores = scores / self.temperature - return scores + return transformed_logits class MinPLogitsWarper(LogitsWarper): @@ -303,44 +311,73 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to def get_logits_warper_patch(self, generation_config): - # Make sure that temperature is float and not int + # Parameter sanitization if isinstance(generation_config.temperature, int): - generation_config.temperature = float(generation_config.temperature) - - temperature = generation_config.temperature - 
if generation_config.dynamic_temperature or generation_config.smoothing_factor > 0: - # Make sure TemperatureLogitsWarper will be created by temporarily - # setting temperature to a value != 1. - generation_config.temperature = 1.1 + generation_config.temperature = float(generation_config.temperature) # Must be float + # Get the original warpers warpers = self._get_logits_warper_old(generation_config) + + # Replace temperature with our modified class. + # Currently, it behaves identically to the original. for i in range(len(warpers)): if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper': warpers[i] = ModifiedTemperatureLogitsWarper( - temperature, - generation_config.dynamic_temperature, - generation_config.dynatemp_low, - generation_config.dynatemp_high, - generation_config.dynatemp_exponent, - generation_config.smoothing_factor + generation_config.temperature, ) + # Add custom warpers warpers_to_add = LogitsProcessorList() min_tokens_to_keep = 2 if generation_config.num_beams > 1 else 1 + if generation_config.tfs is not None and 0.0 <= generation_config.tfs < 1.0: + warpers_to_add.append( + TailFreeLogitsWarper( + tfs=generation_config.tfs, + min_tokens_to_keep=min_tokens_to_keep + ) + ) + + if generation_config.top_a is not None and 0.0 < generation_config.top_a <= 1.0: + warpers_to_add.append( + TopALogitsWarper( + top_a=generation_config.top_a, + min_tokens_to_keep=min_tokens_to_keep + ) + ) + + if generation_config.min_p is not None and 0.0 < generation_config.min_p <= 1.0: + warpers_to_add.append( + MinPLogitsWarper( + min_p=generation_config.min_p, + min_tokens_to_keep=min_tokens_to_keep + ) + ) + + if generation_config.dynamic_temperature > 0: + warpers_to_add.append( + DynamicTemperatureLogitsWarper( + dynatemp_low=generation_config.dynatemp_low, + dynatemp_high=generation_config.dynatemp_high, + dynatemp_exponent=generation_config.dynatemp_exponent, + ) + ) + + if generation_config.smoothing_factor > 0: + warpers_to_add.append( + 
QuadraticSamplingLogitsWarper( + smoothing_factor=generation_config.smoothing_factor + ) + ) if generation_config.mirostat_mode is not None and generation_config.mirostat_mode == 2: - warpers_to_add.append(MirostatLogitsWarper(mirostat_mode=generation_config.mirostat_mode, mirostat_eta=generation_config.mirostat_eta, mirostat_tau=generation_config.mirostat_tau, min_tokens_to_keep=min_tokens_to_keep)) - # We need to disable samplers other than temperature - for warper in warpers: - if not isinstance(warper, TemperatureLogitsWarper): - warpers.remove(warper) - else: - if generation_config.tfs is not None and 0.0 <= generation_config.tfs < 1.0: - warpers_to_add.append(TailFreeLogitsWarper(tfs=generation_config.tfs, min_tokens_to_keep=min_tokens_to_keep)) - if generation_config.top_a is not None and 0.0 < generation_config.top_a <= 1.0: - warpers_to_add.append(TopALogitsWarper(top_a=generation_config.top_a, min_tokens_to_keep=min_tokens_to_keep)) - if generation_config.min_p is not None and 0.0 < generation_config.min_p <= 1.0: - warpers_to_add.append(MinPLogitsWarper(min_p=generation_config.min_p, min_tokens_to_keep=min_tokens_to_keep)) + warpers_to_add.append( + MirostatLogitsWarper( + mirostat_mode=generation_config.mirostat_mode, + mirostat_eta=generation_config.mirostat_eta, + mirostat_tau=generation_config.mirostat_tau, + min_tokens_to_keep=min_tokens_to_keep + ) + ) if len(warpers) > 0 and isinstance(warpers[-1], LogitNormalization): normalize = warpers.pop(-1) @@ -349,52 +386,48 @@ def get_logits_warper_patch(self, generation_config): warpers += warpers_to_add - # Custom sampler order - if generation_config.sampler_priority != "": - - # Create a dictionary to map class names to their nicknames - class_name_to_nickname = { - 'EpsilonLogitsWarper': 'epsilon_cutoff', - 'EtaLogitsWarper': 'eta_cutoff', - 'MinPLogitsWarper': 'min_p', - 'MirostatLogitsWarper': 'mirostat', - 'ModifiedTemperatureLogitsWarper': 'temperature', - 'TailFreeLogitsWarper': 'tfs', - 
'TopALogitsWarper': 'top_a', - 'TopKLogitsWarper': 'top_k', - 'TopPLogitsWarper': 'top_p', - 'TypicalLogitsWarper': 'typical_p' - } - - # Assumed to be passed as a comma-separated list of parameters - # Example: eta_cutoff,typical_p,temperature,min_p - sampler_priority = [x.strip() for x in generation_config.sampler_priority.split(',')] - - def custom_sort_key(obj): - class_name = obj.__class__.__name__ - - # Return a large value if class name is not mapped or if the mapped nickname is not in priority - if class_name not in class_name_to_nickname or class_name_to_nickname[class_name] not in sampler_priority: - return float('inf') - - # Return the index of the nickname in the priority list for sorting - return sampler_priority.index(class_name_to_nickname[class_name]) - - # Sort the list using the custom key function + # Sort the samplers + # sampler_priority is assumed to be passed as a comma-separated list of parameters. + # Example: eta_cutoff,typical_p,temperature,min_p + sampler_priority = [x.strip() for x in generation_config.sampler_priority.split(',')] + + # Handle temperature_last + if generation_config.temperature_last: + for param_name in ['temperature', 'dynamic_temperature', 'quadratic_sampling']: + if param_name in sampler_priority: + index = sampler_priority.index(param_name) + sampler_priority.append(sampler_priority.pop(index)) + + class_name_to_nickname = { + 'DynamicTemperatureLogitsWarper': 'dynamic_temperature', + 'EpsilonLogitsWarper': 'epsilon_cutoff', + 'EtaLogitsWarper': 'eta_cutoff', + 'MinPLogitsWarper': 'min_p', + 'MirostatLogitsWarper': 'mirostat', + 'ModifiedTemperatureLogitsWarper': 'temperature', + 'QuadraticSamplingLogitsWarper': 'quadratic_sampling', + 'TailFreeLogitsWarper': 'tfs', + 'TopALogitsWarper': 'top_a', + 'TopKLogitsWarper': 'top_k', + 'TopPLogitsWarper': 'top_p', + 'TypicalLogitsWarper': 'typical_p' + } + + def custom_sort_key(obj): + class_name = obj.__class__.__name__ + + # Return a large value if class name is not 
mapped or if the mapped nickname is not in priority + if class_name not in class_name_to_nickname or class_name_to_nickname[class_name] not in sampler_priority: + print("----------------------------->", class_name) + return float('inf') + + # Return the index of the nickname in the priority list for sorting + return sampler_priority.index(class_name_to_nickname[class_name]) + + # Sort the list using the custom key function + if False: warpers = sorted(warpers, key=custom_sort_key) - # Move temperature to the end if temperature_last is set - # and sampler_priority is not set - elif generation_config.temperature_last: - temperature_idx = None - for i in range(len(warpers)): - if warpers[i].__class__.__name__ in ['TemperatureLogitsWarper', 'ModifiedTemperatureLogitsWarper']: - temperature_idx = i - break - - if temperature_idx is not None: - warpers.append(warpers.pop(temperature_idx)) - if normalize is not None: warpers.append(normalize) @@ -442,7 +475,7 @@ def generation_config_init_patch(self, **kwargs): self.presence_penalty = kwargs.pop("presence_penalty", 0) self.frequency_penalty = kwargs.pop("frequency_penalty", 0) self.temperature_last = kwargs.pop("temperature_last", False) - self.sampler_priority = kwargs.pop("sampler_priority", "") + self.sampler_priority = kwargs.pop("sampler_priority", "temperature,top_k,top_p,typical_p,epsilon_cutoff,eta_cutoff,tfs,top_a,min_p,dynamic_temperature,quadratic_sampling,mirostat") def hijack_samplers(): From 0fcbc1fffcec37f148c9820fd457016cc7422063 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:20:53 -0800 Subject: [PATCH 03/16] Add to loaders --- extensions/openai/typing.py | 1 + modules/loaders.py | 4 ++++ modules/presets.py | 1 + modules/sampler_hijack.py | 10 +++++----- modules/shared.py | 1 + modules/text_generation.py | 2 +- modules/ui.py | 1 + modules/ui_parameters.py | 5 ++++- 8 files changed, 18 insertions(+), 7 deletions(-) diff --git 
a/extensions/openai/typing.py b/extensions/openai/typing.py index 3deb464fa9..13b14751d4 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -40,6 +40,7 @@ class GenerationOptions(BaseModel): max_tokens_second: int = 0 prompt_lookup_num_tokens: int = 0 custom_token_bans: str = "" + sampler_priority: str = "" auto_max_new_tokens: bool = False ban_eos_token: bool = False add_bos_token: bool = True diff --git a/modules/loaders.py b/modules/loaders.py index 6107dac7ab..5be83357d4 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -182,6 +182,7 @@ def transformers_samplers(): 'negative_prompt', 'ban_eos_token', 'custom_token_bans', + 'sampler_priority', 'add_bos_token', 'skip_special_tokens', 'auto_max_new_tokens', @@ -230,6 +231,7 @@ def transformers_samplers(): 'negative_prompt', 'ban_eos_token', 'custom_token_bans', + 'sampler_priority', 'add_bos_token', 'skip_special_tokens', 'auto_max_new_tokens', @@ -252,6 +254,7 @@ def transformers_samplers(): 'grammar_string', 'ban_eos_token', 'custom_token_bans', + 'sampler_priority', }, 'llamacpp_HF': { 'temperature', @@ -287,6 +290,7 @@ def transformers_samplers(): 'negative_prompt', 'ban_eos_token', 'custom_token_bans', + 'sampler_priority', 'add_bos_token', 'skip_special_tokens', 'auto_max_new_tokens', diff --git a/modules/presets.py b/modules/presets.py index 966c706ef6..f3cf82c77e 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -42,6 +42,7 @@ def default_preset(): 'num_beams': 1, 'length_penalty': 1, 'early_stopping': False, + 'sampler_priority': 'temperature\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\ndynamic_temperature\nquadratic_sampling\nmirostat' } diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 61efe55c50..14bca1255d 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -14,7 +14,7 @@ global_scores = None -class ModifiedTemperatureLogitsWarper(LogitsWarper): +class 
TemperatureLogitsWarperCustom(LogitsWarper): ''' A copy of the original Transformers temperature logits warper. ''' @@ -322,7 +322,7 @@ def get_logits_warper_patch(self, generation_config): # Currently, it behaves identically to the original. for i in range(len(warpers)): if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper': - warpers[i] = ModifiedTemperatureLogitsWarper( + warpers[i] = TemperatureLogitsWarperCustom( generation_config.temperature, ) @@ -387,9 +387,9 @@ def get_logits_warper_patch(self, generation_config): warpers += warpers_to_add # Sort the samplers - # sampler_priority is assumed to be passed as a comma-separated list of parameters. + # sampler_priority is assumed to contain parameters separated by newlines or commas. # Example: eta_cutoff,typical_p,temperature,min_p - sampler_priority = [x.strip() for x in generation_config.sampler_priority.split(',')] + sampler_priority = [x.strip() for x in generation_config.sampler_priority.replace('\n', ',').split(',')] # Handle temperature_last if generation_config.temperature_last: @@ -404,7 +404,7 @@ def get_logits_warper_patch(self, generation_config): 'EtaLogitsWarper': 'eta_cutoff', 'MinPLogitsWarper': 'min_p', 'MirostatLogitsWarper': 'mirostat', - 'ModifiedTemperatureLogitsWarper': 'temperature', + 'TemperatureLogitsWarperCustom': 'temperature', 'QuadraticSamplingLogitsWarper': 'quadratic_sampling', 'TailFreeLogitsWarper': 'tfs', 'TopALogitsWarper': 'top_a', diff --git a/modules/shared.py b/modules/shared.py index 38d083494a..eea3d27f2c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -50,6 +50,7 @@ 'prompt_lookup_num_tokens': 0, 'custom_stopping_strings': '', 'custom_token_bans': '', + 'sampler_priority': 'temperature,top_k,top_p,typical_p,epsilon_cutoff,eta_cutoff,tfs,top_a,min_p,dynamic_temperature,quadratic_sampling,mirostat', 'auto_max_new_tokens': False, 'ban_eos_token': False, 'add_bos_token': True, diff --git a/modules/text_generation.py b/modules/text_generation.py index 
198b7575d0..0f9e0b0632 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -286,7 +286,7 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0): def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} - for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']: + for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping', 'sampler_priority']: if k in state: generate_params[k] = state[k] diff --git a/modules/ui.py b/modules/ui.py index acd959a07c..06498f69bf 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -149,6 +149,7 @@ def list_interface_input_elements(): 'add_bos_token', 'ban_eos_token', 'custom_token_bans', + 'sampler_priority', 'truncation_length', 'custom_stopping_strings', 'skip_special_tokens', diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index a81ed27a52..0afb28596e 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -54,7 +54,7 @@ def create_ui(default_preset): shared.gradio['dynatemp_low'] = 
gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature']) shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature']) shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature']) - shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Makes temperature the last sampler instead of the first.') + shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Deprecated parameter; use sampler_priority instead. Makes temperature the last sampler instead of the first.') shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') with gr.Accordion('Other parameters', open=False): @@ -85,6 +85,9 @@ def create_ui(default_preset): shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.') shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming') + with gr.Blocks(): + shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by newlines or commas.') + with gr.Row() as shared.gradio['grammar_file_row']: shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: 
{'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu) From 0e9e285b780ff2ef0d6da0e24379c042a4fbc333 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:24:18 -0800 Subject: [PATCH 04/16] Minor fixes --- modules/sampler_hijack.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 14bca1255d..cc827cc19a 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -389,7 +389,7 @@ def get_logits_warper_patch(self, generation_config): # Sort the samplers # sampler_priority is assumed to contain parameters separated by newlines or commas. # Example: eta_cutoff,typical_p,temperature,min_p - sampler_priority = [x.strip() for x in generation_config.sampler_priority.replace('\n', ',').split(',')] + sampler_priority = [x.strip() for x in generation_config.sampler_priority.replace('\n', ',').split(',') if x.strip()] # Handle temperature_last if generation_config.temperature_last: @@ -425,8 +425,7 @@ def custom_sort_key(obj): return sampler_priority.index(class_name_to_nickname[class_name]) # Sort the list using the custom key function - if False: - warpers = sorted(warpers, key=custom_sort_key) + warpers = sorted(warpers, key=custom_sort_key) if normalize is not None: warpers.append(normalize) From 6418a14e5d907dbd619a0526c9597c0ca76b93ad Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:42:52 -0800 Subject: [PATCH 05/16] Move all temperatures to the beginning, for consistency --- modules/presets.py | 2 +- modules/sampler_hijack.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/presets.py b/modules/presets.py index f3cf82c77e..2a4a4dde3e 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -42,7 +42,7 @@ def default_preset(): 'num_beams': 1, 'length_penalty': 1, 'early_stopping': False, - 
'sampler_priority': 'temperature\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\ndynamic_temperature\nquadratic_sampling\nmirostat' + 'sampler_priority': 'temperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat' } diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index cc827cc19a..409c4dbf57 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -474,7 +474,7 @@ def generation_config_init_patch(self, **kwargs): self.presence_penalty = kwargs.pop("presence_penalty", 0) self.frequency_penalty = kwargs.pop("frequency_penalty", 0) self.temperature_last = kwargs.pop("temperature_last", False) - self.sampler_priority = kwargs.pop("sampler_priority", "temperature,top_k,top_p,typical_p,epsilon_cutoff,eta_cutoff,tfs,top_a,min_p,dynamic_temperature,quadratic_sampling,mirostat") + self.sampler_priority = kwargs.pop("sampler_priority", "temperature,dynamic_temperature,quadratic_sampling,top_k,top_p,typical_p,epsilon_cutoff,eta_cutoff,tfs,top_a,min_p,mirostat") def hijack_samplers(): From 74882d514603d4185b7027cb5ef3b26c6014632f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:54:31 -0800 Subject: [PATCH 06/16] Better handle temperature_last --- modules/sampler_hijack.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 409c4dbf57..a6df49b335 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -395,8 +395,11 @@ def get_logits_warper_patch(self, generation_config): if generation_config.temperature_last: for param_name in ['temperature', 'dynamic_temperature', 'quadratic_sampling']: if param_name in sampler_priority: - index = sampler_priority.index(param_name) - sampler_priority.append(sampler_priority.pop(index)) + if param_name in sampler_priority: + index = 
sampler_priority.index(param_name) + sampler_priority.append(sampler_priority.pop(index)) + else: + sampler_priority.append(param_name) class_name_to_nickname = { 'DynamicTemperatureLogitsWarper': 'dynamic_temperature', From d6bb14da5ffee8ab45da77c762721f5959fffa4b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:58:01 -0800 Subject: [PATCH 07/16] Lint --- modules/sampler_hijack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index a6df49b335..10eff8c298 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -407,9 +407,9 @@ def get_logits_warper_patch(self, generation_config): 'EtaLogitsWarper': 'eta_cutoff', 'MinPLogitsWarper': 'min_p', 'MirostatLogitsWarper': 'mirostat', - 'TemperatureLogitsWarperCustom': 'temperature', 'QuadraticSamplingLogitsWarper': 'quadratic_sampling', 'TailFreeLogitsWarper': 'tfs', + 'TemperatureLogitsWarperCustom': 'temperature', 'TopALogitsWarper': 'top_a', 'TopKLogitsWarper': 'top_k', 'TopPLogitsWarper': 'top_p', From 2211786796bacf4160690a9c6a25372a70166b2b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:59:09 -0800 Subject: [PATCH 08/16] Update docs --- docs/03 - Parameters Tab.md | 2 +- modules/ui_parameters.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/03 - Parameters Tab.md b/docs/03 - Parameters Tab.md index affa9e7347..97665cad36 100644 --- a/docs/03 - Parameters Tab.md +++ b/docs/03 - Parameters Tab.md @@ -55,7 +55,7 @@ For more information about the parameters, the [transformers documentation](http * **mirostat_tau**: No idea, see the paper for details. According to the Preset Arena, 8 is a good value. * **mirostat_eta**: No idea, see the paper for details. According to the Preset Arena, 0.1 is a good value. * **dynamic_temperature**: Activates Dynamic Temperature. 
This modifies temperature to range between "dynatemp_low" (minimum) and "dynatemp_high" (maximum), with an entropy-based scaling. The steepness of the curve is controlled by "dynatemp_exponent". -* **smoothing_factor**: Activates Quadratic Sampling. This takes precedence over regular temperature and dynamic temperature, and replaces those samplers. When `0 < smoothing_factor < 1`, the logits distribution becomes flatter. When `smoothing_factor > 1`, it becomes more peaked. +* **smoothing_factor**: Activates Quadratic Sampling. When `0 < smoothing_factor < 1`, the logits distribution becomes flatter. When `smoothing_factor > 1`, it becomes more peaked. * **temperature_last**: Makes temperature the last sampler instead of the first. With this, you can remove low probability tokens with a sampler like min_p and then use a high temperature to make the model creative without losing coherency. * **do_sample**: When unchecked, sampling is entirely disabled, and greedy decoding is used instead (the most likely token is always picked). * **Seed**: Set the Pytorch seed to this number. Note that some loaders do not use Pytorch (notably llama.cpp), and others are not deterministic (notably ExLlama v1 and v2). For these loaders, the seed has no effect. 
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 0afb28596e..d1bd057b79 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -49,7 +49,7 @@ def create_ui(default_preset): shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') - shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Replaces temperature with Quadratic Sampling.') + shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.') shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature') shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature']) shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature']) From ac8953a398b9fe2df8868b9183770d458643e21c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 08:23:59 -0800 Subject: [PATCH 09/16] Remove a debug statement --- modules/sampler_hijack.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 10eff8c298..d36343b370 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -421,7 +421,6 @@ def custom_sort_key(obj): # Return a large value if class name is not 
mapped or if the mapped nickname is not in priority if class_name not in class_name_to_nickname or class_name_to_nickname[class_name] not in sampler_priority: - print("----------------------------->", class_name) return float('inf') # Return the index of the nickname in the priority list for sorting From 220c6d340ed5f8bf15e1ba498985d28bde764c93 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 08:46:12 -0800 Subject: [PATCH 10/16] Minor changes --- modules/sampler_hijack.py | 4 ++-- modules/ui_parameters.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index d36343b370..33c2e22663 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -353,7 +353,7 @@ def get_logits_warper_patch(self, generation_config): ) ) - if generation_config.dynamic_temperature > 0: + if generation_config.dynamic_temperature: warpers_to_add.append( DynamicTemperatureLogitsWarper( dynatemp_low=generation_config.dynatemp_low, @@ -386,7 +386,7 @@ def get_logits_warper_patch(self, generation_config): warpers += warpers_to_add - # Sort the samplers + # Sort the samplers. # sampler_priority is assumed to contain parameters separated by newlines or commas. 
# Example: eta_cutoff,typical_p,temperature,min_p sampler_priority = [x.strip() for x in generation_config.sampler_priority.replace('\n', ',').split(',') if x.strip()] diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index d1bd057b79..6542b5ba4b 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -54,7 +54,7 @@ def create_ui(default_preset): shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature']) shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature']) shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature']) - shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Deprecated parameter; use sampler_priority instead. 
Makes temperature the last sampler instead of the first.') + shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".') shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') with gr.Accordion('Other parameters', open=False): From 558c8f6957e3c2e16a743a68d07a0c9f2a282108 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 08:51:08 -0800 Subject: [PATCH 11/16] Update docs --- docs/03 - Parameters Tab.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/03 - Parameters Tab.md b/docs/03 - Parameters Tab.md index 97665cad36..c274a9c5f2 100644 --- a/docs/03 - Parameters Tab.md +++ b/docs/03 - Parameters Tab.md @@ -56,7 +56,7 @@ For more information about the parameters, the [transformers documentation](http * **mirostat_eta**: No idea, see the paper for details. According to the Preset Arena, 0.1 is a good value. * **dynamic_temperature**: Activates Dynamic Temperature. This modifies temperature to range between "dynatemp_low" (minimum) and "dynatemp_high" (maximum), with an entropy-based scaling. The steepness of the curve is controlled by "dynatemp_exponent". * **smoothing_factor**: Activates Quadratic Sampling. When `0 < smoothing_factor < 1`, the logits distribution becomes flatter. When `smoothing_factor > 1`, it becomes more peaked. -* **temperature_last**: Makes temperature the last sampler instead of the first. With this, you can remove low probability tokens with a sampler like min_p and then use a high temperature to make the model creative without losing coherency. +* **temperature_last**: Makes temperature the last sampler instead of the first. 
With this, you can remove low probability tokens with a sampler like min_p and then use a high temperature to make the model creative without losing coherency. Note: this parameter takes precedence over "Sampler priority". That means that `temperature`/`dynamic_temperature`/`quadratic_sampling` will be removed from wherever they are and moved to the end of the stack. * **do_sample**: When unchecked, sampling is entirely disabled, and greedy decoding is used instead (the most likely token is always picked). * **Seed**: Set the Pytorch seed to this number. Note that some loaders do not use Pytorch (notably llama.cpp), and others are not deterministic (notably ExLlama v1 and v2). For these loaders, the seed has no effect. * **encoder_repetition_penalty**: Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge. @@ -77,6 +77,7 @@ To the right (or below if you are on mobile), the following parameters are prese * **Add the bos_token to the beginning of prompts**: By default, the tokenizer will add a BOS (Beginning of Sequence) token to your prompt. During training, BOS tokens are used to separate different documents. If unchecked, no BOS token will be added, and the model will interpret your prompt as being in the middle of a document instead of at the start of one. This significantly changes the output and can make it more creative. * **Skip special tokens**: When decoding the generated tokens, skip special tokens from being converted to their text representation. Otherwise, BOS appears as `<s>`, EOS as `</s>`, etc. * **Activate text streaming**: When unchecked, the full response is outputted at once, without streaming the words one at a time. I recommend unchecking this parameter on high latency networks like running the webui on Google Colab or using `--share`.
+* **Sampler priority**: Allows you to customize the order in which the different samplers are applied. The first sampler on the list gets applied first. With this, custom orders like `top_p -> temperature -> top_k` can be defined. * **Load grammar from file**: Loads a GBNF grammar from a file under `text-generation-webui/grammars`. The output is written to the "Grammar" box below. You can also save and delete custom grammars using this menu. * **Grammar**: Allows you to constrain the model output to a particular format. For instance, you can make the model generate lists, JSON, specific words, etc. Grammar is extremely powerful and I highly recommend it. The syntax looks a bit daunting at first sight, but it gets very easy once you understand it. See the [GBNF Guide](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) for details. From 5df9fdf0895385259f46bef0c541fec5e173d613 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 5 Feb 2024 08:52:24 -0800 Subject: [PATCH 12/16] Not in llama.cpp, just llamacpp_HF --- modules/loaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/loaders.py b/modules/loaders.py index 5be83357d4..687a9e929d 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -254,7 +254,6 @@ def transformers_samplers(): 'grammar_string', 'ban_eos_token', 'custom_token_bans', - 'sampler_priority', }, 'llamacpp_HF': { 'temperature', From 27e30084a7ca0b9f68619cd5eb16cbdceaf97b26 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 6 Feb 2024 05:36:57 -0800 Subject: [PATCH 13/16] debug --- modules/text_generation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/text_generation.py b/modules/text_generation.py index 04625ab901..1b61815719 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -270,9 +270,13 @@ def apply_stopping_strings(reply, all_stop_strings): return reply, stop_found 
+time_lost = 0 +import time + def get_reply_from_output_ids(output_ids, state=None, starting_from=0): reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True) + a = time.time() # Handle tokenizers that do not add the leading space for the first token if (hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from) and not reply.startswith(' '): first_token = shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from])) @@ -282,10 +286,14 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0): if first_token.startswith('▁'): reply = ' ' + reply + global time_lost + time_lost += time.time() - a return reply def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): + global time_lost + time_lost = 0 generate_params = {} for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']: if k in state: @@ -407,6 +415,7 @@ def generate_with_streaming(**kwargs): except Exception: traceback.print_exc() finally: + print("TIME_LOST=", time_lost) t1 = time.time() original_tokens = len(original_input_ids[0]) new_tokens = len(output) - (original_tokens if not shared.is_seq2seq else 0) From c40194a38b83efdb504958b896ecb41d51dd79fe Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 6 Feb 2024 06:00:01 -0800 Subject: [PATCH 14/16] Provide the param as a list of strings over API --- extensions/openai/typing.py | 2 +- modules/sampler_hijack.py | 4 +--- 
modules/text_generation.py | 7 ++++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 13b14751d4..ec35116731 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -40,7 +40,7 @@ class GenerationOptions(BaseModel): max_tokens_second: int = 0 prompt_lookup_num_tokens: int = 0 custom_token_bans: str = "" - sampler_priority: str = "" + sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].") auto_max_new_tokens: bool = False ban_eos_token: bool = False add_bos_token: bool = True diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 33c2e22663..6c7ab4187f 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -387,9 +387,7 @@ def get_logits_warper_patch(self, generation_config): warpers += warpers_to_add # Sort the samplers. - # sampler_priority is assumed to contain parameters separated by newlines or commas. 
- # Example: eta_cutoff,typical_p,temperature,min_p - sampler_priority = [x.strip() for x in generation_config.sampler_priority.replace('\n', ',').split(',') if x.strip()] + sampler_priority = generation_config.sampler_priority # Handle temperature_last if generation_config.temperature_last: diff --git a/modules/text_generation.py b/modules/text_generation.py index 0f9e0b0632..0b210f5be1 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -286,10 +286,15 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0): def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} - for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping', 'sampler_priority']: + for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']: if k in state: generate_params[k] = state[k] + if isinstance(state['sampler_priority'], list): + generate_params['sampler_priority'] = state['sampler_priority'] + elif isinstance(state['sampler_priority'], str): + generate_params['sampler_priority'] 
= [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()] + if state['negative_prompt'] != '': generate_params['negative_prompt_ids'] = encode(state['negative_prompt']) From 058d2f9ec34f1bef001418c120c25fa4697b15f3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 6 Feb 2024 06:10:48 -0800 Subject: [PATCH 15/16] Print the warpers with --verbose --- modules/sampler_hijack.py | 11 +++++++---- modules/ui_parameters.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 6c7ab4187f..9701b03434 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -1,4 +1,5 @@ import math +import pprint import torch import transformers @@ -6,10 +7,11 @@ from transformers.generation.logits_process import ( LogitNormalization, LogitsProcessor, - LogitsProcessorList, + LogitsProcessorList ) from modules import shared +from modules.logging_colors import logger global_scores = None @@ -432,8 +434,9 @@ def custom_sort_key(obj): warpers.append(SpyLogitsWarper()) warpers = LogitsProcessorList(warpers) - for i in range(len(warpers)): - print(warpers[i].__class__.__name__) + if shared.args.verbose: + logger.info("WARPERS=") + pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint([x.__class__.__name__ for x in warpers]) return warpers @@ -474,7 +477,7 @@ def generation_config_init_patch(self, **kwargs): self.presence_penalty = kwargs.pop("presence_penalty", 0) self.frequency_penalty = kwargs.pop("frequency_penalty", 0) self.temperature_last = kwargs.pop("temperature_last", False) - self.sampler_priority = kwargs.pop("sampler_priority", "temperature,dynamic_temperature,quadratic_sampling,top_k,top_p,typical_p,epsilon_cutoff,eta_cutoff,tfs,top_a,min_p,mirostat") + self.sampler_priority = kwargs.pop("sampler_priority", ['temperature', 'dynamic_temperature', 'quadratic_sampling', 'top_k', 'top_p', 'typical_p', 
'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', 'min_p', 'mirostat']) def hijack_samplers(): diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 6542b5ba4b..078590dc11 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -86,7 +86,7 @@ def create_ui(default_preset): shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming') with gr.Blocks(): - shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by newlines or commas.') + shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.') with gr.Row() as shared.gradio['grammar_file_row']: shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown') From 2087a0806ec49d052353465d1f0a4cb461fc2263 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 6 Feb 2024 06:12:10 -0800 Subject: [PATCH 16/16] Revert "debug" This reverts commit 27e30084a7ca0b9f68619cd5eb16cbdceaf97b26. 
--- modules/text_generation.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index d8df5bfb9c..1808f8bf12 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -270,13 +270,9 @@ def apply_stopping_strings(reply, all_stop_strings): return reply, stop_found -time_lost = 0 -import time - def get_reply_from_output_ids(output_ids, state=None, starting_from=0): reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True) - a = time.time() # Handle tokenizers that do not add the leading space for the first token if (hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from) and not reply.startswith(' '): first_token = shared.tokenizer.convert_ids_to_tokens(int(output_ids[starting_from])) @@ -286,14 +282,10 @@ def get_reply_from_output_ids(output_ids, state=None, starting_from=0): if first_token.startswith('▁'): reply = ' ' + reply - global time_lost - time_lost += time.time() - a return reply def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): - global time_lost - time_lost = 0 generate_params = {} for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']: if k in state: @@ -420,7 +412,6 @@ def generate_with_streaming(**kwargs): except Exception: traceback.print_exc() finally: - print("TIME_LOST=", time_lost) t1 = time.time() original_tokens = len(original_input_ids[0]) new_tokens = len(output) - (original_tokens if not 
shared.is_seq2seq else 0)