Merged
35 commits
c3e0fcf
Merge pull request #4927 from oobabooga/dev
oobabooga Dec 15, 2023
443be39
Merge pull request #4937 from oobabooga/dev
oobabooga Dec 15, 2023
7be0983
Merge pull request #4961 from oobabooga/dev
oobabooga Dec 17, 2023
b28020a
Merge pull request #4980 from oobabooga/dev
oobabooga Dec 18, 2023
781367b
Merge pull request #4988 from oobabooga/dev
oobabooga Dec 19, 2023
71eb744
Merge pull request #5002 from oobabooga/dev
oobabooga Dec 19, 2023
5b791ca
Merge pull request #5005 from oobabooga/dev
oobabooga Dec 19, 2023
c1f78db
Merge pull request #5011 from oobabooga/dev
oobabooga Dec 20, 2023
489f4a2
Merge pull request #5012 from oobabooga/dev
oobabooga Dec 20, 2023
11288d1
Merge pull request #5022 from oobabooga/dev
oobabooga Dec 20, 2023
4b25acf
Merge pull request #5039 from oobabooga/dev
oobabooga Dec 21, 2023
af87609
Merge pull request #5073 from oobabooga/dev
oobabooga Dec 25, 2023
19d1374
Merge pull request #5078 from oobabooga/dev
oobabooga Dec 25, 2023
3fd7073
Merge pull request #5100 from oobabooga/dev
oobabooga Dec 27, 2023
3e3a66e
Merge pull request #5132 from oobabooga/dev
oobabooga Dec 31, 2023
3f28925
Merge pull request #5152 from oobabooga/dev
oobabooga Jan 2, 2024
c54d1da
Merge pull request #5163 from oobabooga/dev
oobabooga Jan 4, 2024
8ea3f31
Merge pull request #5181 from oobabooga/dev
oobabooga Jan 5, 2024
e169993
Merge pull request #5195 from oobabooga/dev
oobabooga Jan 7, 2024
ad1ff53
Merge pull request #5199 from oobabooga/dev
oobabooga Jan 7, 2024
2dc8db8
Merge pull request #5220 from oobabooga/dev
oobabooga Jan 10, 2024
61e4bfe
Merge pull request #5253 from oobabooga/dev
oobabooga Jan 14, 2024
d8c3a5b
Merge pull request #5266 from oobabooga/dev
oobabooga Jan 14, 2024
f86339b
Noisy sampling HF implementation
kalomaze Jan 22, 2024
34597d7
do min-max normalization before noising the logits
kalomaze Jan 23, 2024
c67833d
replace noisy sampling logic with quadratic transformation
kalomaze Jan 30, 2024
a7fceea
rebrand it into the new quadratic sampler
kalomaze Jan 30, 2024
a3c41af
missed one xd
kalomaze Jan 30, 2024
3e10e45
the scale was way off lol
kalomaze Jan 30, 2024
1b848fd
attempt moving quadratic sampling to logitswarper
kalomaze Jan 31, 2024
142831c
Create ModifiedTemperatureLogitsWarper
oobabooga Feb 4, 2024
e155245
Reorder UI elements
oobabooga Feb 4, 2024
ca328f6
Remove the old class
oobabooga Feb 4, 2024
da281dd
Add documentation
oobabooga Feb 4, 2024
0bb1e44
Lint
oobabooga Feb 4, 2024
1 change: 1 addition & 0 deletions docs/03 - Parameters Tab.md
@@ -55,6 +55,7 @@ For more information about the parameters, the [transformers documentation](http
* **mirostat_tau**: No idea, see the paper for details. According to the Preset Arena, 8 is a good value.
* **mirostat_eta**: No idea, see the paper for details. According to the Preset Arena, 0.1 is a good value.
* **dynamic_temperature**: Activates Dynamic Temperature. This modifies temperature to range between "dynatemp_low" (minimum) and "dynatemp_high" (maximum), with an entropy-based scaling. The steepness of the curve is controlled by "dynatemp_exponent".
* **smoothing_factor**: Activates Quadratic Sampling. This takes precedence over regular temperature and dynamic temperature, and replaces those samplers. When `0 < smoothing_factor < 1`, the logits distribution becomes flatter. When `smoothing_factor > 1`, it becomes more peaked.
* **temperature_last**: Makes temperature the last sampler instead of the first. With this, you can remove low probability tokens with a sampler like min_p and then use a high temperature to make the model creative without losing coherency.
* **do_sample**: When unchecked, sampling is entirely disabled, and greedy decoding is used instead (the most likely token is always picked).
* **Seed**: Set the Pytorch seed to this number. Note that some loaders do not use Pytorch (notably llama.cpp), and others are not deterministic (notably ExLlama v1 and v2). For these loaders, the seed has no effect.
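The new documentation entry describes the quadratic transformation this PR applies to the logits (see modules/sampler_hijack.py further down). A minimal sketch of that formula on a toy logits vector, to illustrate the flattening/peaking behaviour; the example values are made up for illustration:

```python
import torch

def quadratic_transform(logits: torch.Tensor, smoothing_factor: float) -> torch.Tensor:
    # Same formula as the warper added in modules/sampler_hijack.py:
    # every logit is pulled towards the maximum by a quadratic penalty.
    max_logit = logits.max()
    return -(smoothing_factor * (logits - max_logit) ** 2) + max_logit

logits = torch.tensor([4.0, 2.0, 1.0, -1.0])  # toy values, not from a real model
for factor in (0.3, 1.0, 3.0):
    probs = torch.softmax(quadratic_transform(logits, factor), dim=-1)
    print(factor, [round(p, 3) for p in probs.tolist()])
# Lower factors leave more probability mass on the runner-up tokens;
# higher factors concentrate it on the top token, matching the docs above.
```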
1 change: 1 addition & 0 deletions extensions/openai/typing.py
@@ -12,6 +12,7 @@ class GenerationOptions(BaseModel):
dynatemp_low: float = 1
dynatemp_high: float = 1
dynatemp_exponent: float = 1
smoothing_factor: float = 0
top_k: int = 0
repetition_penalty: float = 1
repetition_penalty_range: int = 1024
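GenerationOptions is the Pydantic model behind the extension's OpenAI-compatible endpoints, so the new field can be sent directly in a request body. A hedged sketch, assuming the API extension is running on its default local port and that /v1/completions accepts these extra generation options:

```python
import requests

# Assumed local endpoint of the openai extension; adjust host/port to your setup.
url = "http://127.0.0.1:5000/v1/completions"
payload = {
    "prompt": "Once upon a time",
    "max_tokens": 64,
    "smoothing_factor": 0.3,  # new field from this PR; 0 leaves quadratic sampling disabled
}
response = requests.post(url, json=payload)
print(response.json()["choices"][0]["text"])
```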
3 changes: 3 additions & 0 deletions modules/loaders.py
@@ -159,6 +159,7 @@ def transformers_samplers():
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'top_p',
'min_p',
'top_k',
@@ -228,6 +229,7 @@ def transformers_samplers():
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'top_p',
'min_p',
'top_k',
@@ -284,6 +286,7 @@ def transformers_samplers():
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'top_p',
'min_p',
'top_k',
1 change: 1 addition & 0 deletions modules/presets.py
@@ -17,6 +17,7 @@ def default_preset():
'dynatemp_low': 1,
'dynatemp_high': 1,
'dynatemp_exponent': 1,
'smoothing_factor': 0,
'top_p': 1,
'min_p': 0,
'top_k': 0,
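The default of 0 keeps quadratic sampling disabled unless a preset or the UI overrides it. As a rough illustration (the merge helper below is hypothetical, not part of this PR), a preset only needs to supply the keys it changes:

```python
def apply_preset(defaults: dict, preset: dict) -> dict:
    # Hypothetical merge: preset values win, keys absent from the preset keep their defaults.
    merged = dict(defaults)
    merged.update({k: v for k, v in preset.items() if k in defaults})
    return merged

defaults = {'temperature': 1, 'smoothing_factor': 0, 'top_p': 1, 'min_p': 0}
my_preset = {'smoothing_factor': 0.25, 'min_p': 0.05}  # made-up example values

params = apply_preset(defaults, my_preset)
assert params['smoothing_factor'] == 0.25  # quadratic sampling now takes over from temperature
```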
47 changes: 34 additions & 13 deletions modules/sampler_hijack.py
@@ -15,8 +15,12 @@
global_scores = None


class TemperatureLogitsWarperWithDynatemp(LogitsWarper):
def __init__(self, temperature: float, dynamic_temperature: bool, dynatemp_low: float, dynatemp_high: float, dynatemp_exponent: float):
class ModifiedTemperatureLogitsWarper(LogitsWarper):
'''
Based on the original Transformers temperature logits warper, this
adds support for dynamic temperature and quadratic sampling.
'''
def __init__(self, temperature: float, dynamic_temperature: bool, dynatemp_low: float, dynatemp_high: float, dynatemp_exponent: float, smoothing_factor: float):
if not isinstance(temperature, float) or not (temperature > 0):
except_msg = (
f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token "
@@ -32,16 +36,27 @@ def __init__(self, temperature: float, dynamic_temperature: bool, dynatemp_low:
self.dynatemp_low = dynatemp_low
self.dynatemp_high = dynatemp_high
self.dynatemp_exponent = dynatemp_exponent
self.smoothing_factor = smoothing_factor

def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:

# Regular temperature
if not self.dynamic_temperature:
scores = scores / self.temperature
return scores
# Quadratic sampling
if self.smoothing_factor > 0:

# Compute the maximum logit value
max_logit = scores.max()

# Apply the quadratic transformation
transformed_logits = -(self.smoothing_factor * (scores - max_logit)**2) + max_logit

# No need to print the top 5 logits since this is not required
# print("Original top 5 logits: ", torch.topk(scores, 5))
# print("New top 5 logits: ", torch.topk(transformed_logits, 5))

return transformed_logits

# Dynamic temperature
else:
elif self.dynamic_temperature:
min_temp = self.dynatemp_low
max_temp = self.dynatemp_high
exponent_val = self.dynatemp_exponent
@@ -88,6 +103,11 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to

return scores

# Regular temperature
else:
scores = scores / self.temperature
return scores


class MinPLogitsWarper(LogitsWarper):
def __init__(self, min_p: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
@@ -286,20 +306,21 @@ def get_logits_warper_patch(self, generation_config):
generation_config.temperature = float(generation_config.temperature)

temperature = generation_config.temperature
if generation_config.dynamic_temperature:
if generation_config.dynamic_temperature or generation_config.smoothing_factor > 0:
# Make sure TemperatureLogitsWarper will be created by temporarily
# setting temperature to a value != 1.
generation_config.temperature = 1.1

warpers = self._get_logits_warper_old(generation_config)
for i in range(len(warpers)):
if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper':
warpers[i] = TemperatureLogitsWarperWithDynatemp(
warpers[i] = ModifiedTemperatureLogitsWarper(
temperature,
generation_config.dynamic_temperature,
generation_config.dynatemp_low,
generation_config.dynatemp_high,
generation_config.dynatemp_exponent
generation_config.dynatemp_exponent,
generation_config.smoothing_factor
)

warpers_to_add = LogitsProcessorList()
@@ -328,7 +349,7 @@ def get_logits_warper_patch(self, generation_config):
if generation_config.temperature_last:
temperature_idx = None
for i in range(len(warpers)):
if warpers[i].__class__.__name__ in ['TemperatureLogitsWarper', 'TemperatureLogitsWarperWithDynatemp']:
if warpers[i].__class__.__name__ in ['TemperatureLogitsWarper', 'ModifiedTemperatureLogitsWarper']:
temperature_idx = i
break

@@ -352,8 +373,7 @@ def get_logits_processor_patch(self, **kwargs):
repetition_penalty_range = kwargs['generation_config'].repetition_penalty_range
do_rep_pen_hijack = (repetition_penalty > 1) or (presence_penalty != 0) or (frequency_penalty != 0)
if do_rep_pen_hijack:
# Make sure that a RepetitionPenaltyLogitsProcessor will be created
kwargs['generation_config'].repetition_penalty = 1.1 # must set to some value > 1
kwargs['generation_config'].repetition_penalty = 1.1 # Set to value > 1 to ensure RepetitionPenaltyLogitsProcessor is created

result = self._get_logits_processor_old(**kwargs)

@@ -372,6 +392,7 @@ def generation_config_init_patch(self, **kwargs):
self.dynatemp_low = kwargs.pop("dynatemp_low", 1)
self.dynatemp_high = kwargs.pop("dynatemp_high", 1)
self.dynatemp_exponent = kwargs.pop("dynatemp_exponent", 1)
self.smoothing_factor = kwargs.pop("smoothing_factor", 0.0)
self.tfs = kwargs.pop("tfs", 1.0)
self.top_a = kwargs.pop("top_a", 0.0)
self.mirostat_mode = kwargs.pop("mirostat_mode", 0)
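For readers who want to try the transformation outside the webui, here is a self-contained sketch of the same idea as a standalone LogitsWarper plugged into transformers' generate() via logits_processor. Note that in this PR the transform lives inside ModifiedTemperatureLogitsWarper and replaces temperature, so the sampler ordering differs from this demo; the model name and parameter values below are placeholders, not part of the PR:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList, LogitsWarper

class QuadraticSamplingWarper(LogitsWarper):
    """Standalone version of the quadratic transformation used in this PR."""
    def __init__(self, smoothing_factor: float):
        self.smoothing_factor = smoothing_factor

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        if self.smoothing_factor <= 0:
            return scores
        max_logit = scores.max()
        return -(self.smoothing_factor * (scores - max_logit) ** 2) + max_logit

# "gpt2" is only a small placeholder model for the demo.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quadratic sampler", return_tensors="pt")
output = model.generate(
    **inputs,
    do_sample=True,
    max_new_tokens=20,
    logits_processor=LogitsProcessorList([QuadraticSamplingWarper(smoothing_factor=0.3)]),
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```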
5 changes: 3 additions & 2 deletions modules/text_generation.py
@@ -285,8 +285,9 @@ def get_reply_from_output_ids(output_ids, state, starting_from=0):

def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False):
generate_params = {}
for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']:
generate_params[k] = state[k]
for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']:
if k in state:
generate_params[k] = state[k]

if state['negative_prompt'] != '':
generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
1 change: 1 addition & 0 deletions modules/ui.py
@@ -119,6 +119,7 @@ def list_interface_input_elements():
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'top_p',
'min_p',
'top_k',
1 change: 1 addition & 0 deletions modules/ui_parameters.py
@@ -49,6 +49,7 @@ def create_ui(default_preset):
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Replaces temperature with Quadratic Sampling.')
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])