Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion modules/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
'truncation_length_min': 0,
'truncation_length_max': 200000,
'max_tokens_second': 0,
'max_updates_second': 0,
'prompt_lookup_num_tokens': 0,
'custom_stopping_strings': '',
'custom_token_bans': '',
Expand Down
14 changes: 3 additions & 11 deletions modules/text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,33 +80,25 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
state = copy.deepcopy(state)
state['stream'] = True

min_update_interval = 0
if state.get('max_updates_second', 0) > 0:
min_update_interval = 1 / state['max_updates_second']

# Generate
for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat):
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
if escape_html:
reply = html.escape(reply)

if is_stream:
cur_time = time.time()

# Maximum number of tokens/second
# Limit number of tokens/second to make text readable in real time
if state['max_tokens_second'] > 0:
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
if diff > 0:
time.sleep(diff)

last_update = time.time()
yield reply

# Limit updates to avoid lag in the Gradio UI
# API updates are not limited
else:
if cur_time - last_update > min_update_interval:
last_update = cur_time
yield reply
yield reply

if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
break
Expand Down
1 change: 0 additions & 1 deletion modules/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def list_interface_input_elements():
'max_new_tokens',
'auto_max_new_tokens',
'max_tokens_second',
'max_updates_second',
'prompt_lookup_num_tokens',
'seed',
'temperature',
Expand Down
1 change: 0 additions & 1 deletion modules/ui_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def create_ui(default_preset):
with gr.Column():
shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')

shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
Expand Down
1 change: 0 additions & 1 deletion settings-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ truncation_length: 2048
truncation_length_min: 0
truncation_length_max: 200000
max_tokens_second: 0
max_updates_second: 0
prompt_lookup_num_tokens: 0
custom_stopping_strings: ''
custom_token_bans: ''
Expand Down