# Configuration for tuning and/or shared settings across the system
log:
  dir: ./logs
  level: DEBUG
  images: true
gemini:
  location: europe-west1
  model:
    flash:
      name: gemini-1.5-flash-001 # Discontinued on May 24, 2025
      # Costs in dollars for <128k context per https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models
      cost_per_image: 0.0001315
      cost_per_input_char: 0.000000125
      cost_per_output_char: 0.000000375
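      # Worked example (hypothetical request sizes): one image plus a 1,000-char
      # prompt and a 500-char response costs roughly
      #   0.0001315 + 1000*0.000000125 + 500*0.000000375 ≈ $0.00044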
    pro:
      name: gemini-1.5-pro-001 # Discontinued on May 24, 2025
slow:
  embed:
    model:
      name: BAAI/bge-small-en-v1.5
  reddit:
    model:
      name: flash
    vet_prompt_file: data/prompts/slow/vet.prompt
    hf_repo_id: mvsoom/gedankenpolizei
    hf_slow_thoughts_file: slow_thoughts_{{EMBED_MODEL_BASENAME}}.feather
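    # e.g. slow_thoughts_bge-small-en-v1.5.feather, assuming {{EMBED_MODEL_BASENAME}}
    # expands to the basename of the embed model name above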
  bias:
    # The `intensity` parameter in [0, 1] controls the amount of bias in the walk in SLOW space
    # A value of 0. means no bias is applied to the step
    # A value of 1. means that the projections of the step onto the bias directions are enforced to be at least unity, even if they were negative originally
    # Turned off for now.
    intensity: 0.
    # Bias directions are relative vectors in SLOW space that are used to bias the direction of the walk in SLOW space
    # They are computed as `bias_direction = overall_multiplier*multiplier*(embed(to) - embed(from))`
    # Note: the `*multiplier` parameters can be negative
    # Note: embeddings are usually normalized to have unit norm, so the `*multipliers` can be used to dominate the step; a very large `overall_multiplier` will always align the step with the bias direction for any `intensity > 0`
    directions:
      - from: "neutral"
        to: "happy" # TODO: embed emojis, e.g. `to: "😊"`
        multiplier: 1.0
      - from: "angry"
        to: "giggly"
        multiplier: 1.0
    overall_multiplier: 1.0
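    # Minimal sketch of the assumed step-biasing semantics (illustration only,
    # not necessarily the actual implementation):
    #   bias = overall_multiplier * multiplier * (embed(to) - embed(from))
    #   p = dot(step, bias)                      # projection onto the bias direction
    #   if intensity > 0 and p < intensity:
    #       step += (intensity - p) * bias / norm(bias)**2  # raise projection to `intensity`
    # With intensity 0 the condition never fires (no bias); with intensity 1 the
    # projection is enforced to be at least unity, matching the description above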
  # The fractional pace of the SLOW stream (0 <= `pace` <= 1)
  # A value of 1. means the SLOW stream runs at full speed, resulting in a feverish sequence of SLOW thoughts that inhibits completing generated RAW thoughts (and thus requires more generations, which increases LLM API costs)
  # A value of 0. means the SLOW stream only advances when the current RAW thought is (almost) completed, resulting in less feverish, more coherent RAW thoughts
  pace: 0.5
  walk:
    max_steps: 10
fast:
  # If both dimensions of an image are less than or equal to 384 pixels, then 258 tokens are used (per https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements)
  max_size: [384, 288] # Can be left empty to disable resizing
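  # With [384, 288] every resized frame therefore falls in the flat 258-token bucket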
  memory:
    # Number of frames to keep in memory (excludes the current `now` frame)
    max_size: 4
    # Downscale the (-i)th frame by (scaling**i)
    # Note that Gemini does not charge more for larger images, so this is only useful for speeding up processing
    scaling: 0.9
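    # Example: with max_size 4 and scaling 0.9, the remembered frames are scaled
    # by 0.9, 0.81, 0.729 and 0.6561 (newest to oldest)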
  model:
    name: flash
    system_prompt_file: data/prompts/fast/gemini.system_prompt
    temperature: 0.3
  novelty_threshold: 15 # Ignore narrations with novelty below this threshold
raw:
  model:
    name: pro
    system_prompt_file: data/prompts/raw/gemini.system_prompt
    prompt_file: data/prompts/raw/gemini.prompt
    stop_sequences: # These discourage the MODEL from very occasionally revealing the prompt setup
      - "```"
      - "RAW"
    top_k: 40 # Higher value for more random responses. Max(pro) = 40
    top_p: 0.95 # Higher value for more random responses
    temperature: 2.0
  # Condition a single request with at most `memory_size` chars of the RAW stream
  # Note: this is approximate, as it doesn't take into account thoughts running ahead
  # Shorter memory (like 32) means increased responsiveness to the FAST stream
  # Longer memory means more precise semantic direction in the SLOW stream
  memory_size: 512
  # The pace of the RAW stream as an average output rate in chars per second
  pace: 18.
  # Relative jitter in `pace` (as a fraction of `pace`)
  jitter: 0.10
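  # Example: pace 18. with jitter 0.10 gives an effective rate that fluctuates
  # roughly between 16.2 and 19.8 chars per second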
  # Condition a single request with at most `max_fast_inputs` FAST narrations
  max_fast_inputs: 20