# Configuration for tuning and/or shared settings across the system
log:
  dir: ./logs
  level: DEBUG
  images: true
gemini:
  location: europe-west1
  model:
    flash:
      name: gemini-1.5-flash-001 # Discontinued on May 24, 2025
      # Costs in dollars for <128k context per https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models
      cost_per_image: 0.0001315
      cost_per_input_char: 0.000000125
      cost_per_output_char: 0.000000375
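      # Worked example (hypothetical request sizes): one image plus a 1,000-char
      # prompt and a 500-char response costs roughly
      #   0.0001315 + 1000*0.000000125 + 500*0.000000375 ≈ $0.00044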
    pro:
      name: gemini-1.5-pro-001 # Discontinued on May 24, 2025
slow:
  embed:
    model:
      name: BAAI/bge-small-en-v1.5
  reddit:
    model:
      name: flash
    vet_prompt_file: data/prompts/slow/vet.prompt
    hf_repo_id: mvsoom/gedankenpolizei
    hf_slow_thoughts_file: slow_thoughts_{{EMBED_MODEL_BASENAME}}.feather
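    # e.g. slow_thoughts_bge-small-en-v1.5.feather, assuming {{EMBED_MODEL_BASENAME}}
    # expands to the basename of the embed model name above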
  bias:
    # The `intensity` parameter in [0, 1] controls the amount of bias in the walk in SLOW space
    # A value of 0. means no bias is applied to the step
    # A value of 1. means that the projections of the step onto the bias directions are enforced to be at least unity, even if they were negative originally
    # Turned off for now.
    intensity: 0.
    # Bias directions are relative vectors in SLOW space that are used to bias the direction of the walk in SLOW space
    # They are computed as `bias_direction = overall_multiplier*multiplier*(embed(to) - embed(from))`
    # Note: the `*multiplier` parameters can be negative
    # Note: embeddings are usually normalized to have unit norm, so the `*multipliers` can be used to dominate the step; a very large `overall_multiplier` will always align the step with the bias direction for any `intensity > 0`
    directions:
      - from: "neutral"
        to: "happy" # TODO: embed emojis, e.g. `to: "😊"`
        multiplier: 1.0
      - from: "angry"
        to: "giggly"
        multiplier: 1.0
    overall_multiplier: 1.0
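    # Minimal sketch of the assumed step-biasing semantics (illustration only,
    # not necessarily the actual implementation):
    #   bias = overall_multiplier * multiplier * (embed(to) - embed(from))
    #   p = dot(step, bias)                      # projection onto the bias direction
    #   if intensity > 0 and p < intensity:
    #       step += (intensity - p) * bias / norm(bias)**2  # raise projection to `intensity`
    # With intensity 0 the condition never fires (no bias); with intensity 1 the
    # projection is enforced to be at least unity, matching the description above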
  # The fractional pace of the SLOW stream (0 <= `pace` <= 1)
  # A value of 1. means the SLOW stream runs at full speed, resulting in a feverish sequence of SLOW thoughts that inhibits completing generated RAW thoughts (and thus requires more generations, which increases LLM API costs)
  # A value of 0. means the SLOW stream only advances when the current RAW thought is (almost) completed, resulting in less feverish, more coherent RAW thoughts
  pace: 0.5
  walk:
    max_steps: 10
fast:
  # If both dimensions of an image are less than or equal to 384 pixels, then 258 tokens are used (per https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements)
  max_size: [384, 288] # Can be left empty to disable resizing
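  # With [384, 288] every resized frame therefore falls in the flat 258-token bucket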
  memory:
    # Number of frames to keep in memory (excludes the current `now` frame)
    max_size: 4
    # Downscale the (-i)th frame by (scaling**i)
    # Note that Gemini does not charge more for larger images, so this is only useful for speeding up processing
    scaling: 0.9
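    # Example: with max_size 4 and scaling 0.9, the remembered frames are scaled
    # by 0.9, 0.81, 0.729 and 0.6561 (newest to oldest)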
  model:
    name: flash
    system_prompt_file: data/prompts/fast/gemini.system_prompt
    temperature: 0.3
  novelty_threshold: 15 # Ignore narrations with novelty below this threshold
raw:
  model:
    name: pro
    system_prompt_file: data/prompts/raw/gemini.system_prompt
    prompt_file: data/prompts/raw/gemini.prompt
    stop_sequences: # These discourage the MODEL from very occasionally revealing the prompt setup
      - "```"
      - "RAW"
    top_k: 40 # Higher value for more random responses. Max(pro) = 40
    top_p: 0.95 # Higher value for more random responses
    temperature: 2.0
  # Condition a single request with at most `memory_size` chars of the RAW stream
  # Note: this is approximate, as it doesn't take into account thoughts running ahead
  # Shorter memory (like 32) means increased responsiveness to the FAST stream
  # Longer memory means more precise semantic direction in the SLOW stream
  memory_size: 512
  # The pace of the RAW stream as an average output rate in chars per second
  pace: 18.
  # Relative jitter in `pace` (as a fraction of `pace`)
  jitter: 0.10
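  # Example: pace 18. with jitter 0.10 gives an effective rate that fluctuates
  # roughly between 16.2 and 19.8 chars per second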
  # Condition a single request with at most `max_fast_inputs` FAST narrations
  max_fast_inputs: 20