# fine_tuned/vmas/conf/config.yaml

# Hydra defaults list: each entry selects a config from a config group.
defaults:
  - experiment: base_experiment
  # `???` is the mandatory-value marker: algorithm and task have no default
  # here and must be supplied as overrides when launching.
  - algorithm: ???
  - task: ???
  - model: layers/mlp
  # Same `layers/mlp` config, placed under the `critic_model` package.
  - model@critic_model: layers/mlp
  # `_self_` is listed last, so values set in this file take precedence over
  # the entries listed above it.
  - _self_

hydra:
  searchpath:
    # Add the default configuration packaged with benchmarl
    # (pkg://benchmarl/conf) to Hydra's config search path.
    - pkg://benchmarl/conf

# Top-level seed value for the run.
# NOTE(review): presumably used to seed the RNGs of the launching script —
# the consumer is not visible in this file; confirm.
seed: 0

# Values for the `experiment` config node. Because `_self_` comes last in the
# defaults list, the values below take precedence over those loaded from
# `experiment: base_experiment`.
# Booleans are written in canonical lowercase form (`true`/`false`).
experiment:

  # Devices used for sampling, training and the buffer.
  sampling_device: "cuda"
  train_device: "cuda"
  buffer_device: "cuda"

  share_policy_params: true
  prefer_continuous_actions: true

  # Discount factor, learning rate and gradient clipping.
  gamma: 0.9
  lr: 0.00005
  clip_grad_norm: true
  clip_grad_val: 5

  # Target-network update settings.
  # NOTE(review): with soft_target_update enabled,
  # hard_target_update_frequency is presumably unused — confirm.
  soft_target_update: true
  polyak_tau: 0.005
  hard_target_update_frequency: 5

  # Exploration epsilon schedule: initial value, final value and the number
  # of frames over which it is annealed.
  exploration_eps_init: 0.8
  exploration_eps_end: 0.01
  exploration_anneal_frames: 1_000_000

  # Training budget.
  # NOTE(review): null presumably means "no iteration cap", leaving
  # max_n_frames as the only limit — confirm.
  max_n_iters: null
  max_n_frames: 10_000_000

  # On-policy settings (60_000 frames per batch = 600 envs x 100).
  on_policy_collected_frames_per_batch: 60_000
  on_policy_n_envs_per_worker: 600
  on_policy_n_minibatch_iters: 45
  on_policy_minibatch_size: 4096

  # Off-policy settings (6000 frames per batch = 60 envs x 100).
  off_policy_collected_frames_per_batch: 6000
  off_policy_n_envs_per_worker: 60
  off_policy_n_optimizer_steps: 1000
  off_policy_train_batch_size: 128
  off_policy_memory_size: 1_000_000

  # Evaluation. 120_000 is a whole multiple of both collected-frames-per-batch
  # values above (2 x 60_000 and 20 x 6000).
  evaluation: true
  render: true
  evaluation_interval: 120_000
  evaluation_episodes: 200

  # Logging.
  loggers: [wandb]
  create_json: true

  # Saving and restoring.
  # NOTE(review): checkpoint_interval 0 presumably disables periodic
  # checkpoints — confirm.
  save_folder: null
  restore_file: null
  checkpoint_interval: 0