# data/params.yml

# Trainer to use, and (presumably) the config passed to it -- confirm in the
# training script.
trainer_class: PPOTrainer
trainer_config:
  # PPO hyperparameters.  The individual keys are described at
  # https://ray.readthedocs.io/en/latest/rllib-algorithms.html#proximal-policy-optimization-ppo
  entropy_coeff: 0.01
  gamma: 0.99
  kl_target: 0.0036
  lambda: 0.95
  lr: 0.000005
  num_workers: 4
  train_batch_size: 50000
  vf_clip_param: 100
  model:
    # Name of a custom action distribution.
    # NOTE(review): presumably registered by the project code -- confirm.
    custom_action_dist: q1_phys_action_dist

  # Settings for the environment.  The fields are defined by Config in
  # q1physrl/train.py.
  env_config:
    action_range: 10
    allow_jump: true
    allow_yaw: true
    auto_jump: false
    discrete_yaw_steps: -1
    fmove_max: 800
    smove_max: 1060
    hover: false
    initial_yaw_range: [0, 360]
    key_press_delay: 0.3
    max_initial_speed: 700
    num_envs: 100
    smooth_keys: true
    speed_reward: false
    # NOTE(review): this value is approximately 1/72; presumably a 72 Hz
    # step -- confirm.
    time_delta: 0.013888888888888
    time_limit: 10
    zero_start_prob: 0.01

# Pass the path to a checkpoint file here to restore from a previous run.
# The value null supplies no path.
# NOTE(review): presumably training then starts from scratch -- confirm.
checkpoint_fname: null

# How often to record angle plots (only relevant when using wandb).
# NOTE(review): the unit is presumably training iterations -- confirm against
# the code that reads this key.
plot_frequency: 5