System information
OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Linux Ubuntu 18.04.02 (GNU/Linux 4.15.0-52-generic x86_64)
Ray installed from (source or binary): Source
Ray version: ray 0.7.3
Python version: python 3.6.8
Exact command to reproduce: See below
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
LOG_FILENAME = 'logging2.out'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.debug('This message should go to the log file')
"""Example of running StarCraft2 with RLlib PPO.
In this setup, each agent will be controlled by an independent PPO policy.
However the policies share weights.
Increase the level of parallelism by changing --num-workers.
"""
import argparse
import numpy as np
import ray
from ray import tune
from ray.tune import run_experiments, register_env
from ray.rllib.models import ModelCatalog
from smac.examples.rllib.env import RLlibStarCraft2Env
from smac.examples.rllib.rnn4a import MaskedActionsLSTM
def on_episode_start(info):
    episode = info["episode"]
    episode.user_data["step_wins"] = []


def on_episode_step(info):
    episode = info["episode"]
    try:
        outcome = float(episode.last_info_for(0)["battle_won"])
    except:
        outcome = 0.
    episode.user_data["step_wins"].append(outcome)


def on_episode_end(info):
    episode = info["episode"]
    episode_wins = np.sum(episode.user_data["step_wins"])
    episode.custom_metrics["episode_wins"] = episode_wins


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-iters", type=int, default=300)
    parser.add_argument("--num-workers", type=int, default=13)
    parser.add_argument("--num-envs-per-worker", type=int, default=2)
    parser.add_argument("--num-gpus", type=int, default=4)
    parser.add_argument("--map-name", type=str, default="MMM")
    args = parser.parse_args()

    ray.init()

    register_env("smac", lambda smac_args: RLlibStarCraft2Env(**smac_args))
    ModelCatalog.register_custom_model("mask_model", MaskedActionsLSTM)

    try:
        run_experiments({
            "appo_sc2": {
                "run": "APPO",
                "env": "smac",
                "stop": {
                    "training_iteration": args.num_iters,
                },
                "config": {
                    "num_workers": args.num_workers,
                    "num_envs_per_worker": args.num_envs_per_worker,
                    "num_gpus": args.num_gpus,
                    "train_batch_size": 3000,
                    # "sgd_minibatch_size": 10000,  # Remove for APPO
                    "sample_batch_size": 30,  # Add for APPO, remove for PPO
                    "lr": 1e-4,
                    "lambda": .995,
                    # "kl_coeff": 1.0,  # Remove for APPO
                    "num_sgd_iter": 20,
                    "observation_filter": "NoFilter",  # breaks the action mask
                    # "vf_share_layers": True,  # don't create a separate value model (remove for APPO)
                    "vf_loss_coeff": 1e-3,  # VF loss is error^2, so it can be really out of scale compared to the policy loss.
                    # Ref: https://github.com/ray-project/ray/issues/5278
                    "env_config": {
                        "map_name": args.map_name,
                        # "obs_instead_of_state": True
                    },
                    "model": {
                        "custom_model": "mask_model",
                        "fcnet_hiddens": [1024, 512, 256, 128],
                        "lstm_cell_size": 64,
                        "max_seq_len": 100
                    },
                    "callbacks": {
                        "on_episode_start": tune.function(on_episode_start),
                        "on_episode_step": tune.function(on_episode_step),
                        "on_episode_end": tune.function(on_episode_end)
                    },
                },
            },
        })
    except:
        logging.exception('Got exception on main handler')
        raise
Describe the problem
I am running RLlib with the SMAC environment (version 0.1.0b1). I just updated RLlib from 0.7.0 to 0.7.3, and it appears that APPO no longer works properly; I was able to run APPO on the prior version. I believe the issue is in ./miniconda3/envs/rlenv/lib/python3.6/site-packages/ray/rllib/optimizers/aso_multi_gpu_learner.py
In my error log, the following message appears starting on line 3703:
(pid=1807) tuples = s.policy._get_loss_inputs_dict(batch)
(pid=1807) TypeError: _get_loss_inputs_dict() missing 1 required positional argument: 'shuffle'
I believe that s.policy._get_loss_inputs_dict(batch) needs to pass shuffle as an additional argument, and probably default it to False, since I am using a custom RNN model.
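For reference, a minimal sketch of the change I have in mind (the surrounding code in aso_multi_gpu_learner.py is omitted here; only the failing call from the traceback and the proposed shuffle argument are shown):

# In ray/rllib/optimizers/aso_multi_gpu_learner.py -- sketch only, context omitted.
# Call that currently fails (taken from the traceback above):
#     tuples = s.policy._get_loss_inputs_dict(batch)
# Proposed call: pass shuffle explicitly, defaulting to False so that
# time-ordered sequences for the custom RNN model are not shuffled.
tuples = s.policy._get_loss_inputs_dict(batch, shuffle=False)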
Source code / logs
See attached error log
output4c.log