
Update evaluate and train to evaluate per episode #639

Merged · 34 commits · Apr 6, 2021

Changes from 19 commits

Commits
db8785f
Update evaluate and train to eval per episode
AlexLewandowski Mar 3, 2021
c670d41
Fix tests and change normalization
AlexLewandowski Mar 3, 2021
eba23a4
Fix formatting
AlexLewandowski Mar 3, 2021
8816495
Record episode for train/eval at the same time
AlexLewandowski Mar 3, 2021
a022a78
Train performance averages last eval_rate episodes
AlexLewandowski Mar 4, 2021
57b0b03
Add gap_mode and calculate generalization gap
AlexLewandowski Mar 4, 2021
a1a8bdf
Enable explore during eval for better comparison
AlexLewandowski Mar 4, 2021
360e0eb
Fix formatting
AlexLewandowski Mar 4, 2021
83e7c1b
Merge branch 'develop' into ultra-eval-per-episode
AlexLewandowski Mar 8, 2021
37eca41
Fix averaging in record_episode
AlexLewandowski Mar 9, 2021
35db258
Only average scalars in record_episode
AlexLewandowski Mar 9, 2021
dc9b9d5
Check for scalar when calculating gap
AlexLewandowski Mar 12, 2021
d0de563
Merge branch 'develop' into ultra-eval-per-episode
AlexLewandowski Mar 19, 2021
2eb3b90
Fix formatting
AlexLewandowski Mar 19, 2021
0aaf456
Fix bugs introduced by merge
AlexLewandowski Mar 19, 2021
ff266b5
Fix calculate_gap
AlexLewandowski Mar 19, 2021
57b2e0e
Fix averaging over episodes
AlexLewandowski Mar 22, 2021
b0c904e
Fix formatting
AlexLewandowski Mar 22, 2021
5277c5c
Check eval_episodes is 0 before evaluation
AlexLewandowski Mar 23, 2021
ae024e2
Evaluate on train and test without exploration
AlexLewandowski Mar 23, 2021
9d5b002
Make train/test occur in one ray.get
AlexLewandowski Mar 24, 2021
e07f770
Remove episode_count
AlexLewandowski Mar 31, 2021
ef66753
Remove episode_count from tests
AlexLewandowski Mar 31, 2021
1f578d4
Remove averaging over training episodes
AlexLewandowski Apr 5, 2021
afab3c3
Merge branch 'ultra-develop' into ultra-eval-per-episode
AlexLewandowski Apr 5, 2021
3bd5468
Restructure evaluation_check with eval_mode = False
AlexLewandowski Apr 5, 2021
af226e3
Fix formatting
AlexLewandowski Apr 5, 2021
64ae798
try split ci test method
JenishPatel99 Apr 5, 2021
053649a
Merge branch 'ultra-speedup-rllib-tests' into ultra-eval-per-episode
AlexLewandowski Apr 6, 2021
dfefee5
Check for valid eval_episodes inside evaluation_check
AlexLewandowski Apr 6, 2021
5124373
Merge branch 'ultra-speedup-rllib-tests' into ultra-eval-per-episode
AlexLewandowski Apr 6, 2021
eb45d51
Merge branch 'ultra-eval-per-episode' of github.com:huawei-noah/SMART…
AlexLewandowski Apr 6, 2021
9136a31
Removed averaging feature of record_episode
AlexLewandowski Apr 6, 2021
8f99c33
Fix formatting
AlexLewandowski Apr 6, 2021
1 change: 1 addition & 0 deletions ultra/tests/test_evaluate.py
@@ -404,6 +404,7 @@ def run_experiment(scenario_info, num_agents, log_dir, headless=True):
eval_rate=10,
eval_episodes=1,
max_episode_steps=2,
episode_count=1,
policy_classes=agent_classes,
scenario_info=scenario_info,
timestep_sec=0.1,
15 changes: 9 additions & 6 deletions ultra/ultra/evaluate.py
@@ -49,6 +49,7 @@ def evaluation_check(
eval_rate,
eval_episodes,
max_episode_steps,
episode_count,
scenario_info,
timestep_sec,
headless,
@@ -58,8 +59,8 @@
agent_ids_to_evaluate = [
agent_id
for agent_id in agent_ids
if (episode.get_itr(agent_id) + 1) % eval_rate == 0
and episode.last_eval_iterations[agent_id] != episode.get_itr(agent_id)
if (episode_count + 1) % eval_rate == 0
and episode.last_eval_iterations[agent_id] != episode_count
]

# Skip evaluation if there are no agents needing an evaluation.
@@ -71,9 +72,7 @@

for agent_id in agent_ids_to_evaluate:
# Get the checkpoint directory for the current agent and save its model.
checkpoint_directory = episode.checkpoint_dir(
agent_id, episode.get_itr(agent_id)
)
checkpoint_directory = episode.checkpoint_dir(agent_id, episode_count)
agents[agent_id].save(checkpoint_directory)

# Perform the evaluation on this agent and save the data.
@@ -97,11 +96,14 @@
)[0]
)
episode.eval_count += 1
episode.last_eval_iterations[agent_id] = episode.get_itr(agent_id)
episode.last_eval_iterations[agent_id] = episode_count

# Put the evaluation data for all agents into the episode and record the TensorBoard.
episode.info[episode.active_tag] = evaluation_data
episode.record_tensorboard()
episode.gap_mode()
episode.calculate_gap()
episode.record_tensorboard()
episode.train_mode()


@@ -119,6 +121,7 @@ def evaluate(
headless,
timestep_sec,
log_dir,
explore=False,
):
torch.set_num_threads(1)

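For readers of the evaluate.py hunk above: the trigger no longer depends on each agent's own iteration counter but on the shared episode_count passed in from train(). A minimal sketch of that condition, assuming the names used in the diff (should_evaluate itself is a hypothetical helper, not part of the PR):

# Minimal sketch of the per-episode evaluation trigger used in evaluation_check.
# should_evaluate is a hypothetical helper name; eval_rate, episode_count, and
# last_eval_iterations are the values the diff already works with.
def should_evaluate(agent_id, episode_count, eval_rate, last_eval_iterations):
    """True when this agent is due for evaluation at the current episode."""
    on_eval_episode = (episode_count + 1) % eval_rate == 0
    not_already_evaluated = last_eval_iterations.get(agent_id) != episode_count
    return on_eval_episode and not_already_evaluated

# Usage mirroring the list comprehension in evaluation_check:
# agent_ids_to_evaluate = [
#     agent_id for agent_id in agent_ids
#     if should_evaluate(agent_id, episode_count, eval_rate, episode.last_eval_iterations)
# ]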
43 changes: 24 additions & 19 deletions ultra/ultra/train.py
@@ -108,6 +108,8 @@ def train(
# policy_classes list, transform it to an etag of "dqn-v0:ppo-v0".
etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

episode_count = 0
old_episode = None
for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
# Reset the environment and retrieve the initial observations.
observations = env.reset()
@@ -136,19 +138,6 @@
if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
finished = True
break

# Perform the evaluation check.
evaluation_check(
agents=agents,
agent_ids=agent_ids,
policy_classes=agent_classes,
episode=episode,
log_dir=log_dir,
max_episode_steps=max_episode_steps,
**eval_info,
**env.info,
)

# Request and perform actions on each agent that received an observation.
actions = {
agent_id: agents[agent_id].act(observation, explore=True)
@@ -184,10 +173,26 @@
total_step += 1
observations = next_observations

# Normalize the data and record this episode on tensorboard.
episode.record_episode()
episode.record_tensorboard()
episode.record_episode(old_episode, eval_info["eval_rate"], count=episode_count)
old_episode = episode

if (episode_count + 1) % eval_info["eval_rate"] == 0:
episode.record_tensorboard()
old_episode = None

if eval_info["eval_episodes"] != 0:
evaluation_check(
agents=agents,
agent_ids=agent_ids,
policy_classes=agent_classes,
episode=episode,
log_dir=log_dir,
max_episode_steps=max_episode_steps,
episode_count=episode_count,
**eval_info,
**env.info,
)
episode_count += 1
if finished:
break

@@ -231,9 +236,9 @@ def train(
)
parser.add_argument(
"--eval-rate",
help="Evaluation rate based on number of observations",
help="Evaluation rate based on number of episodes",
type=int,
default=10000,
default=100,
)
parser.add_argument(
"--seed",
@@ -283,7 +288,7 @@ def train(
num_episodes=int(args.episodes),
max_episode_steps=int(args.max_episode_steps),
eval_info={
"eval_rate": float(args.eval_rate),
"eval_rate": int(args.eval_rate),
"eval_episodes": int(args.eval_episodes),
},
timestep_sec=float(args.timestep),
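Taken together, the train.py changes move the evaluation check out of the per-step loop into per-episode bookkeeping: each finished episode is folded into a running average, TensorBoard is written and the average reset on every eval_rate-th episode, and --eval-rate is now parsed as an int counting episodes (default 100) rather than observations. A tiny self-contained check of the assumed trigger arithmetic, with made-up numbers:

# With the new default eval_rate of 100, the flush/evaluation condition
# (episode_count + 1) % eval_rate == 0 fires on zero-based episode counts
# 99, 199, 299, ... i.e. after every 100th episode.
eval_rate = 100
fire = [count for count in range(301) if (count + 1) % eval_rate == 0]
assert fire == [99, 199, 299]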
43 changes: 33 additions & 10 deletions ultra/ultra/utils/episode.py
@@ -44,8 +44,8 @@
class LogInfo:
def __init__(self):
self.data = {
"env_score": 0,
"episode_reward": 0,
"env_score": 0.0,
"episode_reward": 0.0,
"dist_center": 0,
"goal_dist": 0,
"speed": 0,
@@ -66,7 +66,7 @@ def __init__(self):
}

def add(self, infos, rewards):
self.data["env_score"] += int(infos["logs"]["env_score"])
self.data["env_score"] += infos["logs"]["env_score"]
self.data["speed"] += infos["logs"]["speed"]
self.data["max_speed_violation"] += (
1 if infos["logs"]["speed"] > infos["logs"]["closest_wp"].speed_limit else 0
@@ -80,7 +80,7 @@ def add(self, infos, rewards):
self.data["ego_linear_jerk"] += infos["logs"]["linear_jerk"]
self.data["ego_angular_jerk"] += infos["logs"]["angular_jerk"]
self.data["episode_reward"] += rewards
self.data["final_pos"] = infos["logs"]["position"]
self.data["final_pos"] = infos["logs"]["position"][:2]
self.data["start_pos"] = infos["logs"]["start"].position
self.data["dist_travelled"] = math.sqrt(
(self.data["final_pos"][1] - self.data["start_pos"][1]) ** 2
@@ -104,14 +104,13 @@ def step(self):

def normalize(self):
steps = self.data["episode_length"]
self.data["env_score"] /= steps
self.data["dist_center"] /= steps
self.data["speed"] /= steps
self.data["ego_linear_jerk"] /= steps
self.data["ego_angular_jerk"] /= steps
self.data["ego_num_violations"] /= steps
self.data["social_num_violations"] /= steps
self.data["max_speed_violation"] /= steps
# self.data["ego_num_violations"] /= steps
# self.data["social_num_violations"] /= steps
# self.data["max_speed_violation"] /= steps


class Episode:
@@ -186,6 +185,19 @@ def train_mode(self):
def eval_mode(self):
self.active_tag = "Evaluation"

def gap_mode(self):
self.active_tag = "Gap"

def calculate_gap(self):
gap_info = self.info["Gap"]
for agent_id, agent_info in self.info["Train"].items():
for key in agent_info.data:
if np.isscalar(gap_info[agent_id].data[key]):
gap_info[agent_id].data[key] = (
self.info["Train"][agent_id].data[key]
- self.info["Evaluation"][agent_id].data[key]
)

def reset(self, mode="Train"):
self.start_time = time.time()
self.timestep_sec = 0.1
@@ -238,10 +250,21 @@ def record_step(
# Increment this episode's step count.
self.steps += 1

def record_episode(self):
def record_episode(self, old_episode=None, eval_rate=None, count=None):
for _, agent_info in self.info[self.active_tag].items():
agent_info.normalize()

if (old_episode is not None) and (eval_rate is not None):
count = count % eval_rate
for agent_id, agent_info in self.info[self.active_tag].items():
for key in agent_info.data:
if np.isscalar(agent_info.data[key]):
agent_info.data[key] = (
agent_info.data[key]
+ old_episode.info[self.active_tag][agent_id].data[key]
* count
) / (count + 1)

def initialize_tb_writer(self):
if self.tb_writer is None:
self.tb_writer = SummaryWriter(
@@ -333,7 +356,7 @@ def episodes(n, etag=None, log_dir=None):
for agent_id, agent_info in e.info[e.active_tag].items()
]
row = (
f"{e.index}/{n}",
f"{e.index + 1}/{n}",
f"{e.sim2wall_ratio:.2f}",
f"{e.steps}",
f"{e.steps_per_second:.2f}",
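Two bits of arithmetic in episode.py are easy to miss in the diff: record_episode now keeps an incremental mean over the episodes seen since the last TensorBoard flush, and calculate_gap stores the per-metric generalization gap as the training value minus the evaluation value. A self-contained sketch of the assumed semantics (the helper names here are illustrative, not the module's API):

import numpy as np

def running_mean(new_value, old_mean, count, eval_rate):
    """Extend a mean over the k = count % eval_rate episodes already folded in
    by one more episode: (new_value + old_mean * k) / (k + 1)."""
    k = count % eval_rate
    return (new_value + old_mean * k) / (k + 1)

def generalization_gap(train_data, eval_data):
    """Per-metric gap: training value minus evaluation value, scalars only."""
    return {
        key: train_data[key] - eval_data[key]
        for key in train_data
        if np.isscalar(train_data[key]) and np.isscalar(eval_data[key])
    }

# Example with made-up numbers: a reward averaged over 3 episodes (10.0) extended
# with a 4th episode of 14.0 gives 11.0; train 11.0 vs eval 8.5 gives a gap of 2.5.
assert running_mean(14.0, 10.0, count=3, eval_rate=100) == 11.0
assert generalization_gap({"episode_reward": 11.0}, {"episode_reward": 8.5}) == {"episode_reward": 2.5}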