Update evaluate and train to evaluate per episode #639

Merged: 34 commits, Apr 6, 2021
Changes from 11 commits
Commits
db8785f
Update evaluate and train to eval per episode
AlexLewandowski Mar 3, 2021
c670d41
Fix tests and change normalization
AlexLewandowski Mar 3, 2021
eba23a4
Fix formatting
AlexLewandowski Mar 3, 2021
8816495
Record episode for train/eval at the same time
AlexLewandowski Mar 3, 2021
a022a78
Train performance averages last eval_rate episodes
AlexLewandowski Mar 4, 2021
57b0b03
Add gap_mode and calculate generalization gap
AlexLewandowski Mar 4, 2021
a1a8bdf
Enable explore during eval for better comparison
AlexLewandowski Mar 4, 2021
360e0eb
Fix formatting
AlexLewandowski Mar 4, 2021
83e7c1b
Merge branch 'develop' into ultra-eval-per-episode
AlexLewandowski Mar 8, 2021
37eca41
Fix averaging in record_episode
AlexLewandowski Mar 9, 2021
35db258
Only average scalars in record_episode
AlexLewandowski Mar 9, 2021
dc9b9d5
Check for scalar when calculating gap
AlexLewandowski Mar 12, 2021
d0de563
Merge branch 'develop' into ultra-eval-per-episode
AlexLewandowski Mar 19, 2021
2eb3b90
Fix formatting
AlexLewandowski Mar 19, 2021
0aaf456
Fix bugs introduced by merge
AlexLewandowski Mar 19, 2021
ff266b5
Fix calculate_gap
AlexLewandowski Mar 19, 2021
57b2e0e
Fix averaging over episodes
AlexLewandowski Mar 22, 2021
b0c904e
Fix formatting
AlexLewandowski Mar 22, 2021
5277c5c
Check eval_episodes is 0 before evaluation
AlexLewandowski Mar 23, 2021
ae024e2
Evaluate on train and test without exploration
AlexLewandowski Mar 23, 2021
9d5b002
Make train/test occur in one ray.get
AlexLewandowski Mar 24, 2021
e07f770
Remove episode_count
AlexLewandowski Mar 31, 2021
ef66753
Remove episode_count from tests
AlexLewandowski Mar 31, 2021
1f578d4
Remove averaging over training episodes
AlexLewandowski Apr 5, 2021
afab3c3
Merge branch 'ultra-develop' into ultra-eval-per-episode
AlexLewandowski Apr 5, 2021
3bd5468
Restructure evaluation_check with eval_mode = False
AlexLewandowski Apr 5, 2021
af226e3
Fix formatting
AlexLewandowski Apr 5, 2021
64ae798
try split ci test method
JenishPatel99 Apr 5, 2021
053649a
Merge branch 'ultra-speedup-rllib-tests' into ultra-eval-per-episode
AlexLewandowski Apr 6, 2021
dfefee5
Check for valid eval_episodes inside evaluation_check
AlexLewandowski Apr 6, 2021
5124373
Merge branch 'ultra-speedup-rllib-tests' into ultra-eval-per-episode
AlexLewandowski Apr 6, 2021
eb45d51
Merge branch 'ultra-eval-per-episode' of github.com:huawei-noah/SMART…
AlexLewandowski Apr 6, 2021
9136a31
Removed averaging feature of record_episode
AlexLewandowski Apr 6, 2021
8f99c33
Fix formatting
AlexLewandowski Apr 6, 2021
30 changes: 16 additions & 14 deletions ultra/tests/test_evaluate.py
@@ -81,6 +81,7 @@ def run_experiment():
policy_class = "ultra.baselines.sac:sac-v0"
log_dir = "tests/output_eval_check_logs"

episode_count = 0
for episode in episodes(1, etag=policy_class, log_dir=log_dir):
observations = env.reset()
state = observations[AGENT_ID]
@@ -95,19 +96,6 @@ def run_experiment():
dill.dump(spec, spec_output, pickle.HIGHEST_PROTOCOL)

while not dones["__all__"]:
evaluation_check(
agent=agent,
agent_id=AGENT_ID,
episode=episode,
eval_rate=10,
eval_episodes=1,
max_episode_steps=2,
policy_class=policy_class,
scenario_info=("00", "eval_test"),
timestep_sec=0.1,
headless=True,
log_dir=log_dir,
)
action = agent.act(state, explore=True)
observations, rewards, dones, infos = env.step({AGENT_ID: action})
next_state = observations[AGENT_ID]
@@ -130,6 +118,21 @@ def run_experiment():
)
total_step += 1
state = next_state
evaluation_check(
agent=agent,
agent_id=AGENT_ID,
episode=episode,
eval_rate=10,
eval_episodes=1,
max_episode_steps=2,
episode_count=episode_count,
policy_class=policy_class,
scenario_info=("00", "eval_test"),
timestep_sec=0.1,
headless=True,
log_dir=log_dir,
)
episode_count += 1

env.close()

@@ -182,7 +185,6 @@ def test_evaluate_agent(self):
agent_id="AGENT_001",
policy_class=policy_class,
seed=seed,
itr_count=0,
checkpoint_dir=model,
scenario_info=("00", "eval_test"),
num_episodes=1,
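The shape of the test changes rather than what it covers: `evaluation_check` moves out of the step loop and is driven once per finished episode by a new `episode_count` argument. A rough sketch of the resulting structure (names follow the diff above; environment setup and the replay/learn bookkeeping are elided, so this is illustrative rather than the full test):

```python
# Sketch only, not the full test: env, agent, AGENT_ID, episodes, policy_class
# and log_dir are the fixtures used in run_experiment() above.
episode_count = 0
for episode in episodes(1, etag=policy_class, log_dir=log_dir):
    observations = env.reset()
    state = observations[AGENT_ID]
    dones = {"__all__": False}

    # Run the episode to completion; evaluation no longer happens mid-episode.
    while not dones["__all__"]:
        action = agent.act(state, explore=True)
        observations, rewards, dones, infos = env.step({AGENT_ID: action})
        state = observations[AGENT_ID]

    # At most one evaluation per finished episode, gated inside evaluation_check.
    evaluation_check(
        agent=agent,
        agent_id=AGENT_ID,
        episode=episode,
        episode_count=episode_count,
        eval_rate=10,
        eval_episodes=1,
        max_episode_steps=2,
        policy_class=policy_class,
        scenario_info=("00", "eval_test"),
        timestep_sec=0.1,
        headless=True,
        log_dir=log_dir,
    )
    episode_count += 1
```
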
21 changes: 12 additions & 9 deletions ultra/ultra/evaluate.py
@@ -53,6 +53,7 @@ def evaluation_check(
eval_rate,
eval_episodes,
max_episode_steps,
episode_count,
scenario_info,
timestep_sec,
headless,
@@ -61,10 +62,12 @@
agent_itr = episode.get_itr(agent_id)

print(
f"Agent iteration : {agent_itr}, Eval rate : {eval_rate}, last_eval_iter : {episode.last_eval_iteration}"
f"Agent iteration : {agent_itr}, Episode count : {episode_count}, Eval rate : {eval_rate}, last_eval_iter : {episode.last_eval_iteration}"
)
if (agent_itr + 1) % eval_rate == 0 and episode.last_eval_iteration != agent_itr:
checkpoint_dir = episode.checkpoint_dir(agent_itr)
if (
episode_count + 1
) % eval_rate == 0 and episode.last_eval_iteration != episode_count:
checkpoint_dir = episode.checkpoint_dir(episode_count)
agent.save(checkpoint_dir)
episode.eval_mode()
episode.info[episode.active_tag][agent_id] = ray.get(
@@ -74,7 +77,6 @@
agent_id=agent_id,
policy_class=policy_class,
seed=episode.eval_count,
itr_count=agent_itr,
checkpoint_dir=checkpoint_dir,
scenario_info=scenario_info,
num_episodes=eval_episodes,
@@ -86,7 +88,10 @@
]
)[0]
episode.eval_count += 1
episode.last_eval_iteration = agent_itr
episode.last_eval_iteration = episode_count
episode.record_tensorboard()
episode.gap_mode()
episode.calculate_gap()
episode.record_tensorboard()
episode.train_mode()

@@ -98,14 +103,14 @@ def evaluate(
seed,
agent_id,
policy_class,
itr_count,
checkpoint_dir,
scenario_info,
num_episodes,
max_episode_steps,
headless,
timestep_sec,
log_dir,
explore=False,
):

torch.set_num_threads(1)
@@ -137,7 +142,7 @@ def evaluate(

episode.reset(mode="Evaluation")
while not dones["__all__"]:
action = agent.act(state, explore=False)
action = agent.act(state, explore=True)
observations, rewards, dones, infos = env.step({agent_id: action})

next_state = observations[agent_id]
@@ -238,7 +243,6 @@
):
model = sorted_models[episode.index]
print("model: ", model)
episode_count = model.split("/")[-1]
episode.eval_mode()
episode.info[episode.active_tag][AGENT_ID] = ray.get(
[
@@ -247,7 +251,6 @@
agent_id=AGENT_ID,
policy_class=policy_class,
seed=episode.eval_count,
itr_count=0,
checkpoint_dir=model,
scenario_info=(args.task, args.level),
num_episodes=int(args.episodes),
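The substantive change in `evaluation_check` is the trigger condition: evaluation is now keyed to the number of completed training episodes (`episode_count`) rather than the agent's iteration counter, and checkpoints are saved under that episode count. A minimal, self-contained sketch of the gate, using the names from the diff above (the standalone helper itself is illustrative, not part of the PR):

```python
def should_evaluate(episode_count: int, eval_rate: int, last_eval_iteration: int) -> bool:
    """Evaluate every `eval_rate` completed episodes, never twice for the same one."""
    # With eval_rate=10 this fires after episodes 9, 19, 29, ... (0-indexed),
    # mirroring `(episode_count + 1) % eval_rate == 0` in evaluation_check.
    return (episode_count + 1) % eval_rate == 0 and last_eval_iteration != episode_count
```

Note that at this point in the commit range (`a1a8bdf`) `evaluate` also acts with `explore=True`; a later commit in the PR (`ae024e2`, "Evaluate on train and test without exploration") appears to revert that behaviour.
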
39 changes: 24 additions & 15 deletions ultra/ultra/train.py
@@ -75,6 +75,8 @@ def train(

agent = spec.build_agent()

episode_count = 0
old_episode = None
for episode in episodes(num_episodes, etag=policy_class, log_dir=log_dir):
observations = env.reset()
state = observations[AGENT_ID]
@@ -93,16 +95,6 @@
if episode.get_itr(AGENT_ID) >= 1000000:
finished = True
break
evaluation_check(
agent=agent,
agent_id=AGENT_ID,
policy_class=policy_class,
episode=episode,
log_dir=log_dir,
max_episode_steps=max_episode_steps,
**eval_info,
**env.info,
)
action = agent.act(state, explore=True)
observations, rewards, dones, infos = env.step({AGENT_ID: action})
next_state = observations[AGENT_ID]
@@ -124,8 +116,25 @@
total_step += 1
state = next_state

episode.record_episode()
episode.record_tensorboard()
episode.record_episode(old_episode, eval_info["eval_rate"])
old_episode = episode

if (episode_count + 1) % eval_info["eval_rate"] == 0:
episode.record_tensorboard()
old_episode = None

evaluation_check(
agent=agent,
agent_id=AGENT_ID,
policy_class=policy_class,
episode=episode,
log_dir=log_dir,
max_episode_steps=max_episode_steps,
episode_count=episode_count,
**eval_info,
**env.info,
)
episode_count += 1
if finished:
break

@@ -169,9 +178,9 @@ def train(
)
parser.add_argument(
"--eval-rate",
help="Evaluation rate based on number of observations",
help="Evaluation rate based on number of episodes",
type=int,
default=10000,
default=100,
)
parser.add_argument(
"--seed",
@@ -209,7 +218,7 @@ def train(
num_episodes=int(args.episodes),
max_episode_steps=int(args.max_episode_steps),
eval_info={
"eval_rate": float(args.eval_rate),
"eval_rate": int(args.eval_rate),
"eval_episodes": int(args.eval_episodes),
},
timestep_sec=float(args.timestep),
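In `train.py` the same idea restructures the main loop: the per-step `evaluation_check` call is removed, each episode is recorded with an optional running average over the last `eval_rate` episodes (via `old_episode`), TensorBoard is flushed on the same cadence as evaluation, and only then is `evaluation_check` invoked with the current `episode_count`. A condensed sketch of that flow, with the per-step training body elided behind a hypothetical helper:

```python
# Sketch only: names such as episodes, evaluation_check, eval_info, AGENT_ID
# come from the diff above; run_one_training_episode is a hypothetical stand-in
# for the act/step/learn loop inside each episode.
episode_count = 0
old_episode = None
for episode in episodes(num_episodes, etag=policy_class, log_dir=log_dir):
    run_one_training_episode(agent, env, episode)

    # Accumulate this episode's metrics, averaged against the previous ones.
    episode.record_episode(old_episode, eval_info["eval_rate"])
    old_episode = episode

    # Flush training metrics to TensorBoard once every eval_rate episodes,
    # then restart the running average.
    if (episode_count + 1) % eval_info["eval_rate"] == 0:
        episode.record_tensorboard()
        old_episode = None

    evaluation_check(
        agent=agent,
        agent_id=AGENT_ID,
        policy_class=policy_class,
        episode=episode,
        episode_count=episode_count,
        log_dir=log_dir,
        max_episode_steps=max_episode_steps,
        **eval_info,
        **env.info,
    )
    episode_count += 1
```

Later commits in the PR ("Remove averaging over training episodes", "Removed averaging feature of record_episode") appear to drop this averaging again, so the snapshot above reflects the state at commit 11 of 34.
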
41 changes: 31 additions & 10 deletions ultra/ultra/utils/episode.py
@@ -35,8 +35,8 @@
class LogInfo:
def __init__(self):
self.data = {
"env_score": 0,
"episode_reward": 0,
"env_score": 0.0,
"episode_reward": 0.0,
"dist_center": 0,
"goal_dist": 0,
"speed": 0,
@@ -57,7 +57,7 @@ def __init__(self):
}

def add(self, infos, rewards):
self.data["env_score"] += int(infos["logs"]["env_score"])
self.data["env_score"] += infos["logs"]["env_score"]
self.data["speed"] += infos["logs"]["speed"]
self.data["max_speed_violation"] += (
1 if infos["logs"]["speed"] > infos["logs"]["closest_wp"].speed_limit else 0
@@ -71,7 +71,7 @@ def add(self, infos, rewards):
self.data["ego_linear_jerk"] += infos["logs"]["linear_jerk"]
self.data["ego_angular_jerk"] += infos["logs"]["angular_jerk"]
self.data["episode_reward"] += rewards
self.data["final_pos"] = infos["logs"]["position"]
self.data["final_pos"] = infos["logs"]["position"][:2]
self.data["start_pos"] = infos["logs"]["start"].position
self.data["dist_travelled"] = math.sqrt(
(self.data["final_pos"][1] - self.data["start_pos"][1]) ** 2
@@ -95,14 +95,13 @@ def step(self):

def normalize(self):
steps = self.data["episode_length"]
self.data["env_score"] /= steps
self.data["dist_center"] /= steps
self.data["speed"] /= steps
self.data["ego_linear_jerk"] /= steps
self.data["ego_angular_jerk"] /= steps
self.data["ego_num_violations"] /= steps
self.data["social_num_violations"] /= steps
self.data["max_speed_violation"] /= steps
# self.data["ego_num_violations"] /= steps
# self.data["social_num_violations"] /= steps
# self.data["max_speed_violation"] /= steps


class Episode:
@@ -177,6 +176,18 @@ def train_mode(self):
def eval_mode(self):
self.active_tag = "Evaluation"

def gap_mode(self):
self.active_tag = "Gap"

def calculate_gap(self):
gap_info = self.info["Gap"]
for agent_id, agent_info in self.info["Train"].items():
for key in agent_info.data:
gap_info[agent_id].data[key] = (
self.info["Train"][agent_id].data[key]
- self.info["Evaluation"][agent_id].data[key]
)

def reset(self, mode="Train"):
self.start_time = time.time()
self.timestep_sec = 0.1
@@ -219,10 +230,20 @@ def record_step(self, agent_id, infos, rewards, total_step=0, loss_output=None):
self.steps += 1
self.agents_itr[agent_id] += 1

def record_episode(self):
def record_episode(self, old_episode=None, eval_rate=None):
for _, agent_info in self.info[self.active_tag].items():
agent_info.normalize()

if (old_episode is not None) and (eval_rate is not None):
for agent_id, agent_info in self.info[self.active_tag].items():
for key in agent_info.data:
if np.isscalar(agent_info.data[key]):
agent_info.data[key] = (
agent_info.data[key]
+ old_episode.info[self.active_tag][agent_id].data[key]
* eval_rate
) / eval_rate

def initialize_tb_writer(self):
if self.tb_writer is None:
self.tb_writer = SummaryWriter(
@@ -314,7 +335,7 @@ def episodes(n, etag=None, log_dir=None):
for agent_id, agent_info in e.info[e.active_tag].items()
]
row = (
f"{e.index}/{n}",
f"{e.index + 1}/{n}",
f"{e.sim2wall_ratio:.2f}",
f"{e.steps}",
f"{e.steps_per_second:.2f}",
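`Episode` gains a third tag, "Gap", alongside "Train" and "Evaluation": after an evaluation pass, `calculate_gap` stores the per-metric difference Train - Evaluation so the generalization gap can be logged to TensorBoard like any other set of metrics. A toy illustration of that arithmetic, using plain dicts instead of the per-agent `LogInfo` objects the real code iterates over:

```python
# Illustrative only: in the PR, these values live in episode.info["Train"] and
# episode.info["Evaluation"] as LogInfo.data entries keyed by agent id.
train_metrics = {"episode_reward": 120.0, "env_score": 0.75, "dist_travelled": 95.0}
eval_metrics = {"episode_reward": 90.0, "env_score": 0.50, "dist_travelled": 80.0}

# Gap[key] = Train[key] - Evaluation[key], the quantity calculate_gap records.
gap_metrics = {key: train_metrics[key] - eval_metrics[key] for key in train_metrics}
print(gap_metrics)  # {'episode_reward': 30.0, 'env_score': 0.25, 'dist_travelled': 15.0}
```

In the PR this is driven by the `gap_mode()` / `calculate_gap()` / `record_tensorboard()` sequence shown in the `evaluation_check` diff above; a follow-up commit ("Check for scalar when calculating gap") appears to restrict the subtraction to scalar entries.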