
Commit 2028808: Add examples to use custom env, agent and learner
1 parent: 96bceb9

File tree: 8 files changed (+290, -2 lines)


requirements.txt (+1, -1)

@@ -1,5 +1,5 @@
 numpy==1.18.0
-torch==1.6.0
+torch>=1.6.0
 gym==0.17.3
 atari-py==0.2.6
 box2d-py==2.3.8

rl_algorithms/acer/agent.py (+3)

@@ -93,6 +93,8 @@ def select_action(self, state: np.ndarray) -> Tuple[int, torch.Tensor]:
         prob = F.softmax(self.learner.actor_target(state).squeeze(), 0) + 1e-8
         action_dist = Categorical(prob)
         selected_action = action_dist.sample().item()
+        if self.is_test:
+            return selected_action
         return selected_action, prob.cpu().numpy()
 
     def step(self, action: int) -> Tuple[np.ndarray, np.float64, bool, dict]:
@@ -157,6 +159,7 @@ def train(self):
 
             if self.i_episode % self.save_period == 0:
                 self.learner.save_params(self.i_episode)
+                self.interim_test()
 
         self.env.close()
         self.learner.save_params(self.i_episode)

rl_algorithms/common/abstract/agent.py (+1, -1)

@@ -157,7 +157,7 @@ def _test(self, interim_test: bool = False):
                 step += 1
 
             print(
-                "[INFO] test %d\tstep: %d\ttotal score: %d" % (i_episode, step, score)
+                "[INFO] test %d\tstep: %d\ttotal score: %.2f" % (i_episode, step, score)
             )
             score_list.append(score)
 

rl_algorithms/example/__init__.py

Whitespace-only changes.

rl_algorithms/example/custom_agent.py (+34, new file)

@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+"""Custom agent for DQN.
+This is an example of using a custom agent.
+In this example, the custom agent takes the exponential of the state.
+You can customize any method, e.g. select_action, train, etc.
+
+To use a custom agent, decorate the class so it can be built, and import it in the main function.
+
+- Author: Jiseong Han
+- Contact: [email protected]
+"""
+
+import numpy as np
+import torch
+
+from rl_algorithms.common.helper_functions import numpy2floattensor
+from rl_algorithms.dqn.agent import DQNAgent
+from rl_algorithms.registry import AGENTS
+
+
+@AGENTS.register_module
+class CustomDQN(DQNAgent):
+    """Example custom agent for DQN."""
+
+    # pylint: disable=no-self-use
+    def _preprocess_state(self, state: np.ndarray) -> torch.Tensor:
+        """Preprocess state so that actor selects an action."""
+        state = np.exp(state)
+        state = numpy2floattensor(state, self.learner.device)
+        return state
+
+    def train(self):
+        """Custom train."""
+        pass
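The docstring above notes that methods such as select_action can also be overridden. Below is a rough sketch of such an override (not part of this commit), assuming DQNAgent exposes self.epsilon, self.env.action_space, and self.learner.dqn the way the library's DQN agent does:

    # Hypothetical override for CustomDQN (an assumption, not code from this commit):
    # epsilon-greedy action selection on the exponentiated state.
    def select_action(self, state: np.ndarray) -> np.ndarray:
        """Select an action from the preprocessed state."""
        state = self._preprocess_state(state)  # applies np.exp, then moves to device
        if not self.is_test and self.epsilon > np.random.random():
            return np.array(self.env.action_space.sample())
        with torch.no_grad():
            selected_action = self.learner.dqn(state).argmax()
        return selected_action.detach().cpu().numpy()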

rl_algorithms/example/custom_dqn.yaml (+38, new file)

@@ -0,0 +1,38 @@
+type: "CustomDQN"  # custom agent name
+hyper_params:
+  gamma: 0.99
+  tau: 0.005
+  buffer_size: 10000  # openai baselines: 10000
+  batch_size: 64  # openai baselines: 32
+  update_starts_from: 100  # openai baselines: 10000
+  multiple_update: 1  # multiple learning updates
+  train_freq: 1  # in openai baselines, train_freq = 4
+  gradient_clip: 10.0  # dueling: 10.0
+  n_step: 3
+  w_n_step: 1.0
+  w_q_reg: 0.0000001
+  per_alpha: 0.6  # openai baselines: 0.6
+  per_beta: 0.4
+  per_eps: 0.000001
+  max_epsilon: 1.0
+  min_epsilon: 0.01  # openai baselines: 0.01
+  epsilon_decay: 0.00001  # openai baselines: 1e-7 / 1e-1
+
+learner_cfg:
+  type: "CustomDQNLearner"  # custom learner name
+  loss_type:
+    type: "C51Loss"
+  backbone:
+  head:
+    type: "C51DuelingMLP"
+    configs:
+      hidden_sizes: [128, 64]
+      v_min: -300
+      v_max: 300
+      atom_size: 1530
+      output_activation: "identity"
+      use_noisy_net: False
+  optim_cfg:
+    lr_dqn: 0.0001
+    weight_decay: 0.0000001
+    adam_eps: 0.00000001
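The "type" strings in this config are resolved against the AGENTS and LEARNERS registries, so the decorated classes have to be imported (and thereby registered) before the agent is built. A minimal sketch of that wiring, assuming it is run from the repository root with the example directory on the import path and that the config object supports nested attribute access as in the example script further below:

    # Importing the modules runs the @AGENTS/@LEARNERS register_module decorators,
    # after which the names in the YAML can be looked up by the builder.
    from rl_algorithms.utils import YamlConfig

    from custom_agent import CustomDQN  # noqa: F401, registers "CustomDQN"
    from custom_learner import CustomDQNLearner  # noqa: F401, registers "CustomDQNLearner"

    cfg = YamlConfig(dict(agent="rl_algorithms/example/custom_dqn.yaml")).get_config_dict()
    print(cfg.agent.type)              # "CustomDQN"
    print(cfg.agent.learner_cfg.type)  # "CustomDQNLearner"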
rl_algorithms/example/custom_learner.py (+34, new file)

@@ -0,0 +1,34 @@
+"""Example of a custom learner that inherits from DQNLearner.
+You need to decorate the class to register your own learner so it can be built,
+and import the custom learner in the main file.
+
+If you want to make a custom learner, you can inherit from BaseLearner or Learner.
+If you make your own learner, you need to change the config file so it is built.
+
+- Author: Jiseong Han
+- Contact: [email protected]
+"""
+from typing import Tuple, Union
+
+import numpy as np
+import torch
+
+from rl_algorithms.common.abstract.learner import TensorTuple
+from rl_algorithms.dqn.learner import DQNLearner
+from rl_algorithms.registry import LEARNERS
+
+
+@LEARNERS.register_module
+class CustomDQNLearner(DQNLearner):
+    """Example of a custom DQN learner."""
+
+    def _init_network(self):
+        return super()._init_network()
+
+    def update_model(
+        self, experience: Union[TensorTuple, Tuple[TensorTuple]]
+    ) -> Tuple[torch.Tensor, torch.Tensor, list, np.ndarray]:  # type: ignore
+        """
+        Custom update of the model with experience.
+        """
+        pass
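update_model is left as a stub in this commit. Below is a rough sketch of what a concrete override could look like. It is only an illustration, assuming the experience tuple unpacks to batched tensors (states, actions, rewards, next_states, dones) with rewards and dones as 1-D float tensors, and that DQNLearner exposes dqn, dqn_target, dqn_optim, and hyper_params.gamma; the library's real DQN learner also returns q-values, PER indices, and new priorities, which are omitted here:

    # Hypothetical sketch of a plain one-step DQN update (not code from this commit).
    def update_model(self, experience):
        states, actions, rewards, next_states, dones = experience  # assumed layout

        # Q(s, a) for the actions actually taken
        q_values = self.dqn(states).gather(1, actions.long().unsqueeze(1)).squeeze(1)

        # one-step TD target from the target network
        with torch.no_grad():
            next_q = self.dqn_target(next_states).max(dim=1)[0]
            target = rewards + self.hyper_params.gamma * next_q * (1 - dones)

        loss = torch.nn.functional.smooth_l1_loss(q_values, target)

        self.dqn_optim.zero_grad()
        loss.backward()
        self.dqn_optim.step()

        return loss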
New file (+179), path not shown in this view

@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+"""Train or test algorithms on Custom Environment.
+
+- Author: Jiseong Han
+
+"""
+
+import argparse
+import datetime
+
+import gym
+import numpy as np
+
+from rl_algorithms import build_agent
+import rl_algorithms.common.env.utils as env_utils
+import rl_algorithms.common.helper_functions as common_utils
+from rl_algorithms.utils import YamlConfig
+
+
+def parse_args() -> argparse.Namespace:
+    # configurations
+    parser = argparse.ArgumentParser(description="Pytorch RL algorithms")
+    parser.add_argument(
+        "--seed", type=int, default=777, help="random seed for reproducibility"
+    )
+    parser.add_argument(
+        "--integration-test",
+        dest="integration_test",
+        action="store_true",
+        help="for integration test",
+    )
+    parser.add_argument(
+        "--cfg-path",
+        type=str,
+        default="rl_algorithms/example/custom_dqn.yaml",
+        help="config path",
+    )
+    parser.add_argument(
+        "--test", dest="test", action="store_true", help="test mode (no training)"
+    )
+    parser.add_argument(
+        "--load-from",
+        type=str,
+        default=None,
+        help="load the saved model and optimizer at the beginning",
+    )
+    parser.add_argument(
+        "--off-render", dest="render", action="store_false", help="turn off rendering"
+    )
+    parser.add_argument(
+        "--render-after",
+        type=int,
+        default=0,
+        help="start rendering after the input number of episode",
+    )
+    parser.add_argument(
+        "--log", dest="log", action="store_true", help="turn on logging"
+    )
+    parser.add_argument(
+        "--save-period", type=int, default=100, help="save model period"
+    )
+    parser.add_argument(
+        "--episode-num", type=int, default=1500, help="total episode num"
+    )
+    parser.add_argument(
+        "--max-episode-steps", type=int, default=300, help="max episode step"
+    )
+    parser.add_argument(
+        "--interim-test-num",
+        type=int,
+        default=10,
+        help="number of tests during training",
+    )
+
+    return parser.parse_args()
+
+
+class CustomEnv(gym.Env):
+    """Custom Environment for example."""
+
+    metadata = {"render.modes": ["human"]}
+
+    def __init__(self):
+        super(CustomEnv, self).__init__()
+        self.action_space = gym.spaces.Discrete(2)
+        self.observation_space = gym.spaces.Box(low=-3, high=3, shape=(1,))
+        self.pos = 0
+
+    def step(self, action):
+        """
+        Reaching position 3 gives +1 reward.
+        Reaching position -3 gives -1 reward.
+        Every other step gives -0.1.
+        """
+        action = -1 if action == 0 else 1
+        self.pos += action
+        if self.pos <= -3:
+            reward = -1
+        elif self.pos >= 3:
+            reward = 1
+        else:
+            reward = -0.1
+        done = abs(self.pos) >= 3
+
+        return np.array([self.pos]), reward, done, {}
+
+    def reset(self):
+        self.pos = 0
+        return np.array([self.pos])
+
+    def render(self, mode="human"):
+        render_state = [[] for _ in range(7)]
+        render_state[self.pos + 3] = [0]
+        print(
+            "################################\n",
+            render_state,
+            "\n################################",
+        )
+
+
+def main(env):
+    """Main."""
+    args = parse_args()
+
+    env_name = type(env).__name__
+    env, max_episode_steps = env_utils.set_env(env, args.max_episode_steps)
+
+    # set a random seed
+    common_utils.set_random_seed(args.seed, env)
+
+    # run
+    NOWTIMES = datetime.datetime.now()
+    curr_time = NOWTIMES.strftime("%y%m%d_%H%M%S")
+
+    cfg = YamlConfig(dict(agent=args.cfg_path)).get_config_dict()
+
+    # If running integration test, simplify experiment
+    if args.integration_test:
+        cfg = common_utils.set_cfg_for_intergration_test(cfg)
+
+    env_info = dict(
+        name=env_name,
+        observation_space=env.observation_space,
+        action_space=env.action_space,
+        is_atari=False,
+    )
+    log_cfg = dict(agent=cfg.agent.type, curr_time=curr_time, cfg_path=args.cfg_path)
+    build_args = dict(
+        env=env,
+        env_info=env_info,
+        log_cfg=log_cfg,
+        is_test=args.test,
+        load_from=args.load_from,
+        is_render=args.render,
+        render_after=args.render_after,
+        is_log=args.log,
+        save_period=args.save_period,
+        episode_num=args.episode_num,
+        max_episode_steps=max_episode_steps,
+        interim_test_num=args.interim_test_num,
+    )
+    agent = build_agent(cfg.agent, build_args)
+
+    if not args.test:
+        agent.train()
+    else:
+        agent.test()
+
+
+if __name__ == "__main__":
+    ###############################################################################################
+    # To use the custom agent and learner, import them here.
+    from custom_agent import CustomDQN
+    from custom_learner import CustomDQNLearner
+
+    # Declare custom environment here.
+    env = CustomEnv()
+    ###############################################################################################
+    main(env)
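With the example files in place, the script is run directly; assuming it is saved as, say, rl_algorithms/example/run_custom_env.py (a hypothetical path, since the file name is not shown in this view), training and then testing would look roughly like:

    python rl_algorithms/example/run_custom_env.py --cfg-path rl_algorithms/example/custom_dqn.yaml --off-render
    python rl_algorithms/example/run_custom_env.py --test --load-from <saved checkpoint path>

The flags are the ones defined in parse_args above; the config path defaults to the custom_dqn.yaml added in this commit.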
