diff --git a/rl/policy/noise.py b/rl/policy/noise.py index baa30c0..fec9507 100644 --- a/rl/policy/noise.py +++ b/rl/policy/noise.py @@ -60,7 +60,7 @@ def update(self, sys_vars): self.n_step = sys_vars['epi'] -class AnnealedGaussian(LinearNoisePolicy): +class AnnealedGaussianPolicy(LinearNoisePolicy): ''' Base class of random noise policy for DDPG @@ -71,7 +71,7 @@ class AnnealedGaussian(LinearNoisePolicy): def __init__(self, env_spec, exploration_anneal_episodes, mu, sigma, sigma_min, **kwargs): # absorb generic param without breaking - super(AnnealedGaussian, self).__init__( + super(AnnealedGaussianPolicy, self).__init__( env_spec, exploration_anneal_episodes) self.size = env_spec['action_dim'] self.mu = mu @@ -92,12 +92,12 @@ def current_sigma(self): return sigma -class GaussianWhiteNoise(AnnealedGaussian): +class GaussianWhiteNoisePolicy(AnnealedGaussianPolicy): def __init__(self, env_spec, exploration_anneal_episodes=20, mu=0., sigma=.3, sigma_min=None, **kwargs): # absorb generic param without breaking - super(GaussianWhiteNoise, self).__init__( + super(GaussianWhiteNoisePolicy, self).__init__( env_spec, exploration_anneal_episodes, mu, sigma, sigma_min) @@ -106,7 +106,7 @@ def sample(self): return sample -class OUNoise(AnnealedGaussian): +class OUNoisePolicy(AnnealedGaussianPolicy): ''' Based on @@ -116,7 +116,7 @@ class OUNoise(AnnealedGaussian): def __init__(self, env_spec, exploration_anneal_episodes=20, theta=.15, mu=0., sigma=.3, dt=1e-2, x0=None, sigma_min=None, **kwargs): # absorb generic param without breaking - super(OUNoise, self).__init__( + super(OUNoisePolicy, self).__init__( env_spec, exploration_anneal_episodes, mu, sigma, sigma_min, **kwargs) diff --git a/rl/spec/component_locks.json b/rl/spec/component_locks.json index 7b963f7..759ef0f 100644 --- a/rl/spec/component_locks.json +++ b/rl/spec/component_locks.json @@ -20,10 +20,10 @@ "DDPG" ], "Policy": [ - "GaussianWhiteNoise", + "GaussianWhiteNoisePolicy", "LinearNoisePolicy", "NoNoisePolicy", - "OUNoise" + "OUNoisePolicy" ] }, "actor_critic": { diff --git a/rl/spec/dev_experiment_specs.json b/rl/spec/dev_experiment_specs.json index be835e9..eadbd36 100644 --- a/rl/spec/dev_experiment_specs.json +++ b/rl/spec/dev_experiment_specs.json @@ -120,30 +120,5 @@ "gamma": [0.97, 0.99], "lr": [0.01, 0.1] } - }, - "dev": { - "problem": "CartPole-v0", - "Agent": "DDPG", - "HyperOptimizer": "GridSearch", - "Memory": "LinearMemoryWithForgetting", - "Optimizer": "AdamOptimizer", - "Policy": "OUNoise", - "PreProcessor": "NoPreProcessor", - "param": { - "lr": 0.01, - "decay": 0.0, - "gamma": 0.99, - "hidden_layers": [64], - "hidden_layers_activation": "sigmoid", - "output_layer_activation": "linear", - "exploration_anneal_episodes": 10, - "auto_architecture": false, - "num_hidden_layers": 3, - "first_hidden_layer_size": 512 - }, - "param_range": { - "gamma": [0.97, 0.99], - "lr": [0.01, 0.1] - } } }