
Commit e4de826

submit MRnnPredictorV6 to test
1 parent ac81b4d commit e4de826

File tree

3 files changed, +39 -11 lines changed


drl/test.py

+24
@@ -0,0 +1,24 @@
+from __future__ import print_function
+
+import gym
+import scipy.stats
+import numpy as np
+
+env = gym.make("Taxi-v2")
+env.reset()
+
+Q = np.zeros([env.observation_space.n, env.action_space.n])
+alpha = 0.618
+
+for episode in range(1,1001):
+    done = False
+    G, reward = 0,0
+    state = env.reset()
+    while done != True:
+        action = np.argmax(Q[state]) #1
+        state2, reward, done, info = env.step(action) #2
+        Q[state,action] += alpha * (reward + np.max(Q[state2]) - Q[state,action]) #3
+        G += reward
+        state = state2
+    if episode % 50 == 0:
+        print('Episode {} Total Reward: {}'.format(episode,G))
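Note on the new drl/test.py: the loop at #1-#3 is tabular Q-learning on Taxi-v2 with learning rate alpha = 0.618 and, as written, no explicit discount factor (equivalent to gamma = 1). Below is a minimal sketch of the same update factored into a function for clarity; the name q_update and the explicit gamma argument are illustrative only and not part of the commit.

import numpy as np

def q_update(Q, state, action, reward, next_state, alpha=0.618, gamma=1.0):
    # One tabular Q-learning step: move Q[state, action] toward the TD target
    # reward + gamma * max_a' Q[next_state, a'].  With gamma = 1.0 this matches
    # line #3 of drl/test.py in this commit.
    td_target = reward + gamma * np.max(Q[next_state])
    Q[state, action] += alpha * (td_target - Q[state, action])
    return Q

Also worth noting: the action at #1 is a pure greedy argmax (there is no epsilon-greedy exploration), and the scipy.stats import is unused.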

pstk/main.py

+1 -1
@@ -11,7 +11,7 @@
 NUM_LAYERS = 5
 MAX_STEP = 60
 DROP_OUT = 0.4
-LEARNING_RATE = 3e-3
+LEARNING_RATE = 1e-3
 LOG_DIR = 'logdir'

pstk/model/model6.py

+14 -10
@@ -699,6 +699,7 @@ def __init__(self, data, target, seqlen, classes, dropout, num_hidden=200, num_l
         self._num_layers = num_layers
         self._classes = classes
         self._learning_rate = learning_rate
+        self.keep_prob
         self.precisions
         self.recalls
         self.f_score
@@ -737,18 +738,16 @@ def rnn(self, input):
                     stddev=0.01),
                 bias_initializer=tf.constant_initializer(0.1)
             )
-            if i > 0:
-                with tf.name_scope("dropout_{}".format(i)):
-                    keep_prob = 1-(self.dropout*(f**i))
-                    c = tf.nn.rnn_cell.DropoutWrapper(
-                        cell=c,
-                        input_keep_prob=keep_prob)
+            # if i % 2 != 0:
+            #     c = tf.nn.rnn_cell.DropoutWrapper(
+            #         cell=c,
+            #         input_keep_prob=self.keep_prob)
             cells.append(c)
         mc = tf.nn.rnn_cell.MultiRNNCell(cells)
-        # mc = tf.nn.rnn_cell.DropoutWrapper(
-        #     cell=mc,
-        #     output_keep_prob=self.keep_prob
-        # )
+        mc = tf.nn.rnn_cell.DropoutWrapper(
+            cell=mc,
+            output_keep_prob=self.keep_prob
+        )
         output, _ = tf.nn.dynamic_rnn(
             mc,
             input,
@@ -765,6 +764,11 @@ def last_relevant(output, length):
             [tf.range(batch_size), length-1], axis=1))
         return relevant

+    @lazy_property
+    def keep_prob(self):
+        with tf.name_scope("keep_prob"):
+            return 1.0-self.dropout
+
     @lazy_property
     def cost(self):
         prediction = self.prediction
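On the model6.py change: per-layer input dropout with a decaying keep probability (1 - dropout*(f**i)) is replaced by a single output DropoutWrapper around the whole MultiRNNCell, driven by the new keep_prob property (1 - dropout). The bare self.keep_prob access added to __init__ makes sense if lazy_property builds and caches the graph node on first touch. Below is a minimal sketch of that common decorator pattern, assuming the repo's own lazy_property (defined elsewhere in the codebase) behaves like the usual one; this is not copied from the repo.

import functools

def lazy_property(function):
    # Cache the decorated method's result on first access so the TensorFlow
    # ops it creates are added to the graph exactly once, then expose it as a
    # read-only property (e.g. model.keep_prob).
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator

Under that assumption, touching self.keep_prob in __init__ simply forces the node to exist before rnn() wraps the MultiRNNCell with it.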
