
Commit e4de826

submit MRnnPredictorV6 to test
1 parent ac81b4d commit e4de826

File tree

3 files changed, +39 -11 lines changed


drl/test.py

+24
@@ -0,0 +1,24 @@
+from __future__ import print_function
+
+import gym
+import scipy.stats
+import numpy as np
+
+env = gym.make("Taxi-v2")
+env.reset()
+
+Q = np.zeros([env.observation_space.n, env.action_space.n])
+alpha = 0.618
+
+for episode in range(1,1001):
+    done = False
+    G, reward = 0,0
+    state = env.reset()
+    while done != True:
+        action = np.argmax(Q[state]) #1
+        state2, reward, done, info = env.step(action) #2
+        Q[state,action] += alpha * (reward + np.max(Q[state2]) - Q[state,action]) #3
+        G += reward
+        state = state2
+    if episode % 50 == 0:
+        print('Episode {} Total Reward: {}'.format(episode,G))
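Note on the new drl/test.py: the loop at #1-#3 is tabular Q-learning on Taxi-v2 with learning rate alpha = 0.618 and, as written, no explicit discount factor (equivalent to gamma = 1). Below is a minimal sketch of the same update factored into a function for clarity; the name q_update and the explicit gamma argument are illustrative only and not part of the commit.

import numpy as np

def q_update(Q, state, action, reward, next_state, alpha=0.618, gamma=1.0):
    # One tabular Q-learning step: move Q[state, action] toward the TD target
    # reward + gamma * max_a' Q[next_state, a'].  With gamma = 1.0 this matches
    # line #3 of drl/test.py in this commit.
    td_target = reward + gamma * np.max(Q[next_state])
    Q[state, action] += alpha * (td_target - Q[state, action])
    return Q

Also worth noting: the action at #1 is a pure greedy argmax (there is no epsilon-greedy exploration), and the scipy.stats import is unused.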

pstk/main.py

+1 -1
@@ -11,7 +11,7 @@
 NUM_LAYERS = 5
 MAX_STEP = 60
 DROP_OUT = 0.4
-LEARNING_RATE = 3e-3
+LEARNING_RATE = 1e-3
 LOG_DIR = 'logdir'

pstk/model/model6.py

+14 -10
@@ -699,6 +699,7 @@ def __init__(self, data, target, seqlen, classes, dropout, num_hidden=200, num_l
         self._num_layers = num_layers
         self._classes = classes
         self._learning_rate = learning_rate
+        self.keep_prob
         self.precisions
         self.recalls
         self.f_score
@@ -737,18 +738,16 @@ def rnn(self, input):
                     stddev=0.01),
                 bias_initializer=tf.constant_initializer(0.1)
             )
-            if i > 0:
-                with tf.name_scope("dropout_{}".format(i)):
-                    keep_prob = 1-(self.dropout*(f**i))
-                    c = tf.nn.rnn_cell.DropoutWrapper(
-                        cell=c,
-                        input_keep_prob=keep_prob)
+            # if i % 2 != 0:
+            #     c = tf.nn.rnn_cell.DropoutWrapper(
+            #         cell=c,
+            #         input_keep_prob=self.keep_prob)
             cells.append(c)
         mc = tf.nn.rnn_cell.MultiRNNCell(cells)
-        # mc = tf.nn.rnn_cell.DropoutWrapper(
-        #     cell=mc,
-        #     output_keep_prob=self.keep_prob
-        # )
+        mc = tf.nn.rnn_cell.DropoutWrapper(
+            cell=mc,
+            output_keep_prob=self.keep_prob
+        )
         output, _ = tf.nn.dynamic_rnn(
             mc,
             input,
@@ -765,6 +764,11 @@ def last_relevant(output, length):
             [tf.range(batch_size), length-1], axis=1))
         return relevant

+    @lazy_property
+    def keep_prob(self):
+        with tf.name_scope("keep_prob"):
+            return 1.0-self.dropout
+
     @lazy_property
     def cost(self):
         prediction = self.prediction
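On the model6.py change: per-layer input dropout with a decaying keep probability (1 - dropout*(f**i)) is replaced by a single output DropoutWrapper around the whole MultiRNNCell, driven by the new keep_prob property (1 - dropout). The bare self.keep_prob access added to __init__ makes sense if lazy_property builds and caches the graph node on first touch. Below is a minimal sketch of that common decorator pattern, assuming the repo's own lazy_property (defined elsewhere in the codebase) behaves like the usual one; this is not copied from the repo.

import functools

def lazy_property(function):
    # Cache the decorated method's result on first access so the TensorFlow
    # ops it creates are added to the graph exactly once, then expose it as a
    # read-only property (e.g. model.keep_prob).
    attribute = '_cache_' + function.__name__

    @property
    @functools.wraps(function)
    def decorator(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)

    return decorator

Under that assumption, touching self.keep_prob in __init__ simply forces the node to exist before rnn() wraps the MultiRNNCell with it.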
