
Commit 5f56e5f

new architecture added and tensorboard
1 parent 244c092 commit 5f56e5f

10 files changed (+145 -100 lines)


Diff for: ReadData.py

+2-2
@@ -5,8 +5,8 @@
 import numpy as np

 class ReadData:
-    def __init__(self, path_csv, embedding_model, pos_model, batch_size=32, no_samples=10000, train_val_split=0.1):
-        self.text2vec = Text2Vector(embedding_model, pos_model, size=(75, 101))
+    def __init__(self, path_csv, embedding_model, batch_size=32, no_samples=10000, train_val_split=0.1):
+        self.text2vec = Text2Vector(embedding_model, size=(75, 101))
         self.data = pd.read_csv(path_csv, sep="|")
         self.data = self.data.sample(frac=1).reset_index(drop=True)
         self.data = self.data.sample(frac=1).reset_index(drop=True).head(no_samples)
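Note: the first of the two retained `sample(frac=1).reset_index(drop=True)` lines is redundant, since the following line reshuffles again before `head(no_samples)` takes the first rows. With `pos_model` gone, the reader is constructed with one argument fewer. A minimal usage sketch (the CSV path is a hypothetical placeholder; the embedding path is the training script's default):

    from ReadData import ReadData

    # 'data/train.csv' is a placeholder path for illustration.
    reader = ReadData('data/train.csv', 'embeddings/skipgram-100/skipgram.bin',
                      batch_size=32, no_samples=10000, train_val_split=0.1)
    val_x, val_y = reader.read_all_val()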

Diff for: __pycache__/ReadData.cpython-36.pyc

-4.38 KB
Binary file not shown.

Diff for: __pycache__/text2vector.cpython-36.pyc

-2.46 KB
Binary file not shown.

Diff for: keras_models/cnnlstm.py

+7-7
@@ -10,23 +10,23 @@ def CNNLSTMModel(input_shape, output_shape):
     inp = Input(shape=input_shape)

     x = Conv1D(32, kernel_size=5, padding='same', activation='relu')(inp)
-    x = Conv1D(64, kernel_size=7, padding='same', activation='relu')(x)
-    x = Conv1D(128, kernel_size=7, padding='same', activation='relu')(x)
-    x = MaxPooling1D(pool_size=5, strides=2)(x)
+    #x = Conv1D(64, kernel_size=7, padding='same', activation='relu')(x)
+    #x = Conv1D(128, kernel_size=7, padding='same', activation='relu')(x)
+    #x = MaxPooling1D(pool_size=5, strides=2)(x)

-    x = LSTM(256, return_sequences=True)(x)
+    #x = LSTM(256, return_sequences=True)(x)

     x = Flatten()(x)
-    x = Dense(512, activation='relu')(x)
+    '''x = Dense(512, activation='relu')(x)
     x = Dropout(0.2)(x)
     x = Dense(512, activation='relu')(x)
     x = Dropout(0.2)(x)
-
+    '''
     out = Dense(output_shape, activation='softmax')(x)

     model = Model(inputs=inp, outputs=out)
     return model

 if __name__ == "__main__":
-    model = CNNLSTMModel((100,1), 6)
+    model = CNNLSTMModel((75,101), 2)
     model.summary()
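Note: with the deeper convolutions and the LSTM commented out, and the dense block disabled by turning it into a bare triple-quoted string (a no-op expression, so `x` flows straight from `Flatten()` to the output layer), the graph reduces to Conv1D -> Flatten -> softmax. A standalone sketch of the effective architecture, assuming the same Keras imports used elsewhere in this file:

    from keras.layers import Input, Conv1D, Flatten, Dense
    from keras.models import Model

    def reduced_cnnlstm(input_shape=(75, 101), output_shape=2):
        inp = Input(shape=input_shape)
        x = Conv1D(32, kernel_size=5, padding='same', activation='relu')(inp)
        x = Flatten()(x)  # 75 timesteps * 32 filters = 2400 features
        out = Dense(output_shape, activation='softmax')(x)
        return Model(inputs=inp, outputs=out)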

Diff for: ner_pos.py

-44
This file was deleted.

Diff for: text2vector.py

+2-7
@@ -7,13 +7,10 @@
 import pandas as pd

 class Text2Vector:
-    def __init__(self, embed_path, pos_model, size):
+    def __init__(self, embed_path, size):
         print('Loading embedding model {}'.format(embed_path))
         self.embed_model = fasttext.load_model(embed_path)
-
-        #print('Loading POS model {}'.format(pos_model))
-        #self.pos_model = fasttext.load_model(pos_model)
-
+
         self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
         self.size = size

@@ -54,8 +51,6 @@ def convert(self, text):
         for i, word in enumerate(df['word']):
             try:
                 embed_vector = np.array(self.embed_model[word])
-                #pos_vector = np.array(self.pos_model[pos_tag([word])[0][1]])
-                #vectors.append(list(embed_vector) + [df['TF_IDF'][i]] + list(pos_vector))
                 vectors.append(list(embed_vector) + [df['TF_IDF'][i]])
             except Exception as e:
                 print('In text2vector.py: {}'.format(e))
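Note: each word is now represented by its fastText embedding concatenated with a single TF-IDF score. Assuming the 100-dimensional model implied by the default `embeddings/skipgram-100` path, that gives 100 + 1 = 101 features per word, which is where the `size=(75, 101)` passed from `ReadData` comes from (75 words per sample, 101 features each). In sketch form:

    embed_vector = np.array(self.embed_model[word])   # shape (100,), assuming skipgram-100
    vector = list(embed_vector) + [df['TF_IDF'][i]]   # 100 + 1 = 101 features per word
    # 75 such word vectors per sample -> model input shape (75, 101)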

Diff for: tf_models/__pycache__/convlstm.cpython-36.pyc

2.21 KB
Binary file not shown.

Diff for: tf_models/convlstm.py

+56
@@ -0,0 +1,56 @@
+import tensorflow as tf
+
+class ConvLSTMModel:
+    def __init__(self):
+        pass
+
+    def conv2d(self, x, filter, strides, padding='SAME'):
+        return tf.nn.conv2d(x, filter=filter, strides=strides, padding=padding)
+
+    def max_pool2d(self, x, ksize, strides, padding='SAME'):
+        return tf.nn.max_pool(x, ksize=ksize, strides=strides, padding=padding)
+
+    def dropout(self, x, keep_rate):
+        return tf.nn.dropout(x, keep_rate)
+
+    def relu(self, x):
+        return tf.nn.relu(x)
+
+    def batch_normalization(self, x):
+        return tf.nn.batch_normalization(x)
+
+    def model(self, x):
+        weight1 = tf.Variable(tf.random_normal([3, 3, 1, 64]))
+        bias1 = tf.Variable(tf.random_normal([64]))
+        conv1 = self.conv2d(x, filter=weight1, strides=2)
+        relu1 = self.relu(tf.add(conv1, bias1))
+        max_pool1 = self.max_pool2d(relu1, ksize=2, strides=2)
+        #max_pool1 = self.batch_normalization(max_pool1)
+
+        weight2 = tf.Variable(tf.random_normal([3, 3, 64, 128]))
+        bias2 = tf.Variable(tf.random_normal([128]))
+        conv2 = self.conv2d(max_pool1, filter=weight2, strides=2)
+        relu2 = self.relu(tf.add(conv2, bias2))
+        max_pool2 = self.max_pool2d(relu2, ksize=2, strides=2)
+        #max_pool2 = self.batch_normalization(max_pool2)
+
+        weight3 = tf.Variable(tf.random_normal([3, 3, 128, 256]))
+        bias3 = tf.Variable(tf.random_normal([256]))
+        conv3 = self.conv2d(max_pool2, filter=weight3, strides=2)
+        relu3 = self.relu(tf.add(conv3, bias3))
+        max_pool3 = self.max_pool2d(relu3, ksize=2, strides=2)
+        #max_pool3 = self.batch_normalization(max_pool3)
+
+        flatten = tf.reshape(max_pool3, [-1, 2*2*256])
+
+        weight4 = tf.Variable(tf.random_normal([2*2*256, 1024]))
+        bias4 = tf.Variable(tf.random_normal([1024]))
+        dense1 = tf.add(tf.matmul(flatten, weight4), bias4)
+
+        dropout1 = self.dropout(dense1, 0.8)
+
+        weight5 = tf.Variable(tf.random_normal([1024, 2]))
+        bias5 = tf.Variable(tf.random_normal([2]))
+        dense2 = tf.add(tf.matmul(dropout1, weight5), bias5)
+
+        return dense2
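Note: the hard-coded `2*2*256` flatten size is consistent with the (75, 101, 1) inputs built in tf_train.py. With 'SAME' padding, each stride-2 conv and each stride-2 pool takes ceil(n/2), so the spatial size shrinks 75x101 -> 38x51 -> 19x26 -> 10x13 -> 5x7 -> 3x4 -> 2x2 across the three conv+pool pairs. Two caveats: `tf.nn.batch_normalization` also requires mean, variance, offset, scale, and epsilon arguments, so the commented-out calls would fail if re-enabled as-is, and `tf.nn.dropout` here runs at evaluation time too, since there is no training/inference switch. A quick shape sanity-check sketch, assuming a late TF1 release (e.g. 1.15) where `tf.nn.conv2d`/`tf.nn.max_pool` accept scalar strides as written:

    import tensorflow as tf
    from tf_models.convlstm import ConvLSTMModel

    x = tf.placeholder("float", [None, 75, 101, 1])
    logits = ConvLSTMModel().model(x)
    print(logits.shape)  # expected: (?, 2)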

Diff for: tf_models/lstm.py

+10-5
@@ -11,18 +11,23 @@ def model(self, x):
         x = tf.unstack(x, self.timesteps, 1)

         lstmcells = []
-        for _ in range(3):
+        for _ in range(2):
             lstmcells.append(rnn.BasicLSTMCell(self.hidden_states))

         multilstm= rnn.MultiRNNCell(lstmcells)
         rnn_output, states = tf.nn.static_rnn(multilstm, x, dtype=tf.float32)

-        weights = tf.Variable(tf.random_normal([self.hidden_states, self.no_classes]))
-        biases = tf.Variable(tf.random_normal([self.no_classes]))
+        weights1 = tf.Variable(tf.random_normal([self.hidden_states, 1024]))
+        biases1 = tf.Variable(tf.random_normal([1024]))
+        output1 = tf.add(tf.matmul(rnn_output[-1], weights1), biases1)

-        output = tf.add(tf.matmul(rnn_output[-1], weights), biases)
+        output1 = tf.nn.relu(output1, 0.75)

-        return output
+        weights2 = tf.Variable(tf.random_normal([1024, self.no_classes]))
+        biases2 = tf.Variable(tf.random_normal([self.no_classes]))
+        output2 = tf.add(tf.matmul(output1, weights2), biases2)
+
+        return output2

 if __name__ == "__main__":
     hidden_states = 512
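Note: `tf.nn.relu(features, name=None)` has no slope parameter, so `tf.nn.relu(output1, 0.75)` passes 0.75 as the op name and should fail at graph-construction time. If a leaky activation was intended, the usual form would be:

    output1 = tf.nn.leaky_relu(output1, alpha=0.75)  # if a leaky ReLU was the intent
    # or, for a plain ReLU:
    output1 = tf.nn.relu(output1)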

Diff for: tf_train.py

+68-35
@@ -1,7 +1,9 @@
 from tf_models.lstm import LSTMModel
+from tf_models.convlstm import ConvLSTMModel
 import tensorflow as tf
 import numpy as np
-from tqdm import tqdm
+from tqdm import tqdm_notebook, tqdm
+import os

 from ReadData import ReadData

@@ -11,14 +13,14 @@
 parser.add_argument('--model', '-m', help='Name of Model to use [lstm, cnn, cnnlstm]', required=True)
 parser.add_argument('--training_csv', '-csv', help='Path to Training CSV file', required=True)
 parser.add_argument('--embedding', '-e', help='Path to word embedding model | Default: "embeddings/skipgram-100/skipgram.bin"', default='embeddings/skipgram-100/skipgram.bin')
-parser.add_argument('--pos_model', '-pos', help='Path to POS embedding model | Default: "embeddings/skipgram-pos-100/skipgram_pos.bin"', default='embeddings/skipgram-pos-100/skipgram_pos.bin')
 parser.add_argument('--n_classes', '-n', help='No of classes to predict | Default: 2', default=2, type=int)
 parser.add_argument('--optimizer', '-o', help='which Optimizer to use? | Default: "Adam"', default='adam')
 parser.add_argument('--batch_size', '-b', help='What should be the batch size? | Default: 32', default=32, type=int)
 parser.add_argument('--epochs', '-ep', help='How many epochs to Train? | Default: 100', default=100, type=int)
 parser.add_argument('--train_val_split', '-s', help='What should be the train vs val split fraction? | Default: 0.1', default=0.1, type=float)
 parser.add_argument('--no_samples', '-ns', help='How many samples to train on? | Default: 1000', default=1000, type=int)
 parser.add_argument('--learning_rate', '-lr', help='What should be the learning rate? | Default: 0.001', default=0.001, type=float)
+parser.add_argument('--logs', '-l', help="Where should the trained model be saved? | Default: logs", default='logs')

 args = parser.parse_args()

@@ -27,69 +29,100 @@
 timesteps = 75
 embed_size = 101

-x = tf.placeholder("float", [None, timesteps, embed_size])
-y = tf.placeholder("float", [None, classes])
+if args.model == 'lstm':
+    x = tf.placeholder("float", [None, timesteps, embed_size], name='InputData')
+    y = tf.placeholder("float", [None, classes], name='Label')

-model = LSTMModel(hidden_states=hidden_states, no_classes=classes, timesteps=timesteps)
+    model = LSTMModel(hidden_states=hidden_states, no_classes=classes, timesteps=timesteps)
+elif args.model.startswith('cnn'):
+    x = tf.placeholder("float", [None, timesteps, embed_size, 1], name='InputData')
+    y = tf.placeholder("float", [None, classes], name='Label')
+    model = ConvLSTMModel()

-reader = ReadData(args.training_csv, args.embedding, args.pos_model,
+reader = ReadData(args.training_csv, args.embedding,
                   batch_size=args.batch_size, no_samples=args.no_samples,
                   train_val_split=args.train_val_split)

-'''print('Reading Training data.')
-train_x, train_y = reader.read_all_train()'''
 print('Reading Validation data.')
 val_x, val_y = reader.read_all_val()

-prediction = model.model(x)
-cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
-optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate).minimize(cost_func)
+with tf.name_scope('Model'):
+    prediction = model.model(x)

-correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
-accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
+with tf.name_scope('Loss'):
+    cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
+
+with tf.name_scope('Optimizer'):
+    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate).minimize(cost_func)
+
+with tf.name_scope('Accuracy'):
+    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
+    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
+
+if not os.path.exists(args.logs):
+    os.mkdir(args.logs)

 saver = tf.train.Saver()
+weights_path = os.path.join(args.logs, 'weights')
+if not os.path.exists(weights_path):
+    os.mkdir(weights_path)
+
+tensorboard_path = os.path.join(args.logs, 'tensorboard')
+if not os.path.exists(tensorboard_path):
+    os.mkdir(tensorboard_path)

+train_log = os.path.join(tensorboard_path, 'training')
+val_log = os.path.join(tensorboard_path, 'validation')
+
+tf.summary.scalar('loss', cost_func)
+tf.summary.scalar('accuracy', accuracy)
+merged_summary_op = tf.summary.merge_all()
+
+prev_val_loss = float('inf')
 with tf.Session() as sess:
     sess.run(tf.global_variables_initializer())

+    train_summary_writer = tf.summary.FileWriter(train_log, graph=sess.graph)
+    val_summary_writer = tf.summary.FileWriter(val_log)
+
     for epoch in range(args.epochs):
         i = 0
         epoch_loss = 0
         no_batches = int(reader.train_size/args.batch_size)
-        #while i < reader.train_size:
-        loss = 0
-        acc = 0
+
+        loss = []
+        acc = []
         with tqdm(total=no_batches, desc="Epoch {}/{}: loss: {} acc: {}".format(epoch + 1, args.epochs, loss, acc)) as pbar:
-            for _ in range(no_batches):
+            for batch_num in range(no_batches):
                 start = i
                 end = i + args.batch_size
                 i = end

                 epoch_x, epoch_y = reader.get_next_batch(start, end)
-                #epoch_x, epoch_y = train_x[start:end], train_y[start:end]
-                #epoch_x = np.reshape(epoch_x, [args.batch_size, len(epoch_x[0]), len(epoch_x[0][0])])
-                _, c = sess.run([optimizer, cost_func], feed_dict={x: epoch_x, y:epoch_y})
+                epoch_x = np.reshape(epoch_x, (epoch_x.shape[0], timesteps, embed_size, 1))
+                _, c, summary = sess.run([optimizer, cost_func, merged_summary_op], feed_dict={x: epoch_x, y:epoch_y})
+                train_summary_writer.add_summary(summary, epoch*no_batches+batch_num)
+
                 a = accuracy.eval({x: epoch_x, y: epoch_y})
-                if loss == 0 and acc == 0:
-                    loss = c
-                    acc = a
-                else:
-                    loss += c
-                    loss /= 2
-                    acc += a
-                    acc /= 2
-
-                pbar.set_description(desc=("Epoch {}/{}: loss: {:03f}".format(epoch + 1, args.epochs, loss) + " acc: {:03f}".format(acc)))
+                loss.append(c)
+                acc.append(a)
+
+                pbar.set_description(desc=("Epoch {}/{}: loss: {:.03f}".format(epoch + 1, args.epochs, np.average(loss)) + " acc: {:.03f}".format(np.average(acc))))
                 pbar.update(1)

+        print('------------------------------------------------------------')
+        val_loss, val_acc, val_summary = sess.run([cost_func, accuracy, merged_summary_op], feed_dict={x: epoch_x, y:epoch_y})

-        #print("Loss: {}. Accuracy: {}".format(c, accuracy.eval({x: epoch_x, y: epoch_y})))
-        #epoch_loss += c
+        val_summary_writer.add_summary(val_summary, epoch)

-        #print("Epoch {} of {}. Loss: {}. Accuracy: {}".format(epoch + 1, args.epochs, epoch_loss, accuracy.eval({x: train_x, y: train_y})))
-        print('------------------------------------------------------------')
-        print("Val Loss: {} Val Accuracy: {}".format(cost_func.eval({x: val_x, y: val_y}), accuracy.eval({x: val_x, y: val_y})))
+        val_loss = cost_func.eval({x: np.reshape(val_x, (val_x.shape[0], timesteps, embed_size, 1)), y: val_y})
+        val_acc = accuracy.eval({x: np.reshape(val_x, (val_x.shape[0], timesteps, embed_size, 1)), y: val_y})
+        print("Val Loss: {} Val Accuracy: {}".format(val_loss, val_acc))
         print('------------------------------------------------------------')

+        if val_loss < prev_val_loss:
+            prev_val_loss = val_loss
+            model_name = 'ep{:03d}'.format(epoch+1) + '-loss{:.03f}'.format( np.average(loss)) + '-val_loss{:.03f}.ckpt'.format(val_loss)
+            saver.save(sess, os.path.join(weights_path, model_name))
+
     print("Accuracy: {}".format(accuracy.eval({x: val_x, y: val_y})))
