Commit 3c404c2

update

brightmart committed Nov 19, 2018
1 parent 63f689c commit 3c404c2
Showing 2 changed files with 36 additions and 31 deletions.
36 changes: 19 additions & 17 deletions a01_FastText/p6_fastTextB_model_multilabel.py
@@ -21,7 +21,7 @@ def __init__(self, label_size, learning_rate, batch_size, decay_steps, decay_rate,
         # 2.add placeholder (X,label)
         self.sentence = tf.placeholder(tf.int32, [None, self.sentence_len], name="sentence") #X
         self.labels = tf.placeholder(tf.int64, [None,self.max_label_per_example], name="Labels") #y [1,2,3,3,3]
-        self.labels_l1999=tf.placeholder(tf.int64,[None,self.label_size])
+        self.labels_l1999=tf.placeholder(tf.float32,[None,self.label_size]) # was int64
         #3.set some variables
         self.global_step = tf.Variable(0, trainable=False, name="Global_Step")
         self.epoch_step=tf.Variable(0, trainable=False,name="Epoch_Step")
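Note on the dtype change above: `tf.nn.sigmoid_cross_entropy_with_logits` (used in the new loss below) requires `labels` to have the same float dtype as `logits`, so an `int64` multi-hot placeholder would fail type checking. A minimal sketch of the constraint in standalone TF 1.x; the tensor names and sizes here are illustrative, not taken from the repo:

```python
import numpy as np
import tensorflow as tf  # TF 1.x, as used in this repo

label_size = 1999
logits = tf.placeholder(tf.float32, [None, label_size])
# multi-hot targets must be float to match the logits' dtype:
labels = tf.placeholder(tf.float32, [None, label_size])
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)

with tf.Session() as sess:
    y = np.zeros((2, label_size), dtype=np.float32)
    y[0, 315] = 1.0  # example 0 carries label 315
    out = sess.run(loss, feed_dict={logits: np.zeros((2, label_size), np.float32),
                                    labels: y})
    print(out.shape)  # (2, 1999): one cross-entropy per class, per example
```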
@@ -67,27 +67,29 @@ def loss(self,l2_lambda=0.0001):
         # Compute the average NCE loss for the batch.
         # tf.nce_loss automatically draws a new sample of the negative labels each
         # time we evaluate the loss.
-        if self.is_training: #training
+        #if self.is_training: #training
         #labels=tf.reshape(self.labels,[-1]) #3.[batch_size,max_label_per_example]------>[batch_size*max_label_per_example,]
         #labels=tf.expand_dims(labels,1) #[batch_size*max_label_per_example,]----->[batch_size*max_label_per_example,1]
         #nce_loss note: if you have a variable number of target classes, you can pad them out to a constant number either by repeating them or by padding with an otherwise unused class.
-            loss = tf.reduce_mean( #inputs' shape should be [batch_size, dim]
-                tf.nn.nce_loss(weights=tf.transpose(self.W), #[embed_size,label_size]--->[label_size,embed_size]; nce weights: a `Tensor` of shape `[num_classes, dim]`
-                               biases=self.b, #[label_size]; nce biases: a `Tensor` of shape `[num_classes]`
-                               labels=self.labels, #[batch_size,max_label_per_example]; the target classes: a `Tensor` of type `int64` and shape `[batch_size, num_true]`
-                               inputs=self.sentence_embeddings, #[None,embed_size]; the forward activations of the input network: a `Tensor` of shape `[batch_size, dim]`
-                               num_sampled=self.num_sampled, #scalar, e.g. 100
-                               num_true=self.max_label_per_example,
-                               num_classes=self.label_size,partition_strategy="div")) #scalar, 1999
-        else: #eval/inference
-            labels_multi_hot = self.labels_l1999 #[batch_size,label_size]
-            #sigmoid_cross_entropy_with_logits computes sigmoid cross entropy given `logits`; it measures the probability error in discrete classification tasks in which each class is independent and not mutually exclusive, e.g. multilabel classification where a picture can contain both an elephant and a dog.
-            loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_multi_hot,logits=self.logits) #labels:[batch_size,label_size]; logits:[batch_size,label_size]
-            loss = tf.reduce_sum(loss, axis=1)
+        # loss = tf.reduce_mean( #inputs' shape should be [batch_size, dim]
+        #     tf.nn.nce_loss(weights=tf.transpose(self.W), #[embed_size,label_size]--->[label_size,embed_size]; nce weights: a `Tensor` of shape `[num_classes, dim]`
+        #                    biases=self.b, #[label_size]; nce biases: a `Tensor` of shape `[num_classes]`
+        #                    labels=self.labels, #[batch_size,max_label_per_example]; the target classes: a `Tensor` of type `int64` and shape `[batch_size, num_true]`
+        #                    inputs=self.sentence_embeddings, #[None,embed_size]; the forward activations of the input network: a `Tensor` of shape `[batch_size, dim]`
+        #                    num_sampled=self.num_sampled, #scalar, e.g. 100
+        #                    num_true=self.max_label_per_example,
+        #                    num_classes=self.label_size,partition_strategy="div")) #scalar, 1999
+        #else: #eval/inference
+        labels_multi_hot = self.labels_l1999 #[batch_size,label_size]
+        #sigmoid_cross_entropy_with_logits computes sigmoid cross entropy given `logits`; it measures the probability error in discrete classification tasks in which each class is independent and not mutually exclusive, e.g. multilabel classification where a picture can contain both an elephant and a dog.
+        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_multi_hot,logits=self.logits) #labels:[batch_size,label_size]; logits:[batch_size,label_size]
+        loss = tf.reduce_mean(tf.reduce_sum(loss, axis=1)) #sum over classes, then mean over the batch
+        print("loss:",loss)
 
         # adding regularization resulted in non-convergence
-        l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
-        loss=loss+l2_losses
+        self.l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
+        print("l2_losses:",self.l2_losses)
+        loss=loss+self.l2_losses
         return loss
 
     def train(self):
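The net effect of this hunk: the sampled NCE loss that was used at training time is gone, and both training and evaluation now use the dense multi-label sigmoid loss, aggregated as a per-example sum over classes followed by a mean over the batch. A small NumPy check of that aggregation, using TensorFlow's numerically stable formulation `max(x,0) - x*z + log(1 + exp(-|x|))`; the values are illustrative:

```python
import numpy as np

def multilabel_sigmoid_loss(logits, labels):
    """Per-example sum of sigmoid cross-entropies, averaged over the batch.

    Mirrors tf.reduce_mean(tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(...), axis=1)).
    """
    # stable form of -z*log(sigmoid(x)) - (1-z)*log(1-sigmoid(x))
    per_class = np.maximum(logits, 0) - logits * labels + np.log1p(np.exp(-np.abs(logits)))
    return np.mean(np.sum(per_class, axis=1))

logits = np.array([[2.0, -1.0, 0.5], [0.0, 3.0, -2.0]])
labels = np.array([[1.0,  0.0, 0.0], [0.0, 1.0,  1.0]])
print(multilabel_sigmoid_loss(logits, labels))
```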
31 changes: 17 additions & 14 deletions a01_FastText/p6_fastTextB_train_multilabel.py
@@ -21,7 +21,7 @@
tf.app.flags.DEFINE_string("cache_file_h5py","../data/ieee_zhihu_cup/data.h5","path of training/validation/test data.") #../data/sample_multiple_label.txt
tf.app.flags.DEFINE_string("cache_file_pickle","../data/ieee_zhihu_cup/vocab_label.pik","path of vocabulary and label files") #../data/sample_multiple_label.txt

tf.app.flags.DEFINE_float("learning_rate",0.01,"learning rate")
tf.app.flags.DEFINE_float("learning_rate",0.001,"learning rate")
tf.app.flags.DEFINE_integer("batch_size", 128, "Batch size for training/evaluating.") #512批处理的大小 32-->128
tf.app.flags.DEFINE_integer("decay_steps", 20000, "how many steps before decay learning rate.") #批处理的大小 32-->128
tf.app.flags.DEFINE_float("decay_rate", 0.9, "Rate of decay for learning rate.") #0.5一次衰减多少
@@ -30,7 +30,7 @@
tf.app.flags.DEFINE_integer("sentence_len",200,"max sentence length")
tf.app.flags.DEFINE_integer("embed_size",128,"embedding size") #100
tf.app.flags.DEFINE_boolean("is_training",True,"is traning.true:tranining,false:testing/inference")
tf.app.flags.DEFINE_integer("num_epochs",16,"embedding size")
tf.app.flags.DEFINE_integer("num_epochs",25,"embedding size")
tf.app.flags.DEFINE_integer("validate_every", 1, "Validate every validate_every epochs.") #每10轮做一次验证
#tf.app.flags.DEFINE_string("training_path", '/home/xul/xul/9_fastTextB/training-data/test-zhihu6-only-title-multilabel-trigram.txt', "location of traning data.") #每10轮做一次验证
tf.app.flags.DEFINE_boolean("use_embedding",False,"whether to use embedding or not.")
@@ -83,18 +83,18 @@ def main(_):
     for epoch in range(curr_epoch,FLAGS.num_epochs): #range(start,stop,step_size)
         loss, acc, counter = 0.0, 0.0, 0
         for start, end in zip(range(0, number_of_training_data, batch_size),range(batch_size, number_of_training_data, batch_size)):
-            train_Y_batch=process_labels(trainY[start:end])
-            curr_loss,_=sess.run([fast_text.loss_val,fast_text.train_op],feed_dict={fast_text.sentence:trainX[start:end],
-                                 fast_text.labels:train_Y_batch}) #fast_text.labels_l1999:trainY1999[start:end]
+            #train_Y_batch=process_labels(trainY[start:end],number=start)
+            curr_loss,current_l2_loss,_=sess.run([fast_text.loss_val,fast_text.l2_losses,fast_text.train_op],
+                                 feed_dict={fast_text.sentence:trainX[start:end],fast_text.labels_l1999:trainY[start:end]})
             if epoch==0 and counter==0:
                 print("trainX[start:end]:",trainX[start:end]) #2d array; each element's length is 100
-                print("train_Y_batch:",train_Y_batch) #a list; each element is itself a list with 1 to 5 labels
+                print("train_Y_batch:",trainY[start:end]) #a list; each element is itself a list with 1 to 5 labels
                 #print("trainY1999[start:end]:",trainY1999[start:end])
             loss,counter=loss+curr_loss,counter+1
             if counter%50==0:
-                print("Epoch %d\tBatch %d\tTrain Loss:%.3f" %(epoch,counter,loss/float(counter)))
+                print("Epoch %d\tBatch %d\tTrain Loss:%.3f\tL2 Loss:%.3f" %(epoch,counter,loss/float(counter),current_l2_loss))
 
-            if start%(3000*FLAGS.batch_size)==0:
+            if start%(1000*FLAGS.batch_size)==0:
                 eval_loss, eval_accuracy = do_eval(sess, fast_text, vaildX, vaildY, batch_size,index2label)
                 print("Epoch %d Validation Loss:%.3f\tValidation Accuracy: %.3f" % (epoch, eval_loss, eval_accuracy))
                 # save model to checkpoint
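After this change the training feed bypasses `process_labels` and sends `trainY[start:end]` directly into `labels_l1999`, so `trainY` must already contain float multi-hot rows of width `label_size`. A hedged sketch of producing such a batch; `to_multi_hot` is a hypothetical helper, not part of the repo:

```python
import numpy as np

def to_multi_hot(label_lists, label_size=1999):
    """Hypothetical helper: turn e.g. [[315], [7, 42]] into float32 multi-hot rows."""
    batch = np.zeros((len(label_lists), label_size), dtype=np.float32)
    for row, labels in enumerate(label_lists):
        batch[row, labels] = 1.0  # set every class index present in the example
    return batch

train_Y_batch = to_multi_hot([[315], [7, 42, 1998]])
print(train_Y_batch.shape, train_Y_batch.sum(axis=1))  # (2, 1999) [1. 3.]
```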
@@ -131,7 +131,7 @@ def do_eval(sess,fast_text,evalX,evalY,batch_size,vocabulary_index2word_label):
     for start,end in zip(range(0,number_examples,batch_size),range(batch_size,number_examples,batch_size)):
         evalY_batch=process_labels(evalY[start:end])
         curr_eval_loss,logit = sess.run([fast_text.loss_val,fast_text.logits], #curr_eval_acc-->fast_text.accuracy
-                                        feed_dict={fast_text.sentence: evalX[start:end],fast_text.labels: evalY_batch})
+                                        feed_dict={fast_text.sentence: evalX[start:end],fast_text.labels_l1999: evalY[start:end]})
         #print("do_eval.logits_",logits_.shape)
         label_list_top5 = get_label_using_logits(logit[0], vocabulary_index2word_label)
         curr_eval_acc=calculate_accuracy(list(label_list_top5),evalY_batch[0],eval_counter) # evalY[start:end][0]
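`get_label_using_logits` is unchanged by this commit and its body is not shown; a plausible implementation, assuming it simply returns the five highest-scoring classes, would be:

```python
import numpy as np

def get_label_using_logits(logits, vocabulary_index2word_label, top_number=5):
    """Sketch (assumed behavior): map the top-k logit indices to label names."""
    index_list = np.argsort(logits)[-top_number:][::-1]  # highest score first
    return [vocabulary_index2word_label[i] for i in index_list]

# usage, with logits of shape [label_size] and an {index: label} mapping:
# label_list_top5 = get_label_using_logits(logit[0], index2label)
```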
@@ -240,11 +240,14 @@ def process_labels(trainY_batch,require_size=5,number=None):
         y_list_dense = [i for i, label in enumerate(y_list_sparse) if int(label) == 1]
         y_list=proces_label_to_algin(y_list_dense,require_size=require_size)
         trainY_batch_result[index]=y_list
-        if number is not None and number%2000==0:
-            print("####1.y_list_dense:",y_list_dense)
-            print("####2.y_list:",y_list) # 1.label_index: [315]; 2.y_list: [315, 315, 315, 315, 315]; 3.y_list: [0. 0. 0. ... 0. 0. 0.]
-
-    #print("###trainY_batch_result:",trainY_batch_result)
+        if number is not None and number%30==0:
+            pass
+            #print("####0.y_list_sparse:",y_list_sparse)
+            #print("####1.y_list_dense:",y_list_dense)
+            #print("####2.y_list:",y_list) # 1.label_index: [315]; 2.y_list: [315, 315, 315, 315, 315]; 3.y_list: [0. 0. 0. ... 0. 0. 0.]
+    if number is not None and number % 30 == 0:
+        #print("###3trainY_batch_result:",trainY_batch_result)
+        pass
     return trainY_batch_result
 
 def proces_label_to_algin(ys_list,require_size=5):
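For context on the old training path: `proces_label_to_algin` pads a variable-length label list out to `require_size`, which `tf.nn.nce_loss` needs because `num_true` is fixed per batch; the debug comment above (`y_list: [315, 315, 315, 315, 315]`) shows padding by repetition. A sketch consistent with that behavior; the empty-list fallback to class 0 is an assumption:

```python
def proces_label_to_algin(ys_list, require_size=5):
    """Sketch (assumed behavior): pad a label list to a fixed length by
    repeating labels, truncating if it is already too long."""
    ys_list = list(ys_list) or [0]  # assumption: fall back to an unused class
    while len(ys_list) < require_size:
        ys_list.append(ys_list[-1])  # repeat the last label to pad
    return ys_list[:require_size]

print(proces_label_to_algin([315]))                # [315, 315, 315, 315, 315]
print(proces_label_to_algin([7, 42, 9, 1, 2, 3]))  # [7, 42, 9, 1, 2]
```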
