diff --git a/a01_FastText/p6_fastTextB_model_multilabel.py b/a01_FastText/p6_fastTextB_model_multilabel.py
index 1505d49..8b14665 100644
--- a/a01_FastText/p6_fastTextB_model_multilabel.py
+++ b/a01_FastText/p6_fastTextB_model_multilabel.py
@@ -21,7 +21,7 @@ def __init__(self, label_size, learning_rate, batch_size, decay_steps, decay_rat
         # 2.add placeholder (X,label)
         self.sentence = tf.placeholder(tf.int32, [None, self.sentence_len], name="sentence")  #X
         self.labels = tf.placeholder(tf.int64, [None,self.max_label_per_example], name="Labels")  #y [1,2,3,3,3]
-        self.labels_l1999=tf.placeholder(tf.int64,[None,self.label_size])
+        self.labels_l1999=tf.placeholder(tf.float32,[None,self.label_size]) # int64
         #3.set some variables
         self.global_step = tf.Variable(0, trainable=False, name="Global_Step")
         self.epoch_step=tf.Variable(0, trainable=False,name="Epoch_Step")
@@ -67,27 +67,29 @@ def loss(self,l2_lambda=0.0001):
         # Compute the average NCE loss for the batch.
         # tf.nce_loss automatically draws a new sample of the negative labels each
         # time we evaluate the loss.
-        if self.is_training:#training
+        #if self.is_training:#training
             #labels=tf.reshape(self.labels,[-1])               #3.[batch_size,max_label_per_example]------>[batch_size*max_label_per_example,]
             #labels=tf.expand_dims(labels,1)                   #[batch_size*max_label_per_example,]----->[batch_size*max_label_per_example,1]
             #nce_loss: notice-->for now, if you have a variable number of target classes, you can pad them out to a constant number by either repeating them or by padding with an otherwise unused class.
-            loss = tf.reduce_mean(#inputs's SHAPE should be: [batch_size, dim]
-                tf.nn.nce_loss(weights=tf.transpose(self.W),        #[embed_size, label_size]--->[label_size,embed_size]. nce_weights:A `Tensor` of shape `[num_classes, dim].O.K.
-                               biases=self.b,                       #[label_size]. nce_biases:A `Tensor` of shape `[num_classes]`.
-                               labels=self.labels,                  #4.[batch_size,max_label_per_example]. train_labels, # A `Tensor` of type `int64` and shape `[batch_size,num_true]`. The target classes.
-                               inputs=self.sentence_embeddings,#TODO [None,self.embed_size] #A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
-                               num_sampled=self.num_sampled,        # scalar. 100
-                               num_true=self.max_label_per_example,
-                               num_classes=self.label_size,partition_strategy="div"))  #scalar. 1999
-        else:#eval(/inference)
-            labels_multi_hot = self.labels_l1999 #[batch_size,label_size]
-            #sigmoid_cross_entropy_with_logits:Computes sigmoid cross entropy given `logits`.Measures the probability error in discrete classification tasks in which each class is independent and not mutually exclusive. For instance, one could perform multilabel classification where a picture can contain both an elephant and a dog at the same time.
-            loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_multi_hot,logits=self.logits) #labels:[batch_size,label_size];logits:[batch, label_size]
-            loss = tf.reduce_sum(loss, axis=1)
+        # loss = tf.reduce_mean(#inputs's SHAPE should be: [batch_size, dim]
+        #     tf.nn.nce_loss(weights=tf.transpose(self.W),        #[embed_size, label_size]--->[label_size,embed_size]. nce_weights:A `Tensor` of shape `[num_classes, dim].O.K.
+        #                    biases=self.b,                       #[label_size]. nce_biases:A `Tensor` of shape `[num_classes]`.
+        #                    labels=self.labels,                  #4.[batch_size,max_label_per_example]. train_labels, # A `Tensor` of type `int64` and shape `[batch_size,num_true]`. The target classes.
+        #                    inputs=self.sentence_embeddings,#TODO [None,self.embed_size] #A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
+        #                    num_sampled=self.num_sampled,        # scalar. 100
+        #                    num_true=self.max_label_per_example,
+        #                    num_classes=self.label_size,partition_strategy="div"))  #scalar. 1999
+        #else:#eval(/inference)
+        labels_multi_hot = self.labels_l1999 #[batch_size,label_size]
+        #sigmoid_cross_entropy_with_logits:Computes sigmoid cross entropy given `logits`.Measures the probability error in discrete classification tasks in which each class is independent and not mutually exclusive. For instance, one could perform multilabel classification where a picture can contain both an elephant and a dog at the same time.
+        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_multi_hot,logits=self.logits) #labels:[batch_size,label_size];logits:[batch, label_size]
+        loss = tf.reduce_mean(tf.reduce_sum(loss, axis=1)) # reduce_sum
+        print("loss:",loss)
         # add regularization result in not converge
-        l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
-        loss=loss+l2_losses
+        self.l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
+        print("l2_losses:",self.l2_losses)
+        loss=loss+self.l2_losses
         return loss
 
     def train(self):
diff --git a/a01_FastText/p6_fastTextB_train_multilabel.py b/a01_FastText/p6_fastTextB_train_multilabel.py
index 10394b4..a0403ca 100644
--- a/a01_FastText/p6_fastTextB_train_multilabel.py
+++ b/a01_FastText/p6_fastTextB_train_multilabel.py
@@ -21,7 +21,7 @@
 tf.app.flags.DEFINE_string("cache_file_h5py","../data/ieee_zhihu_cup/data.h5","path of training/validation/test data.") #../data/sample_multiple_label.txt
 tf.app.flags.DEFINE_string("cache_file_pickle","../data/ieee_zhihu_cup/vocab_label.pik","path of vocabulary and label files") #../data/sample_multiple_label.txt
-tf.app.flags.DEFINE_float("learning_rate",0.01,"learning rate")
+tf.app.flags.DEFINE_float("learning_rate",0.001,"learning rate")
 tf.app.flags.DEFINE_integer("batch_size", 128, "Batch size for training/evaluating.") #512批处理的大小 32-->128
 tf.app.flags.DEFINE_integer("decay_steps", 20000, "how many steps before decay learning rate.") #批处理的大小 32-->128
 tf.app.flags.DEFINE_float("decay_rate", 0.9, "Rate of decay for learning rate.") #0.5一次衰减多少
@@ -30,7 +30,7 @@
 tf.app.flags.DEFINE_integer("sentence_len",200,"max sentence length")
 tf.app.flags.DEFINE_integer("embed_size",128,"embedding size") #100
 tf.app.flags.DEFINE_boolean("is_training",True,"is traning.true:tranining,false:testing/inference")
-tf.app.flags.DEFINE_integer("num_epochs",16,"embedding size")
+tf.app.flags.DEFINE_integer("num_epochs",25,"embedding size")
 tf.app.flags.DEFINE_integer("validate_every", 1, "Validate every validate_every epochs.") #每10轮做一次验证
 #tf.app.flags.DEFINE_string("training_path", '/home/xul/xul/9_fastTextB/training-data/test-zhihu6-only-title-multilabel-trigram.txt', "location of traning data.") #每10轮做一次验证
 tf.app.flags.DEFINE_boolean("use_embedding",False,"whether to use embedding or not.")
@@ -83,18 +83,18 @@ def main(_):
         for epoch in range(curr_epoch,FLAGS.num_epochs):#range(start,stop,step_size)
             loss, acc, counter = 0.0, 0.0, 0
             for start, end in zip(range(0, number_of_training_data, batch_size),range(batch_size, number_of_training_data, batch_size)):
-                train_Y_batch=process_labels(trainY[start:end])
-                curr_loss,_=sess.run([fast_text.loss_val,fast_text.train_op],feed_dict={fast_text.sentence:trainX[start:end],
-                                                                                        fast_text.labels:train_Y_batch}) #fast_text.labels_l1999:trainY1999[start:end]
+                #train_Y_batch=process_labels(trainY[start:end],number=start)
+                curr_loss,current_l2_loss,_=sess.run([fast_text.loss_val,fast_text.l2_losses,fast_text.train_op],
+                                                     feed_dict={fast_text.sentence:trainX[start:end],fast_text.labels_l1999:trainY[start:end]}) #fast_text.labels_l1999:trainY1999[start:end]
                 if epoch==0 and counter==0:
                     print("trainX[start:end]:",trainX[start:end]) #2d-array. each element slength is a 100.
-                    print("train_Y_batch:",train_Y_batch) #a list,each element is a list.element:may be has 1,2,3,4,5 labels.
+                    print("train_Y_batch:",trainY[start:end]) #a list,each element is a list.element:may be has 1,2,3,4,5 labels.
                     #print("trainY1999[start:end]:",trainY1999[start:end])
                 loss,counter=loss+curr_loss,counter+1 #acc+curr_acc,
                 if counter %50==0:
-                    print("Epoch %d\tBatch %d\tTrain Loss:%.3f" %(epoch,counter,loss/float(counter))) #\tTrain Accuracy:%.3f--->,acc/float(counter)
+                    print("Epoch %d\tBatch %d\tTrain Loss:%.3f\tL2 Loss:%.3f" %(epoch,counter,loss/float(counter),current_l2_loss)) #\tTrain Accuracy:%.3f--->,acc/float(counter)
 
-                if start%(3000*FLAGS.batch_size)==0:
+                if start%(1000*FLAGS.batch_size)==0:
                     eval_loss, eval_accuracy = do_eval(sess, fast_text, vaildX, vaildY, batch_size,index2label) # testY1999,eval_acc
                     print("Epoch %d Validation Loss:%.3f\tValidation Accuracy: %.3f" % (epoch, eval_loss, eval_accuracy)) # ,\tValidation Accuracy: %.3f--->eval_acc
                     # save model to checkpoint
@@ -131,7 +131,7 @@ def do_eval(sess,fast_text,evalX,evalY,batch_size,vocabulary_index2word_label):
     for start,end in zip(range(0,number_examples,batch_size),range(batch_size,number_examples,batch_size)):
         evalY_batch=process_labels(evalY[start:end])
         curr_eval_loss,logit = sess.run([fast_text.loss_val,fast_text.logits], #curr_eval_acc-->fast_text.accuracy
-                                        feed_dict={fast_text.sentence: evalX[start:end],fast_text.labels: evalY_batch}) #,fast_text.labels_l1999:evalY1999[start:end]
+                                        feed_dict={fast_text.sentence: evalX[start:end],fast_text.labels_l1999: evalY[start:end]}) #,fast_text.labels_l1999:evalY1999[start:end]
         #print("do_eval.logits_",logits_.shape)
         label_list_top5 = get_label_using_logits(logit[0], vocabulary_index2word_label)
         curr_eval_acc=calculate_accuracy(list(label_list_top5),evalY_batch[0] ,eval_counter) # evalY[start:end][0]
@@ -240,11 +240,14 @@ def process_labels(trainY_batch,require_size=5,number=None):
         y_list_dense = [i for i, label in enumerate(y_list_sparse) if int(label) == 1]
         y_list=proces_label_to_algin(y_list_dense,require_size=require_size)
         trainY_batch_result[index]=y_list
-        if number is not None and number%2000==0:
-            print("####1.y_list_dense:",y_list_dense)
-            print("####2.y_list:",y_list) # 1.label_index: [315] ;2.y_list: [315, 315, 315, 315, 315] ;3.y_list: [0. 0. 0. ... 0. 0. 0.]
-
-    #print("###trainY_batch_result:",trainY_batch_result)
+        if number is not None and number%30==0:
+            pass
+            #print("####0.y_list_sparse:",y_list_sparse)
+            #print("####1.y_list_dense:",y_list_dense)
+            #print("####2.y_list:",y_list) # 1.label_index: [315] ;2.y_list: [315, 315, 315, 315, 315] ;3.y_list: [0. 0. 0. ... 0. 0. 0.]
+    if number is not None and number % 30 == 0:
+        #print("###3trainY_batch_result:",trainY_batch_result)
+        pass
     return trainY_batch_result
 
 def proces_label_to_algin(ys_list,require_size=5):
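Note on the loss change above: the commit drops the NCE-loss training branch and always optimizes a multi-label sigmoid cross-entropy against a float32 multi-hot label placeholder, summing over the 1999 classes, averaging over the batch, and adding L2 regularization on non-bias variables. Below is a minimal, self-contained sketch of that loss wiring, assuming TensorFlow 1.x; the sentence_embeddings placeholder, the toy projection layer, the optimizer, and all names and sizes are illustrative stand-ins, not the repo's actual fastText model.

# Minimal sketch (TensorFlow 1.x assumed) of the multi-label loss this commit switches to:
# per-class sigmoid cross-entropy against a float32 multi-hot label placeholder,
# summed over the label dimension, averaged over the batch, plus L2 on non-bias weights.
# The projection layer, optimizer, and names below are illustrative stand-ins.
import numpy as np
import tensorflow as tf

label_size = 1999   # number of classes, as in labels_l1999
embed_size = 128
l2_lambda = 0.0001

sentence_embeddings = tf.placeholder(tf.float32, [None, embed_size], name="sentence_embeddings")
labels_multi_hot = tf.placeholder(tf.float32, [None, label_size], name="labels_l1999")  # float32, not int64

# stand-in for the model's projection W, b -> logits of shape [batch_size, label_size]
W = tf.get_variable("W", [embed_size, label_size])
bias = tf.get_variable("bias", [label_size], initializer=tf.zeros_initializer())
logits = tf.matmul(sentence_embeddings, W) + bias

# multi-label loss: sigmoid cross-entropy -> sum over classes -> mean over batch
ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_multi_hot, logits=logits)
loss = tf.reduce_mean(tf.reduce_sum(ce, axis=1))

# L2 regularization on every trainable variable whose name does not contain 'bias'
l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                      if 'bias' not in v.name]) * l2_lambda
total_loss = loss + l2_losses

train_op = tf.train.GradientDescentOptimizer(0.001).minimize(total_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.randn(4, embed_size).astype(np.float32)
    y = np.zeros((4, label_size), dtype=np.float32)
    y[:, :5] = 1.0   # each example already encoded as a multi-hot vector (up to 5 labels)
    _, l, l2 = sess.run([train_op, total_loss, l2_losses],
                        feed_dict={sentence_embeddings: x, labels_multi_hot: y})
    print("loss:", l, "l2_losses:", l2)

Summing over the classes before taking the batch mean (rather than averaging over classes) keeps each positive label's contribution at full strength, which matters when only a handful of the 1999 entries are 1; feeding the multi-hot rows of trainY directly is what makes the int64-to-float32 placeholder change above necessary.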