Update inference and readers to match the new dataset and Kaggle requirements

samihaija · samihaija · commit 82104c52d470 · 2018-05-03T15:56:08.000-07:00
diff --git a/inference.py b/inference.py
@@ -75,8 +75,8 @@ def format_lines(video_ids, predictions, top_k):
     line = [(class_index, predictions[video_index][class_index])
             for class_index in top_indices]
     line = sorted(line, key=lambda p: -p[1])
-    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
-                                                  for pair in line) + "\n"
+    yield video_ids[video_index].decode('utf-8') + "," + " ".join(
+        "%i" % label for (label, _) in line) + "\n"
 
 
 def get_input_data_tensors(reader, data_pattern, batch_size, num_readers=1):
@@ -152,7 +152,7 @@ def set_up_init_ops(variables):
     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
     num_examples_processed = 0
     start_time = time.time()
-    out_file.write("VideoId,LabelConfidencePairs\n")
+    out_file.write("VideoId,Labels\n")
 
     try:
       while not coord.should_stop():
diff --git a/readers.py b/readers.py
@@ -72,9 +72,9 @@ class YT8MAggregatedFeatureReader(BaseReader):
   """
 
   def __init__(self,
-               num_classes=4716,
-               feature_sizes=[1024],
-               feature_names=["mean_inc3"]):
+               num_classes=3862,
+               feature_sizes=[1024, 128],
+               feature_names=["mean_rgb", "mean_audio"]):
     """Construct a YT8MAggregatedFeatureReader.
 
     Args:
@@ -114,7 +114,7 @@ def prepare_serialized_examples(self, serialized_examples):
     "length of feature_names (={}) != length of feature_sizes (={})".format( \
     len(self.feature_names), len(self.feature_sizes))
 
-    feature_map = {"video_id": tf.FixedLenFeature([], tf.string),
+    feature_map = {"id": tf.FixedLenFeature([], tf.string),
                    "labels": tf.VarLenFeature(tf.int64)}
     for feature_index in range(num_features):
       feature_map[self.feature_names[feature_index]] = tf.FixedLenFeature(
@@ -126,7 +126,7 @@ def prepare_serialized_examples(self, serialized_examples):
     concatenated_features = tf.concat([
         features[feature_name] for feature_name in self.feature_names], 1)
 
-    return features["video_id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
+    return features["id"], concatenated_features, labels, tf.ones([tf.shape(serialized_examples)[0]])
 
 class YT8MFrameFeatureReader(BaseReader):
   """Reads TFRecords of SequenceExamples.
@@ -138,9 +138,9 @@ class YT8MFrameFeatureReader(BaseReader):
   """
 
   def __init__(self,
-               num_classes=4716,
-               feature_sizes=[1024],
-               feature_names=["inc3"],
+               num_classes=3862,
+               feature_sizes=[1024, 128],
+               feature_names=["rgb", "audio"],
                max_frames=300):
     """Construct a YT8MFrameFeatureReader.
 
@@ -215,7 +215,7 @@ def prepare_serialized_examples(self, serialized_example,
 
     contexts, features = tf.parse_single_sequence_example(
         serialized_example,
-        context_features={"video_id": tf.FixedLenFeature(
+        context_features={"id": tf.FixedLenFeature(
             [], tf.string),
                           "labels": tf.VarLenFeature(tf.int64)},
         sequence_features={
@@ -261,7 +261,7 @@ def prepare_serialized_examples(self, serialized_example,
 
     # convert to batch format.
     # TODO: Do proper batch reads to remove the IO bottleneck.
-    batch_video_ids = tf.expand_dims(contexts["video_id"], 0)
+    batch_video_ids = tf.expand_dims(contexts["id"], 0)
     batch_video_matrix = tf.expand_dims(video_matrix, 0)
     batch_labels = tf.expand_dims(labels, 0)
     batch_frames = tf.expand_dims(num_frames, 0)