Skip to content

Commit f6e7257

Browse files
committed
[moderate] Updated the default t_dim to 256 and the default filter sizes of the Discriminator network
1 parent d9261ac commit f6e7257

7 files changed

+13
-13
lines changed

README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ The following flags can be set to change the hyperparameters of the network.
102102
FLAG | VALUE TYPE | DEFAULT VALUE | DESCRIPTION
103103
--- | --- | --- | ---
104104
z-dim | int | 100 | Number of dimensions of the Noise vector |
105-
t_dim | int | 512 | Number of dimensions for the latent representation of the text embedding.
105+
t_dim | int | 256 | Number of dimensions for the latent representation of the text embedding.
106106
batch_size | int | 64 | Mini-Batch Size
107107
image_size | int | 128 | Size of the images to use during training.
108108
gf_dim | int | 64 | Number of conv filters in the first layer of the generator.
@@ -194,7 +194,7 @@ This script will create a pickle file called ```Data/enc_text.pkl``` with featur
194194
To generate images for the text descriptions, run the following script,
195195

196196
```
197-
python generate_images.py --dataset=flowers --output_dir=Data/synthetic_dataset --checkpoints_dir=Data/training/TAC_GAN/checkpoints --images_per_caption=30 --data_dir=Data
197+
python generate_images.py --data_set=flowers --checkpoints_dir=Data/training/TAC_GAN/checkpoints --images_per_caption=30 --data_dir=Data
198198
```
199199

200200
This will create a directory ```Data/images_generated_from_text/``` with a folder corresponding to every row of the ***text.txt*** file. Each of these folders will contain images for that text.
@@ -204,7 +204,7 @@ The following are the parameters you need to set, in case you have used differen
204204
FLAG | VALUE TYPE | DEFAULT VALUE | DESCRIPTION
205205
--- | --- | --- | ---
206206
z-dim | int | 100 | Number of dimensions of the Noise vector |
207-
t_dim | int | 512 | Number of dimensions for the latent representation of the text embedding.
207+
t_dim | int | 256 | Number of dimensions for the latent representation of the text embedding.
208208
batch_size | int | 64 | Mini-Batch Size
209209
image_size | int | 128 | Size of the images to use during training.
210210
gf_dim | int | 64 | Number of conv filters in the first layer of the generator.
@@ -298,7 +298,7 @@ This will generate the interpolated images in ```Data/synthetic_dataset/t_interp
298298
FLAG | VALUE TYPE | DEFAULT VALUE | DESCRIPTION
299299
--- | --- | --- | ---
300300
z-dim | int | 100 | Number of dimensions of the Noise vector |
301-
t_dim | int | 512 | Number of dimensions for the latent representation of the text embedding.
301+
t_dim | int | 256 | Number of dimensions for the latent representation of the text embedding.
302302
batch_size | int | 64 | Mini-Batch Size
303303
image_size | int | 128 | Size of the images to use during training.
304304
gf_dim | int | 64 | Number of conv filters in the first layer of the generator.

create_dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def main():
1515
parser.add_argument('--z_dim', type=int, default=100,
1616
help='Noise dimension')
1717

18-
parser.add_argument('--t_dim', type=int, default=512,
18+
parser.add_argument('--t_dim', type=int, default=256,
1919
help='Text feature dimension')
2020

2121
parser.add_argument('--batch_size', type=int, default=64,

generate_images.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def main():
1616
parser.add_argument('--z_dim', type=int, default=100,
1717
help='Noise dimension')
1818

19-
parser.add_argument('--t_dim', type=int, default=512,
19+
parser.add_argument('--t_dim', type=int, default=256,
2020
help='Text feature dimension')
2121

2222
parser.add_argument('--batch_size', type=int, default=64,

model.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -263,20 +263,20 @@ def discriminator(self, image, t_text_embedding, n_classes, t_training,
263263
ops.conv2d(image, self.options['df_dim'], name = 'd_h0_conv')) # 64
264264

265265
h1 = ops.lrelu(slim.batch_norm(ops.conv2d(h0,
266-
self.options['df_dim'] * 8,
266+
self.options['df_dim'] * 2,
267267
name = 'd_h1_conv'),
268268
reuse=reuse,
269269
is_training = t_training,
270270
scope = 'd_bn1')) # 32
271271

272272
h2 = ops.lrelu(slim.batch_norm(ops.conv2d(h1,
273-
self.options['df_dim'] * 6,
273+
self.options['df_dim'] * 4,
274274
name = 'd_h2_conv'),
275275
reuse=reuse,
276276
is_training = t_training,
277277
scope = 'd_bn2')) # 16
278278
h3 = ops.lrelu(slim.batch_norm(ops.conv2d(h2,
279-
self.options['df_dim'] * 6,
279+
self.options['df_dim'] * 8,
280280
name = 'd_h3_conv'),
281281
reuse=reuse,
282282
is_training = t_training,
@@ -294,7 +294,7 @@ def discriminator(self, image, t_text_embedding, n_classes, t_training,
294294

295295
h3_concat = tf.concat([h3, tiled_embeddings], 3, name = 'h3_concat')
296296
h3_new = ops.lrelu(slim.batch_norm(ops.conv2d(h3_concat,
297-
self.options['df_dim'] * 4,
297+
self.options['df_dim'] * 8,
298298
1, 1, 1, 1,
299299
name = 'd_h3_conv_new'),
300300
reuse=reuse,

t_interpolation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def main():
1717
parser.add_argument('--z_dim', type=int, default=100,
1818
help='Noise dimension')
1919

20-
parser.add_argument('--t_dim', type=int, default=512,
20+
parser.add_argument('--t_dim', type=int, default=256,
2121
help='Text feature dimension')
2222

2323
parser.add_argument('--batch_size', type=int, default=64,

train.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def main():
1717
parser.add_argument('--z_dim', type=int, default=100,
1818
help='Noise dimension')
1919

20-
parser.add_argument('--t_dim', type=int, default=512,
20+
parser.add_argument('--t_dim', type=int, default=256,
2121
help='Text feature dimension')
2222

2323
parser.add_argument('--batch_size', type=int, default=64,

z_interpolation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def main():
1616
parser.add_argument('--z_dim', type=int, default=100,
1717
help='Noise dimension')
1818

19-
parser.add_argument('--t_dim', type=int, default=512,
19+
parser.add_argument('--t_dim', type=int, default=256,
2020
help='Text feature dimension')
2121

2222
parser.add_argument('--batch_size', type=int, default=64,

0 commit comments

Comments (0)