Skip to content

Commit f6e7257

Browse files
committed
[moderate] Updated the default t_dim to 256 and the default filter sizes of the Discriminator network
1 parent d9261ac commit f6e7257

7 files changed

+13
-13
lines changed

README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ The following flags can be set to change the hyperparameters of the network.
102102
FLAG | VALUE TYPE | DEFAULT VALUE | DESCRIPTION
103103
--- | --- | --- | ---
104104
z-dim | int | 100 | Number of dimensions of the Noise vector |
105-
t_dim | int | 512 | Number of dimensions for the latent representation of the text embedding.
105+
t_dim | int | 256 | Number of dimensions for the latent representation of the text embedding.
106106
batch_size | int | 64 | Mini-Batch Size
107107
image_size | int | 128 | Size of the images to use during training.
108108
gf_dim | int | 64 | Number of conv filters in the first layer of the generator.
@@ -194,7 +194,7 @@ This script will create a pickle file called ```Data/enc_text.pkl``` with featur
194194
To generate images for the text descriptions, run the following script,
195195

196196
```
197-
python generate_images.py --dataset=flowers --output_dir=Data/synthetic_dataset --checkpoints_dir=Data/training/TAC_GAN/checkpoints --images_per_caption=30 --data_dir=Data
197+
python generate_images.py --data_set=flowers --checkpoints_dir=Data/training/TAC_GAN/checkpoints --images_per_caption=30 --data_dir=Data
198198
```
199199

200200
This will create a directory ```Data/images_generated_from_text/``` with a folder corresponding to every row of the ***text.txt*** file. Each of these folders will contain images for that text.
@@ -204,7 +204,7 @@ The following are the parameters you need to set, in case you have used differen
204204
FLAG | VALUE TYPE | DEFAULT VALUE | DESCRIPTION
205205
--- | --- | --- | ---
206206
z-dim | int | 100 | Number of dimensions of the Noise vector |
207-
t_dim | int | 512 | Number of dimensions for the latent representation of the text embedding.
207+
t_dim | int | 256 | Number of dimensions for the latent representation of the text embedding.
208208
batch_size | int | 64 | Mini-Batch Size
209209
image_size | int | 128 | Size of the images to use during training.
210210
gf_dim | int | 64 | Number of conv filters in the first layer of the generator.
@@ -298,7 +298,7 @@ This will generate the interpolated images in ```Data/synthetic_dataset/t_interp
298298
FLAG | VALUE TYPE | DEFAULT VALUE | DESCRIPTION
299299
--- | --- | --- | ---
300300
z-dim | int | 100 | Number of dimensions of the Noise vector |
301-
t_dim | int | 512 | Number of dimensions for the latent representation of the text embedding.
301+
t_dim | int | 256 | Number of dimensions for the latent representation of the text embedding.
302302
batch_size | int | 64 | Mini-Batch Size
303303
image_size | int | 128 | Size of the images to use during training.
304304
gf_dim | int | 64 | Number of conv filters in the first layer of the generator.

create_dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def main():
1515
parser.add_argument('--z_dim', type=int, default=100,
1616
help='Noise dimension')
1717

18-
parser.add_argument('--t_dim', type=int, default=512,
18+
parser.add_argument('--t_dim', type=int, default=256,
1919
help='Text feature dimension')
2020

2121
parser.add_argument('--batch_size', type=int, default=64,

generate_images.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def main():
1616
parser.add_argument('--z_dim', type=int, default=100,
1717
help='Noise dimension')
1818

19-
parser.add_argument('--t_dim', type=int, default=512,
19+
parser.add_argument('--t_dim', type=int, default=256,
2020
help='Text feature dimension')
2121

2222
parser.add_argument('--batch_size', type=int, default=64,

model.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -263,20 +263,20 @@ def discriminator(self, image, t_text_embedding, n_classes, t_training,
263263
ops.conv2d(image, self.options['df_dim'], name = 'd_h0_conv')) # 64
264264

265265
h1 = ops.lrelu(slim.batch_norm(ops.conv2d(h0,
266-
self.options['df_dim'] * 8,
266+
self.options['df_dim'] * 2,
267267
name = 'd_h1_conv'),
268268
reuse=reuse,
269269
is_training = t_training,
270270
scope = 'd_bn1')) # 32
271271

272272
h2 = ops.lrelu(slim.batch_norm(ops.conv2d(h1,
273-
self.options['df_dim'] * 6,
273+
self.options['df_dim'] * 4,
274274
name = 'd_h2_conv'),
275275
reuse=reuse,
276276
is_training = t_training,
277277
scope = 'd_bn2')) # 16
278278
h3 = ops.lrelu(slim.batch_norm(ops.conv2d(h2,
279-
self.options['df_dim'] * 6,
279+
self.options['df_dim'] * 8,
280280
name = 'd_h3_conv'),
281281
reuse=reuse,
282282
is_training = t_training,
@@ -294,7 +294,7 @@ def discriminator(self, image, t_text_embedding, n_classes, t_training,
294294

295295
h3_concat = tf.concat([h3, tiled_embeddings], 3, name = 'h3_concat')
296296
h3_new = ops.lrelu(slim.batch_norm(ops.conv2d(h3_concat,
297-
self.options['df_dim'] * 4,
297+
self.options['df_dim'] * 8,
298298
1, 1, 1, 1,
299299
name = 'd_h3_conv_new'),
300300
reuse=reuse,

t_interpolation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def main():
1717
parser.add_argument('--z_dim', type=int, default=100,
1818
help='Noise dimension')
1919

20-
parser.add_argument('--t_dim', type=int, default=512,
20+
parser.add_argument('--t_dim', type=int, default=256,
2121
help='Text feature dimension')
2222

2323
parser.add_argument('--batch_size', type=int, default=64,

train.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def main():
1717
parser.add_argument('--z_dim', type=int, default=100,
1818
help='Noise dimension')
1919

20-
parser.add_argument('--t_dim', type=int, default=512,
20+
parser.add_argument('--t_dim', type=int, default=256,
2121
help='Text feature dimension')
2222

2323
parser.add_argument('--batch_size', type=int, default=64,

z_interpolation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def main():
1616
parser.add_argument('--z_dim', type=int, default=100,
1717
help='Noise dimension')
1818

19-
parser.add_argument('--t_dim', type=int, default=512,
19+
parser.add_argument('--t_dim', type=int, default=256,
2020
help='Text feature dimension')
2121

2222
parser.add_argument('--batch_size', type=int, default=64,

0 commit comments

Comments (0)