forked from ReidWilliams/GANs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
architecture.py
84 lines (68 loc) · 3.82 KB
/
architecture.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import tensorflow as tf
import numpy as np
# the neural network building blocks (layers, activations) are abstracted
# into a separate module, ops
from ops import BN, conv2d, conv2dtr, dense, lrelu, \
flatten, reshape, sigmoid, tanh
class GAN:
    """Graph builders for the generator and discriminator of an image GAN.

    The individual layers (dense, conv2d, conv2dtr, BN, activations) come
    from the project's ``ops`` module; this class only wires them together
    inside TF variable scopes.
    """

    # Ratio between the output image size and the generator's starting
    # "tiny image". The generator floor-divides the image rows/cols by this
    # value, so both must be positive multiples of it.
    # NOTE(review): 32 presumably equals 2**5 because each of the five
    # conv2dtr layers doubles the spatial size -- confirm against ops.conv2dtr.
    _UPSCALE = 32

    def __init__(self, is_training, img_shape=(64, 64, 3), zsize=128):
        """Store the model configuration; no graph ops are created here.

        Args:
            is_training: whether the generator is being trained. Expected to
                be a TF placeholder that is set true or false depending on
                whether the model is training or generating.
            img_shape: image shape as (rows, cols, channels).
            zsize: latent (z) vector length.
        """
        # Input image shape: rows, cols, channels
        self.img_shape = img_shape
        # latent (z) vector length
        self.zsize = zsize
        # is the generator being trained
        self.is_training = is_training

    def generator(self, inputs, scope='generator', reuse=None):
        """Build the generator: a latent vector goes in, an image comes out.

        Args:
            inputs: latent vectors.
                NOTE(review): presumably shaped (batch, zsize) -- confirm
                against the caller.
            scope: name of the TF variable scope holding the variables.
            reuse: forwarded to ``tf.variable_scope`` to share variables
                between several instantiations of the generator.

        Returns:
            The tanh-activated output of the final transposed convolution,
            with ``img_shape[2]`` channels.

        Raises:
            ValueError: if the image rows or cols are not positive multiples
                of 32. Without this check the floor division below would
                silently build a generator for a smaller (or empty) image.
        """
        rows = self.img_shape[0]
        cols = self.img_shape[1]
        # Validate before entering the variable scope so that a bad
        # configuration does not leave half-created variables in the graph.
        for label, size in (('rows', rows), ('cols', cols)):
            if size <= 0 or size % self._UPSCALE != 0:
                raise ValueError(
                    'generator needs image %s to be a positive multiple of '
                    '%d, got img_shape=%r'
                    % (label, self._UPSCALE, self.img_shape))

        with tf.variable_scope(scope, reuse=reuse):
            # generator will upscale a tiny image with layers of convolution
            # until it reaches the final output image dimensions. These vars
            # are the starting tiny image dimensions.
            minirows = rows // self._UPSCALE
            minicols = cols // self._UPSCALE

            # batch normalization, which needs to know whether this is
            # training or application
            bn = BN(self.is_training)

            # dense (i.e. fully connected) layer followed by reshaping into
            # the tiny image. The tiny image has a depth of 512 that
            # gradually gets reduced to the image channel count (3 for r, g, b)
            t = dense(inputs, minirows * minicols * 512)
            # batch size is read dynamically so any batch size works
            t = reshape(t, (tf.shape(t)[0], minirows, minicols, 512))
            t = lrelu(bn(t))

            t = lrelu(bn(conv2dtr(t, 512)))
            t = lrelu(bn(conv2dtr(t, 256)))
            t = lrelu(bn(conv2dtr(t, 128)))
            t = lrelu(bn(conv2dtr(t, 64)))

            # final conv2d transpose to get to the image's filter depth
            # (3 for rgb channels)
            logits = conv2dtr(t, self.img_shape[2])
            return tanh(logits)  # common final activation in GANs

    def discriminator(self, inputs, scope='discriminator', reuse=None):
        """Build the discriminator: an image goes in, a real/fake score comes out.

        Args:
            inputs: batch of images to score.
            scope: name of the TF variable scope holding the variables.
            reuse: forwarded to ``tf.variable_scope`` to share variables
                between several instantiations of the discriminator.

        Returns:
            A tuple ``(classification, logits, similarity)``:
            classification is the sigmoid of logits (0 = fake, 1 = real);
            logits is the single-unit dense output before the sigmoid;
            similarity is the flattened output of the last conv layer.
        """
        with tf.variable_scope(scope, reuse=reuse):
            # Set discriminator to always be training. Reason: for the WGAN
            # gradient loss (not the default loss for this model, but it
            # uses this architecture), the loss contains the gradient of an
            # instance of the discriminator. Putting that into the optimizer
            # creates a dependency on the second order gradient of the
            # discriminator. Batch normalization where the training vs
            # running flag is itself a TF variable (rather than a normal
            # python boolean) seems to break this. Easier to just set to
            # True because in this model we only ever use the discriminator
            # for training (to train the generator).
            bn = BN(True)

            t = lrelu(conv2d(inputs, 64))  # no bn here
            t = lrelu(bn(conv2d(t, 128)))
            t = lrelu(bn(conv2d(t, 256)))
            t = lrelu(bn(conv2d(t, 512)))
            t = lrelu(bn(conv2d(t, 1024)))

            # flatten 3D tensor into 1D to prepare for a dense (fully
            # connected) layer. Flattened tensor can also be treated as a
            # vector that can be used for learned similarity measurements
            # between images.
            similarity = flatten(t)

            # return logits (before sigmoid activation) because several TF
            # accumulator functions expect logits, and do the sigmoid for you
            logits = dense(similarity, 1)
            classification = sigmoid(logits)
            return classification, logits, similarity