
Commit 8afddee

Merge pull request #36 from owencqueen/owen
Final touches
2 parents 6f4e2c6 + 1c55c42 commit 8afddee

1 file changed: +156 −81 lines

Project4/project4.py

@@ -1,8 +1,12 @@
+# Owen Queen and Sai Thatigotla: Project 4, COSC 525
+
+import sys
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras import layers
-
-from sklearn.model_selection import train_test_split
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.layers import LSTM, SimpleRNN
 
 import matplotlib.pyplot as plt
 
@@ -21,7 +25,6 @@ def split_data(fname, window, stride, write = False):
         - Stride by which to increment on each window
     write: bool, optional
         - If True, writes the training data to file
-
     Returns:
     --------
     split_lines: list of strings
@@ -48,52 +51,18 @@ def split_data(fname, window, stride, write = False):
 
     f.close()
 
-    return split_lines
-
-def train_test(test_size, window, stride):
-    '''
-    Basically a wrapper on train_test_split to work with our system
-
-    Arguments:
-    ----------
-    test_size: float
-        - Size of testing split
-    window: int
-        - Same window parameter as split_data
-    stride: int
-        - Same stride parameter as split_data
-
-    Returns:
-    --------
-    Xtrain, Xtest, Ytrain, Ytest
-    Xtrain: ndarray
-        - X training data
-    Xtest: ndarray
-        - X validation data
-    Ytrain: ndarray
-        - Y training data
-    Ytest: ndarray
-        - Y validation data
-    '''
-
-    # Get the lines without writing file
-    lines = split_data('beatles.txt', window, stride, write = False)
-    X, Y, onehot_to_char = get_train(lines, file = False)
-
-    return train_test_split(X, Y, test_size = test_size, shuffle = True)
+    return split_lines, lines
 
 def make_onehot(vsize, ind):
     '''
     Makes a one-hot encoding for a character
-
     Arguments:
     ----------
     vsize: int
         - Size of vocabulary
         - Determines size of array in output
     ind: int
         - Index that will be marked 1
-
     Returns:
     --------
     g: ndarray of size (vsize,)
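
For context, `make_onehot` itself is untouched by this commit; per the docstring above it returns a `(vsize,)` array with a single index set to 1. A minimal sketch, assuming a plain NumPy body (the actual implementation sits outside this diff):

```python
import numpy as np

def make_onehot(vsize, ind):
    # Zero vector the length of the vocabulary, with a
    # single 1 at the index assigned to this character.
    g = np.zeros(vsize)
    g[ind] = 1.0
    return g

# Example with a 5-character vocabulary, character index 2:
# make_onehot(5, 2) -> array([0., 0., 1., 0., 0.])
```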
@@ -112,15 +81,19 @@ def get_train(fname, file = True):
     ----------
     fname: string
         - Name of file that contains the split data
-
     Returns:
     --------
     Xtrain: (m, n, p) ndarray
         - m: number of sequences
         - n: length of sequences
         - p: vocabulary size
-
-    Ytrain:
+    Ytrain: (m, p) ndarray
+        - m: number of sequences
+        - p: vocabulary size
+    onehot_map: dict
+        - Mapping from char to one-hot encoding index
+    onehot_to_char: dict
+        - Mapping from one-hot encoding index to char
     '''
 
     if file:
@@ -137,56 +110,130 @@ def get_train(fname, file = True):
     vsize = len(onehot_map.keys())
 
     X = []
+    Y = []
     for l in lines:
         X.append([])
-        for c in l:
+        Y.append(make_onehot(vsize, onehot_map[l[-1]]))
+        for c in l[:-1]:
             X[-1].append(make_onehot(vsize, onehot_map[c]))
 
     # Leave out last sample (doesn't have next character for prediction)
-    Xtrain = np.array(X)[:-1]
-
-    # Now get y labels:
-    Y = []
-    for i in range(len(X) - 1):
-        # First character of i + 1 sequence
-        Y.append(X[i + 1][0])
-
+    Xtrain = np.array(X)
     Ytrain = np.array(Y)
 
-    return Xtrain, Ytrain, onehot_to_char
+    return Xtrain, Ytrain, onehot_map, onehot_to_char
 
-def predict_char(initial_char, model, temp, num_char_pred, vocab_size):
+def predict_char(initial_char, model, temp, num_char_pred, vocab_size, window_size, orig_map, inverse_map):
+    '''
+    Arguments:
+    ----------
+    initial_char: string
+        - Initial seed string
+    model: keras Model object
+        - Trained model which we use to make predictions
+    temp: float
+        - Sampling temperature
+    num_char_pred: int
+        - Number of characters that we wish to predict
+    vocab_size: int
+        - Size of vocabulary for entire training/prediction problem
+    window_size: int
+        - Size of window used in preprocessing
+    orig_map: dict
+        - Mapping from character to index for one-hot encoding
+    inverse_map: dict
+        - Reverse mapping of orig_map
+
+    Returns:
+    --------
+    generated_chars: string
+        - Generated characters predicted by the model
+    '''
     chars = initial_char
-    generated_ix = []
+
+    generated_chars = ""
+
     for i in range(num_char_pred):
-        preds = model.predict(np.array([chars,]))[0]
+        input_chars = np.zeros((1, window_size, vocab_size))
+
+        for j,k in enumerate(chars):
+            input_chars[0, j, orig_map[k]] = 1.0
+
+        preds = model.predict(np.array(input_chars))
+        preds = preds[0]
+        preds = np.asarray(preds).astype('float64')
+
+        # Temp/Softmax on predictions:
         preds = np.log(preds)/temp
         exp_preds = np.exp(preds)
         preds = exp_preds / np.sum(exp_preds)
+
+        # Sampling based on predictions:
         probas = np.random.multinomial(1, preds, 1)
+
         ix = np.argmax(probas)
-        x = np.zeros((1, vocab_size))
-        x[0][ix] = 1
-        chars = np.append(chars, x, axis=0)
+        next_char = inverse_map[ix]
+
+        # Increment strings:
+        chars += next_char
+        generated_chars += next_char
+
         chars = chars[1:]
-        generated_ix.append(ix)
-    return generated_ix
+
+    return generated_chars
+
+def train(model, X, Y, orig_map, inverse_map, lines, epochs=100, temp=[0.01, 0.25, 0.5, 0.75, 1.0]):
+    '''
+    Arguments:
+    ----------
+    model: keras Model object
+        - Model object holding architecture to be trained
+    X: ndarray
+        - Training data in the form of a tensor
+    Y: ndarray
+        - One-hot next-character labels for each training sequence
+    orig_map: dict
+        - Mapping from character to index for one-hot encoding
+    inverse_map: dict
+        - Reverse mapping of orig_map
+    lines: string
+        - Text of the training file, used to draw random seed sequences
+    epochs: int
+        - Number of epochs to train model
+    temp: list of floats OR float, optional
+        - Default: [0.01, 0.25, 0.5, 0.75, 1.0]
+        - Sampling temperatures to use for a qualitative evaluation of the model at every fourth epoch
+
+    Returns:
+    --------
+    histories: list of History objects
+        - Histories from each epoch that the model is run
+    '''
 
-def train(model, X, Y, inverse_map, epochs=5):
     histories = []
-    for e in range(1, epochs):
-        history = model.fit(X, Y)
+    for e in range(1, epochs+1):
+        history = model.fit(X, Y, batch_size=64)
         histories.append(history)
-        if (e % 1 == 0):
-            ind = np.random.randint(0, len(X)-1)
-            initial = X[ind]
-            initial_ind = np.argmax(initial, axis=-1)
-            txt = ''.join(inverse_map[ix] for ix in initial_ind)
-            print ('\nInitial: {}'.format (txt))
 
-            gen = predict_char(initial, model, 0.5, 100, X.shape[2])
-            txt = ''.join(inverse_map[ix] for ix in gen)
-            print ('----\n {} \n----'.format (txt))
+        # Evaluate every 4th epoch:
+        if ((e % 4 == 0) or (e == epochs)):
+
+            ind = np.random.randint(0, len(lines) - X.shape[1] - 1)
+
+            initial = lines[ind: ind+X.shape[1]]
+
+            print('Initial: {}\n'.format(initial))
+
+            # Qualitative evaluation on predictions based on random sequence
+            if (isinstance(temp, list)):
+                for j in temp:
+                    gen = predict_char(initial, model, j, 100, X.shape[2], X.shape[1], orig_map, inverse_map)
+                    print ('----\nTemperature: {}\n{} \n----'.format (j, gen))
+
+            else:
+                gen = predict_char(initial, model, temp, 100, X.shape[2], X.shape[1], orig_map, inverse_map)
+                txt = ''.join(gen)
+                print ('----\nTemperature: {}\n{} \n----'.format (temp, txt))
 
     return histories
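
The temperature logic added to `predict_char` above rescales the model's output distribution before sampling: dividing the log-probabilities by `temp` sharpens the distribution when `temp < 1` (near-greedy decoding) and flattens it when `temp > 1` (more diverse output). A self-contained sketch of just that step, mirroring the lines in the diff:

```python
import numpy as np

def sample_with_temperature(preds, temp):
    # Rescale log-probabilities by the temperature, re-normalize
    # with a softmax, then draw one index from the result.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temp
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# With temp = 0.5 the peaked distribution below almost always
# yields index 0; with temp = 2.0 the other indices appear often.
print(sample_with_temperature([0.7, 0.2, 0.1], 0.5))
```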

@@ -208,23 +255,51 @@ def plot_loss_epoch(histories, title = ''):
     '''
 
     train_loss = [h.history['loss'] for h in histories]
-    #val_loss = [h.history['val_loss'] for h in histories]
 
     plt.plot(range(0, len(train_loss)), train_loss)
     plt.xlabel('Epoch')
     plt.ylabel('Loss')
     plt.title(title)
     plt.show()
 
-if __name__ == '__main__':
-    #split_data('beatles.txt', 5, 3, write = False)
-    X, Y, i_map = get_train('lyrics_w=5_s=3.txt')
+if __name__ == '__main__':
+    # Command line interface for the project
+
+    model_opts = {'lstm', 'rnn'}
+
+    # Error checking
+    if len(sys.argv) != 7:
+        print('usage: python3 project4.py <file> <lstm or rnn> <hidden state size> <window size> <stride> <temperature>')
+        exit()
+
+    if not (sys.argv[2] in model_opts):
+        print('usage: python3 project4.py <file> <lstm or rnn> <hidden state size> <window size> <stride> <temperature>')
+        exit()
+
+    # Setting up arguments
+    hstate = int(sys.argv[3])
+    window = int(sys.argv[4])
+    stride = int(sys.argv[5])
+    temp = float(sys.argv[6])
+
+    splits, lines = split_data(sys.argv[1], window, stride, write = False)
+    X, Y, orig_map, i_map = get_train(splits, file = False)
+
+    vocab_size = len(list(orig_map.keys()))
+
+    # Builds the given model architecture
     model = tf.keras.models.Sequential()
-    model.add(layers.LSTM(5, input_shape=(6, 47)))
-    model.add(layers.Dense(47, activation="softmax"))
-    model.compile(loss="categorical_crossentropy", optimizer="adam")
+    if sys.argv[2] == 'lstm':
+        model.add(layers.LSTM(hstate, input_shape = (window, vocab_size)))
+
+    elif sys.argv[2] == 'rnn':
+        model.add(layers.SimpleRNN(hstate, input_shape = (window, vocab_size)))
+
+    model.add(layers.Dense(vocab_size, activation = 'softmax'))
 
-    h = train(model, X, Y, i_map)
-    plot_loss_epoch(h)
+    # Compiles the model
+    model.compile(loss='categorical_crossentropy', optimizer = 'adam')
 
-
+    # Train the model and show the loss plot
+    h = train(model, X, Y, orig_map, i_map, lines, epochs = 15)
+    plot_loss_epoch(h, title = sys.argv[2].upper() + ' w = {}, stride = {}, hidden units = {} Loss'.format(window, stride, hstate))
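
With these changes the script is driven entirely from the command line, following the usage string above. A hypothetical invocation (the hyperparameter values here are illustrative; `beatles.txt` is the corpus the removed code hard-coded):

```
python3 project4.py beatles.txt lstm 100 10 5 0.5
```

Note that in this revision the `<temperature>` argument is parsed into `temp` but `train` is called with only `epochs = 15`, so generation during training still uses the default temperature list.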
