+ # Owen Queen and Sai Thatigotla: Project 4, COSC 525
+
+ import sys
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
-
- from sklearn.model_selection import train_test_split
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Dense
+ from tensorflow.keras.layers import LSTM, SimpleRNN

import matplotlib.pyplot as plt

@@ -21,7 +25,6 @@ def split_data(fname, window, stride, write = False):
        - Stride by which to increment on each window
    write: bool, optional
        - If True, writes the training data to file
-
    Returns:
    --------
    split_lines: list of strings
@@ -48,52 +51,18 @@ def split_data(fname, window, stride, write = False):

    f.close()

-     return split_lines
-
- def train_test(test_size, window, stride):
-     '''
-     Basically a wrapper on train_test_split to work with our system
-
-     Arguments:
-     ----------
-     test_size: float
-         - Size of testing split
-     window: int
-         - Same window parameter as split_data
-     stride: int
-         - Same stride parameter as split_data
-
-     Returns:
-     --------
-     Xtrain, Xtest, Ytrain, Ytest
-     Xtrain: ndarray
-         - X training data
-     Xtest: ndarray
-         - X validation data
-     Ytrain: ndarray
-         - Y training data
-     Ytest: ndarray
-         - Y validation data
-     '''
-
-     # Get the lines without writing file
-     lines = split_data('beatles.txt', window, stride, write = False)
-     X, Y, onehot_to_char = get_train(lines, file = False)
-
-     return train_test_split(X, Y, test_size = test_size, shuffle = True)
+     return split_lines, lines

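# Example: with window = 3 and stride = 2 (assuming split_data emits chunks of
# window + 1 characters, matching get_train's use of each chunk's final
# character as the label), "hello there" splits into "hell", "llo ", "o th", ...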
def make_onehot(vsize, ind):
    '''
    Makes a one-hot encoding for a character
-
    Arguments:
    ----------
    vsize: int
        - Size of vocabulary
        - Determines size of array in output
    ind: int
        - Index that will be marked 1
-
    Returns:
    --------
    g: ndarray of size (vsize,)
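        - e.g. make_onehot(5, 2) gives [0, 0, 1, 0, 0]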
@@ -112,15 +81,19 @@ def get_train(fname, file = True):
    ----------
    fname: string OR list of strings
        - Name of file that contains the split data, or the split lines
          themselves (if file = False)
-
    Returns:
    --------
    Xtrain: (m, n, p) ndarray
        - m: number of sequences
        - n: length of sequences
        - p: vocabulary size
-
-     Ytrain:
+     Ytrain: (m, p) ndarray
+         - m: number of sequences
+         - p: vocabulary size
+     onehot_map: dict
+         - Mapping from char to one-hot encoding index
+     onehot_to_char: dict
+         - Mapping from one-hot encoding index to char
    '''

    if file:
@@ -137,56 +110,130 @@ def get_train(fname, file = True):
    vsize = len(onehot_map.keys())

    X = []
+     Y = []
    for l in lines:
        X.append([])
-         for c in l:
+         Y.append(make_onehot(vsize, onehot_map[l[-1]]))
+         for c in l[:-1]:
            X[-1].append(make_onehot(vsize, onehot_map[c]))

    # Each chunk's final character is its label, so no samples are dropped
-     Xtrain = np.array(X)[:-1]
-
-     # Now get y labels:
-     Y = []
-     for i in range(len(X) - 1):
-         # First character of i + 1 sequence
-         Y.append(X[i + 1][0])
-
+     Xtrain = np.array(X)
    Ytrain = np.array(Y)

-     return Xtrain, Ytrain, onehot_to_char
+     return Xtrain, Ytrain, onehot_map, onehot_to_char

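# For instance, 1000 chunks with window = 10 over a 47-character vocabulary
# yield Xtrain of shape (1000, 10, 47) and Ytrain of shape (1000, 47).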
- def predict_char(initial_char, model, temp, num_char_pred, vocab_size):
+ def predict_char(initial_char, model, temp, num_char_pred, vocab_size, window_size, orig_map, inverse_map):
+     '''
+     Arguments:
+     ----------
+     initial_char: string
+         - Initial seed string
+     model: keras Model object
+         - Trained model which we use to make predictions
+     temp: float
+         - Sampling temperature
+     num_char_pred: int
+         - Number of characters that we wish to predict
+     vocab_size: int
+         - Size of vocabulary for entire training/prediction problem
+     window_size: int
+         - Size of window used in preprocessing
+     orig_map: dict
+         - Mapping from character to index for one-hot encoding
+     inverse_map: dict
+         - Reverse mapping of orig_map
+
+     Returns:
+     --------
+     generated_chars: string
+         - Generated characters predicted by the model
+     '''
    chars = initial_char
-     generated_ix = []
+
+     generated_chars = ""
+
    for i in range(num_char_pred):
-         preds = model.predict(np.array([chars,]))[0]
+         # One-hot encode the current window of seed characters
+         input_chars = np.zeros((1, window_size, vocab_size))
+
+         for j, k in enumerate(chars):
+             input_chars[0, j, orig_map[k]] = 1.0
+
+         preds = model.predict(np.array(input_chars))
+         preds = preds[0]
+         preds = np.asarray(preds).astype('float64')
+
+         # Temp/Softmax on predictions:
        preds = np.log(preds)/temp
        exp_preds = np.exp(preds)
        preds = exp_preds/np.sum(exp_preds)
+
+         # Sampling based on predictions:
        probas = np.random.multinomial(1, preds, 1)
+
        ix = np.argmax(probas)
-         x = np.zeros((1, vocab_size))
-         x[0][ix] = 1
-         chars = np.append(chars, x, axis = 0)
+         next_char = inverse_map[ix]
+
+         # Increment strings:
+         chars += next_char
+         generated_chars += next_char
+
        # Slide the window forward by dropping the oldest character
        chars = chars[1:]
-         generated_ix.append(ix)
-     return generated_ix
+
+     return generated_chars
+
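# Temperature intuition: dividing the log-probabilities by temp before
# re-normalizing raises each probability to the power 1/temp, so temp = 0.5
# sharpens [0.6, 0.4] to roughly [0.69, 0.31], while temp > 1 flattens the
# distribution toward uniform sampling.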
+ def train(model, X, Y, orig_map, inverse_map, lines, epochs = 100, temp = [0.01, 0.25, 0.5, 0.75, 1.0]):
+     '''
+     Arguments:
+     ----------
+     model: keras Model object
+         - Model object holding architecture to be trained
+     X: ndarray
+         - Training data in the form of a tensor
+     Y: ndarray
+         - One-hot training labels (the next character for each sequence)
+     orig_map: dict
+         - Mapping from character to index for one-hot encoding
+     inverse_map: dict
+         - Reverse mapping of orig_map
+     lines: string
+         - Raw text of the training file, used to draw random seed sequences
+     epochs: int
+         - Number of epochs to train model
+     temp: list of floats OR float, optional
+         - Default: [0.01, 0.25, 0.5, 0.75, 1.0]
+         - Sampling temperatures to use for a qualitative evaluation of the model at every fourth epoch
+
+     Returns:
+     --------
+     histories: list of History objects
+         - Histories from each epoch that the model is run
+     '''
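    # One fit() call per loop iteration trains a single epoch, so train()
    # collects one History object per epoch for plot_loss_epoch below.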

- def train(model, X, Y, inverse_map, epochs = 5):
    histories = []
-     for e in range(1, epochs):
-         history = model.fit(X, Y)
+     for e in range(1, epochs + 1):
+         history = model.fit(X, Y, batch_size = 64)
        histories.append(history)
-         if (e % 1 == 0):
-             ind = np.random.randint(0, len(X)-1)
-             initial = X[ind]
-             initial_ind = np.argmax(initial, axis = -1)
-             txt = ''.join(inverse_map[ix] for ix in initial_ind)
-             print('\nInitial: {}'.format(txt))

-             gen = predict_char(initial, model, 0.5, 100, X.shape[2])
-             txt = ''.join(inverse_map[ix] for ix in gen)
-             print('----\n{}\n----'.format(txt))
+         # Evaluate every 4th epoch:
+         if ((e % 4 == 0) or (e == epochs)):
+
+             ind = np.random.randint(0, len(lines) - X.shape[1] - 1)
+
+             initial = lines[ind : ind + X.shape[1]]
+
+             print('Initial: {}\n'.format(initial))
+
+             # Qualitative evaluation of predictions from a random seed sequence
+             if (isinstance(temp, list)):
+                 for j in temp:
+                     gen = predict_char(initial, model, j, 100, X.shape[2], X.shape[1], orig_map, inverse_map)
+                     print('----\nTemperature: {}\n{}\n----'.format(j, gen))
+
+             else:
+                 gen = predict_char(initial, model, temp, 100, X.shape[2], X.shape[1], orig_map, inverse_map)
+                 print('----\nTemperature: {}\n{}\n----'.format(temp, gen))

    return histories

@@ -208,23 +255,51 @@ def plot_loss_epoch(histories, title = ''):
    '''

    train_loss = [h.history['loss'] for h in histories]
-     #val_loss = [h.history['val_loss'] for h in histories]

    plt.plot(range(0, len(train_loss)), train_loss)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(title)
    plt.show()

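# Note: model.fit is called once per epoch in train(), so each History holds a
# single loss value and the plot shows one point per epoch.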
- if __name__ == '__main__':
-     #split_data('beatles.txt', 5, 3, write = False)
-     X, Y, i_map = get_train('lyrics_w=5_s=3.txt')
+ if __name__ == '__main__':
+     # Command line interface for the project
+
+     model_opts = {'lstm', 'rnn'}
+
+     # Error checking
+     if len(sys.argv) != 7:
+         print('usage: python3 project4.py <file> <lstm or rnn> <hidden state size> <window size> <stride> <temperature>')
+         sys.exit()
+
+     if not (sys.argv[2] in model_opts):
+         print('usage: python3 project4.py <file> <lstm or rnn> <hidden state size> <window size> <stride> <temperature>')
+         sys.exit()
+
+     # Setting up arguments
+     hstate = int(sys.argv[3])
+     window = int(sys.argv[4])
+     stride = int(sys.argv[5])
+     temp = float(sys.argv[6])
+
+     splits, lines = split_data(sys.argv[1], window, stride, write = False)
+     X, Y, orig_map, i_map = get_train(splits, file = False)
+
+     vocab_size = len(orig_map)
+
+     # Builds the given model architecture
    model = tf.keras.models.Sequential()
-     model.add(layers.LSTM(5, input_shape = (6, 47)))
-     model.add(layers.Dense(47, activation = "softmax"))
-     model.compile(loss = "categorical_crossentropy", optimizer = "adam")
+     if sys.argv[2] == 'lstm':
+         model.add(layers.LSTM(hstate, input_shape = (window, vocab_size)))
+
+     elif sys.argv[2] == 'rnn':
+         model.add(layers.SimpleRNN(hstate, input_shape = (window, vocab_size)))
+
+     model.add(layers.Dense(vocab_size, activation = 'softmax'))

-     h = train(model, X, Y, i_map)
-     plot_loss_epoch(h)
+     # Compiles the model
+     model.compile(loss = 'categorical_crossentropy', optimizer = 'adam')

-
+     # Train the model (passing the parsed sampling temperature) and show the loss plot
+     h = train(model, X, Y, orig_map, i_map, lines, epochs = 15, temp = temp)
+     plot_loss_epoch(h, title = sys.argv[2].upper() + ' w = {}, stride = {}, hidden units = {} Loss'.format(window, stride, hstate))
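# Example invocation (hyperparameters here are illustrative, not prescribed):
#   python3 project4.py beatles.txt lstm 100 10 5 0.5
# trains a 100-unit LSTM on 10-character windows taken every 5 characters and
# samples at temperature 0.5 during the periodic evaluations.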
0 commit comments