Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue: "BadLayout: Invalid layout of the ARFF file, at line 3" #111

Open
Malhabib opened this issue Jul 13, 2020 · 1 comment
Open

Issue: "BadLayout: Invalid layout of the ARFF file, at line 3" #111

Malhabib opened this issue Jul 13, 2020 · 1 comment

Comments

@Malhabib
Copy link

# Module-level, read-only text handle on the ARFF dataset. scrape_data()
# passes this handle to the ARFF decoder.
# NOTE(review): nothing in the visible code closes this handle — confirm
# whether that is intentional.
file = open("data/final-dataset.arff", 'r')

# Togglable Options
# Boolean switches for the script below.
# NOTE(review): their meaning is inferred from the names only — confirm.

# presumably: retrain instead of reusing saved-files/model.h5
regenerate_model = False

# presumably: rebuild the cached .npy arrays from the ARFF file
regenerate_data = False

# presumably: draw the accuracy/loss figures
generate_graphs = True

# presumably: write the model to disk at the end of the run
save_model = True

# presumably: render the architecture diagram (model.png)
create_model_image = False

def generate_model(shape):
    """Build the (uncompiled) feed-forward classifier and print its summary.

    The network stacks three ReLU ``Dense`` layers (30, 10 and 10 units),
    each followed by ``Dropout(0.4)``, and ends in a 5-unit softmax layer.

    Args:
        shape: Number of input features; passed as ``input_dim`` to the
            first ``Dense`` layer.

    Returns:
        The ``Sequential`` model. It is not compiled here.
    """
    model = Sequential()

    model.add(Dense(30, input_dim=shape, kernel_initializer='uniform', activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(5, activation='softmax'))

    # summary() prints the layer table itself and returns None, so wrapping
    # it in print() only appended a stray "None" line to the output.
    model.summary()

    return model

def scrape_data():
    """Decode the ARFF dataset and cache train/validation arrays as .npy files.

    Reads the module-level ``file`` handle, splits every row into features
    (all columns but the last) and a label (the last column), collapses the
    labels to 0 / 1, takes the first 90% of rows for training and the rest
    for validation, one-hot encodes both label sets with 5 classes, and
    saves everything under ``saved-files/``.

    Returns:
        None. The results are written to disk.
    """
    # The handle is shared at module level; rewind it so that calling this
    # function more than once does not start decoding at end-of-file.
    file.seek(0)

    # decode the .arff data and change text labels into numerical
    decoder = arff.ArffDecoder()
    data = decoder.decode(file, encode_nominal=True)

    # split the raw data into data and labels
    rows = data['data']
    vals = [row[:-1] for row in rows]

    # Collapse every non-zero class to 1. The previous loop iterated over the
    # label *values* and used them as list indices, so it rewrote unrelated
    # positions instead of each label in turn.
    labels = [0 if row[-1] == 0 else 1 for row in rows]

    # split the labels and data into training and validation sets (90 / 10)
    split = int(.9 * len(vals))
    training_data = vals[:split]
    training_labels = labels[:split]
    validation_data = vals[split:]
    validation_labels = labels[split:]

    print(training_labels)

    # flatten labels with one hot encoding
    training_labels = to_categorical(training_labels, 5)
    validation_labels = to_categorical(validation_labels, 5)

    # np.save does not create missing directories.
    os.makedirs('saved-files', exist_ok=True)

    # save all arrays with numpy
    np.save('saved-files/vals', np.asarray(vals))
    np.save('saved-files/labels', np.asarray(labels))
    np.save('saved-files/training_data', np.asarray(training_data))
    np.save('saved-files/validation_data', np.asarray(validation_data))
    np.save('saved-files/training_labels', np.asarray(training_labels))
    np.save('saved-files/validation_labels', np.asarray(validation_labels))

# Check whether the cached arrays exist; if any is missing (or regeneration
# was explicitly requested via `regenerate_data`), create the data.
required_files = (
    'saved-files/training_data.npy',
    'saved-files/training_labels.npy',
    'saved-files/validation_data.npy',
    'saved-files/validation_labels.npy',
)

# Previously scrape_data() was also called unconditionally after this check,
# which rebuilt the data on every run and made the existence test pointless.
if regenerate_data or not all(os.path.exists(path) for path in required_files):
    print('creating')
    os.makedirs('saved-files', exist_ok=True)
    scrape_data()

# load the saved data
data_train = np.load('saved-files/training_data.npy')
label_train = np.load('saved-files/training_labels.npy')
data_eval = np.load('saved-files/validation_data.npy')
label_eval = np.load('saved-files/validation_labels.npy')

# generate and compile the model; the input width is the length of one
# training row
model = generate_model(len(data_train[0]))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# initialize tensorboard
tensorboard = TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=True)

# only training for 2 epochs, otherwise the model would overfit to the data
history = model.fit(data_train, label_train, validation_data=(data_eval, label_eval), epochs=2, callbacks=[tensorboard])
loss_history = history.history["loss"]

# keep the per-epoch training loss on disk as plain text
numpy_loss_history = np.array(loss_history)
np.savetxt("saved-files/loss_history.txt", numpy_loss_history, delimiter=",")

# Swap in the previously saved model only when retraining was not requested
# and a saved model actually exists. Loading unconditionally crashed on a
# first run (no model.h5 yet) and always threw away the model trained above.
if not regenerate_model and os.path.exists('saved-files/model.h5'):
    model = load_model('saved-files/model.h5')

# evaluating the model's performance on the validation and training sets
print(model.evaluate(data_eval, label_eval))
print(model.evaluate(data_train, label_train))

# NOTE: the `create_model_image` guard was disabled by the author, so the
# architecture diagram is written on every run.
plot_model(model, to_file='model.png', show_shapes=True)

plt.figure(1)

# Keras reports the metric under the name given to compile(); older releases
# shortened 'accuracy' to 'acc'. Use whichever key this run produced so the
# lookup does not raise KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# summarize history for accuracy
plt.subplot(211)
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')

# summarize history for loss
plt.subplot(212)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# save the model for later so no retraining is needed
model.save('saved-files/model.h5')

# play a sound twice when the script is done, as an audible alert
# (afplay is a macOS command; the return status is ignored)
for _ in range(2):
    os.system('afplay /System/Library/Sounds/Ping.aiff')

@jnothman
Copy link
Contributor

jnothman commented Jul 13, 2020 via email

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants