-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathclassifier_model.py
71 lines (56 loc) · 2.61 KB
/
classifier_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, RobustScaler
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
from keras import Sequential
from keras.layers import Dense, Dropout
from keras.utils import np_utils
# ----------------------------- Data preparation -----------------------------
# Reads model_data.csv, integer-encodes the target, one-hot encodes the
# features, then produces X_train / X_test / Y_train / Y_test for the network.

# `keras.utils.np_utils` no longer exists in recent Keras releases, whereas
# `keras.utils.to_categorical` is importable from both old and new versions.
from keras.utils import to_categorical

# Load the dataset
df = pd.read_csv('model_data.csv')
y = df.univName  # name of the university is the classification target
x = df.drop('univName', axis=1)  # every remaining column is a feature

# Integer-encode the university names (SMOTE works on discrete class labels)
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(y)
n_classes = len(encoder.classes_)

# One-hot encode any categorical feature columns
x1 = pd.get_dummies(x)

# Split BEFORE over-sampling and scaling. Resampling or fitting the scaler on
# the full dataset would leak test-set information into training (synthetic
# SMOTE points are interpolated from real rows) and inflate the test scores.
# Stratifying keeps the class proportions the same on both sides of the
# split; it requires at least two rows per university.
x_train_raw, x_test_raw, y_train_raw, y_test_raw = train_test_split(
    x1,
    encoded_Y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=encoded_Y,
)

# Over-sample the training portion only so every class matches the majority.
# SMOTE needs more samples in a class than `k_neighbors`, so cap the default
# of 5 by the size of the smallest training class.
smallest_class = int(np.unique(y_train_raw, return_counts=True)[1].min())
smote = SMOTE(
    sampling_strategy='not majority',
    k_neighbors=max(1, min(5, smallest_class - 1)),
    random_state=42,
)
X1, Y1 = smote.fit_resample(x_train_raw, y_train_raw)

# Scale the values; RobustScaler is less sensitive to outliers. The scaler is
# fitted on the training data and merely applied to the test data.
sc = RobustScaler()
X_train = sc.fit_transform(X1)
X_test = sc.transform(x_test_raw)

# One-hot encode the labels. `num_classes` is passed explicitly so both
# matrices have one column per university even if a class is absent from
# one of the two subsets.
Y_train = to_categorical(Y1, num_classes=n_classes)
Y_test = to_categorical(y_test_raw, num_classes=n_classes)
################################ Make multi-class Classifier Model #####################################################
# Fully connected network: three ReLU hidden layers (400 -> 800 -> 100)
# followed by a softmax layer with one unit per class.
n_features = X_train.shape[1]
# Take the number of output units from the one-hot label matrix rather than
# hard-coding 36, so the model keeps working if the set of classes changes.
n_outputs = Y_train.shape[1]

classifier = Sequential()
classifier.add(Dense(400, activation='relu', kernel_initializer='random_normal', input_dim=n_features))
classifier.add(Dense(800, activation='relu', kernel_initializer='random_normal'))
classifier.add(Dense(100, activation='relu', kernel_initializer='random_normal'))
classifier.add(Dense(n_outputs, activation='softmax', kernel_initializer='random_normal'))
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
classifier.fit(X_train, Y_train, batch_size=20, epochs=200, verbose=0)

# `evaluate` returns [loss, accuracy] because 'accuracy' is the only metric
# passed to `compile`. This first figure is measured on the data the network
# was trained on (the original author noted 0.7864 for it).
eval_model = classifier.evaluate(X_train, Y_train)
print("Accuracy: ", eval_model[1])

# Training accuracy alone says nothing about generalisation, so also report
# the score on the held-out split.
test_eval = classifier.evaluate(X_test, Y_test)
print("Test accuracy: ", test_eval[1])
# Get the predicted class for each test sample.
# `Sequential.predict_classes` was removed in TensorFlow 2.6; taking the
# argmax over the softmax probabilities yields the same class indices.
y_pred = np.argmax(classifier.predict(X_test), axis=1)
print(y_pred)

# Generate the confusion matrix to see how well each class is recognised.
# Y_test is one-hot, so argmax recovers the integer class of each row.
# Passing `labels` keeps the matrix square with one row/column per class even
# when some class does not occur in the test split or in the predictions.
y_true = Y_test.argmax(axis=1)
cm = confusion_matrix(y_true, y_pred, labels=np.arange(Y_test.shape[1]))

fig, ax = plt.subplots()
sns.heatmap(cm, annot=False, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()
########################################## Pickle the Classifier Model #################################################
# Serialise the trained network to disk so it can be reloaded without
# retraining.
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras/TensorFlow version - confirm, or consider the native `classifier.save`.
# Only load this file from a trusted location: unpickling runs arbitrary code.
import joblib
joblib.dump(value=classifier, filename='classifier_model.pkl')