-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrnn_models_final.py
51 lines (41 loc) · 2.02 KB
/
rnn_models_final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import pandas as pd
from keras.layers import Dense, Embedding, LSTM, GRU
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.python.keras.preprocessing.text import Tokenizer
# Load both splits and discard every row that contains a missing value.
train = pd.read_csv('./Data_files/Training_data.csv').dropna()
test = pd.read_csv('./Data_files/Test_data.csv').dropna()

# Features: everything except the 'class' column and the three label columns.
X_train = train.drop(['class', 'Hate', 'Offensive', 'Neither'], axis=1)
X_test = test.drop(['class', 'Hate', 'Offensive', 'Neither'], axis=1)

# Targets: the three label columns, in the order the models' 3-unit output
# layer is trained against.
# NOTE(review): presumably one-hot indicators of 'class' - confirm against
# the CSV files.
y_train = train.loc[:, ['Hate', 'Offensive', 'Neither']]
y_test = test.loc[:, ['Hate', 'Offensive', 'Neither']]
# Build the vocabulary over the text of both splits so that every word in
# either split receives an index.
# NOTE(review): this lets test-set vocabulary influence the vocabulary size;
# fit on the training text only if a strict hold-out is required.
tkobj = Tokenizer()
all_ = pd.concat([train, test], axis=0)
reviews = all_['clean_txt']
tkobj.fit_on_texts(reviews)

# Convert each text into its list of integer word indices.
X_train_tkns = tkobj.texts_to_sequences(X_train['clean_txt'])
X_test_tkns = tkobj.texts_to_sequences(X_test['clean_txt'])

# Take the padded length from the tokenizer's own output rather than from a
# whitespace split: the tokenizer also splits on the punctuation it filters,
# so the two counts can disagree, and pad_sequences silently truncates any
# sequence longer than maxlen.
max_length = max(len(seq) for seq in X_train_tkns + X_test_tkns)

# Right-pad every sequence with zeros up to the common length.
X_train_pad = pad_sequences(X_train_tkns, maxlen=max_length, padding='post')
X_test_pad = pad_sequences(X_test_tkns, maxlen=max_length, padding='post')
# RNN + GRU modelling
# Keras word indices start at 1, so one extra slot is needed to cover index 0,
# which is the value pad_sequences pads with.
vsize = len(tkobj.word_index) + 1
Embed_dim = 100

# Embedding -> single GRU layer -> 3-way softmax over the label columns.
model1 = Sequential([
    Embedding(vsize, Embed_dim, input_length=max_length),
    GRU(units=32, dropout=0.2, recurrent_dropout=0.20),
    Dense(3, activation='softmax'),
])
model1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split doubles as the validation set that is reported each epoch.
model1.fit(
    X_train_pad,
    y_train,
    epochs=20,
    batch_size=256,
    validation_data=(X_test_pad, y_test),
)

# Per-class probabilities for every test example.
rnn_gru_preds = model1.predict(X_test_pad)
# RNN + LSTM modelling
# Same architecture as the GRU model, with the recurrent layer swapped for an
# LSTM: Embedding -> single LSTM layer -> 3-way softmax.
model2 = Sequential()
model2.add(Embedding(vsize, Embed_dim, input_length=max_length))
model2.add(LSTM(units=32, dropout=0.2, recurrent_dropout=0.20))
model2.add(Dense(3, activation='softmax'))
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# The test split doubles as the validation set that is reported each epoch.
model2.fit(X_train_pad, y_train, batch_size=256, validation_data=(X_test_pad, y_test), epochs=20)

# Store the LSTM predictions under their own name so they no longer overwrite
# the GRU model's predictions held in rnn_gru_preds.
rnn_lstm_preds = model2.predict(X_test_pad)