-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLabel_encoding.py
22 lines (16 loc) · 1.03 KB
/
Label_encoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import pandas as pd
# One-hot encode the integer `class` label of the train/test splits and write
# the result back to the same CSV files the splits were read from.

TRAIN_PATH = '/Users/umeshkethepalli/Desktop/Hate Speech/HateSpeech-5/data_files/Training_data.csv'
TEST_PATH = '/Users/umeshkethepalli/Desktop/Hate Speech/HateSpeech-5/data_files/Test_data.csv'

# Value found in the `class` column -> name of its indicator column.
CLASS_LABELS = {0: 'Hate', 1: 'Offensive', 2: 'Neither'}


def add_class_dummies(frame, label_column='class', labels=None):
    """Return a copy of *frame* with one 0/1 indicator column per class.

    Args:
        frame: DataFrame containing ``label_column``.
        label_column: Name of the column holding the class labels.
        labels: Mapping of label value -> indicator column name. Defaults to
            ``CLASS_LABELS``.

    Returns:
        A new DataFrame: the original columns followed by the indicator
        columns, in the order given by ``labels``. Label values that are not
        keys of ``labels`` keep an indicator column named after the raw value.

    Raises:
        KeyError: If ``label_column`` is not a column of ``frame``.
    """
    if labels is None:
        labels = CLASS_LABELS

    # dtype=int pins the indicators to 0/1; the pandas default differs across
    # versions (uint8 before 2.0, bool from 2.0), which changes the CSV text.
    dummies = pd.get_dummies(frame[label_column], dtype=int)

    # get_dummies only emits columns for values that occur in this frame.
    # Reindexing on the full label set adds an all-zero column for any class
    # that is absent, so every split ends up with the same schema.
    extras = [col for col in dummies.columns if col not in labels]
    dummies = dummies.reindex(columns=list(labels) + extras, fill_value=0)
    dummies = dummies.rename(columns=labels)

    # The script overwrites its own input, so on a re-run the indicator
    # columns are already present; drop them first to avoid duplicates.
    base = frame.drop(columns=list(labels.values()), errors='ignore')
    return pd.concat([base, dummies], axis=1)


def main():
    """Encode both splits and overwrite the CSV files in place."""
    # Read and encode everything before writing, so a failure on either
    # split leaves both files untouched.
    encoded = {
        path: add_class_dummies(pd.read_csv(path))
        for path in (TRAIN_PATH, TEST_PATH)
    }
    for path, frame in encoded.items():
        # index=False keeps the row index out of the file; otherwise it comes
        # back as an extra 'Unnamed: 0' column on the next read_csv.
        frame.to_csv(path, index=False)


if __name__ == '__main__':
    main()