-
Notifications
You must be signed in to change notification settings - Fork 0
/
2 parts of speech.py
78 lines (64 loc) · 2.4 KB
/
2 parts of speech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
import xlwt
from xlwt import Workbook
import operator
import numpy as np
import math
import pandas as pd
from pandas import DataFrame
# Company glossary workbooks, one per company; every workbook is read
# once for each part of speech that is processed.
file_path = [
    'companies_glossary/HUL.xlsx',
    'companies_glossary/Colgate.xlsx',
    'companies_glossary/ITC.xlsx',
    'companies_glossary/Dabur.xlsx',
    'companies_glossary/Godrej.xlsx',
    'companies_glossary/Marico.xlsx',
    'companies_glossary/Nestle.xlsx',
    'companies_glossary/PnG.xlsx',
]
class standards_and_sentiments:
    """Merge the per-company glossaries for a single part of speech.

    Each company workbook listed in the module-level ``file_path`` is
    expected to hold one sheet per part of speech (e.g. ``"Nouns"``) whose
    columns B:F are, in order: frequency, text, lemma, pos, afinn sentiment.
    """

    def __init__(self, POS):
        """Remember which part-of-speech sheet this instance works on.

        Args:
            POS: Sheet name to read from every company workbook; it is also
                used as the output file name and output sheet name.
        """
        self.partofspeech = POS

    # READ PoS
    def read_partofspeech(self):
        """Collect the rows of this part of speech from every company.

        Returns:
            A list of rows (plain Python lists), one per spreadsheet row,
            concatenated in the order of the module-level ``file_path``.
        """
        master_list = []
        for company in file_path:
            sheet = pd.read_excel(
                company, sheet_name=self.partofspeech, usecols='B:F'
            )
            master_list.extend(sheet.values.tolist())
        return master_list

    # REMOVE duplicates
    def reduce_glossary(self, sorted_words, min_frequency=100):
        """Merge adjacent rows sharing a lemma and drop infrequent ones.

        Args:
            sorted_words: Sequence of rows
                ``[frequency, text, lemma, pos, afinn sentiment]`` ordered so
                that rows with the same lemma are adjacent. Lists and numpy
                arrays are both accepted; the input is not modified.
            min_frequency: Minimum summed frequency a lemma needs in order
                to be kept. Defaults to 100.

        Returns:
            A list of rows, one per run of equal lemmas. Each row keeps the
            text/pos/sentiment of the first row of its run and carries the
            summed frequency as an ``int``.
        """
        merged = []
        previous_lemma = None
        for row in sorted_words:
            # Compare lemmas as text so that rows coming from a numpy string
            # array and rows coming from plain lists behave the same way.
            lemma = str(row[2])
            # Frequencies may arrive as ints, floats or numeric strings;
            # coerce them so that they are added, never concatenated.
            frequency = int(float(row[0]))
            if merged and lemma == previous_lemma:
                merged[-1][0] += frequency
            else:
                merged.append([frequency, *row[1:]])
                previous_lemma = lemma
        return [entry for entry in merged if entry[0] >= min_frequency]

    # WRITE PoS
    def write_partofspeech(self):
        """Build the merged glossary and write it to an Excel workbook.

        The result is printed and saved to
        ``companies_glossary/<part of speech>.xlsx`` in a sheet named after
        the part of speech.
        """
        rows = self.read_partofspeech()
        # reduce_glossary merges *adjacent* rows with equal lemmas, so the
        # rows have to be ordered by lemma (text only breaks ties).
        rows.sort(key=lambda row: (str(row[2]), str(row[1])))
        glossary = self.reduce_glossary(rows)
        # Passing the column names to the constructor also works when the
        # glossary is empty.
        df_words = pd.DataFrame(
            glossary,
            columns=['frequency', 'text', 'lemma', 'pos', 'afinn sentiment'],
        )
        print(df_words)
        # glossary to excel; the context manager saves the file on exit, so
        # no explicit save call is needed.
        with pd.ExcelWriter(
            "companies_glossary/" + self.partofspeech + ".xlsx"
        ) as writer:
            df_words.to_excel(writer, sheet_name=self.partofspeech)
# One glossary builder per part of speech; the workbooks are written in the
# order nouns, verbs, adverbs, adjectives.
Nouns = standards_and_sentiments("Nouns")
Verbs = standards_and_sentiments("Verbs")
Adverbs = standards_and_sentiments("Adverbs")
Adjectives = standards_and_sentiments("Adjectives")
for _glossary in (Nouns, Verbs, Adverbs, Adjectives):
    _glossary.write_partofspeech()