forked from bg459/countingcomments
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunner.py
221 lines (200 loc) · 8.76 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import sys
import argparse
import re
from thread import Thread
import time
# Helper: checks if a string is a time string
def contains_time(s):
    """Return True when ``s`` looks like a timestamp line.

    A string qualifies when it contains a colon and either of the
    upper-case markers "AM" or "PM" anywhere in it. This is a plain
    substring heuristic; no attempt is made to parse the time.
    """
    return ":" in s and ("AM" in s or "PM" in s)
# Given a list of names, sort alphabetically by last name.
# Assertion, names have 2+ words and "last name" is the very last word of the name.
def sort_alphabetical(l):
    """Sort ``l`` in place by the last word of each name and return it.

    The "last name" is the final whitespace-separated word. A name that is
    empty or only whitespace has no words; it is given an empty sort key
    (so it sorts first) instead of raising IndexError.

    The same list object that was passed in is returned.
    """
    def last_word(name):
        words = name.split()
        return words[-1] if words else ""

    l.sort(key=last_word)
    return l
# thread marker: "Selected text", "|"
def read_path(path, owner="Claire Chen"):
    """Read a pasted comment dump and split it into threads.

    The file is read line by line. A "marker" is recorded at every index
    ``i`` whose following line looks like a timestamp (see
    ``contains_time``); the text between consecutive markers forms one
    entry. Entries that contain an automated message (an exact match in
    ``exact_strikes`` or a substring match in ``inclusion_strikes``) are
    discarded, unless a later "Re-opened" line re-validates them.

    Non-empty lines of the surviving entries are accumulated into the
    current thread, skipping "Selected text", "|" and the line right after
    a "|". Whenever a line equal to ``owner`` is seen, the accumulated
    thread is closed, printed, and a new one is started.

    Args:
        path: location of the text file to parse.
        owner: the exact line that terminates a thread. Defaults to the
            value that used to be hard-coded here.

    Returns:
        A list of threads, each a list of strings. Lines accumulated after
        the last ``owner`` line are not returned. A file with fewer than
        two lines yields an empty list.
    """
    with open(path, "r") as handle:
        lines = handle.readlines()

    # A name line followed by a timestamp line needs at least two lines;
    # anything shorter cannot contain a comment (and lines[1] would fail).
    if len(lines) < 2:
        return []

    # If the first line is not contained in the second one, synthesise a
    # leading name line from the text that precedes the first digit of
    # line 0 (empty when line 0 has no digit) and prepend it.
    # NOTE(review): this comparison runs on the raw lines, which still carry
    # their trailing newline -- confirm that is intended.
    if lines[1].find(lines[0]) < 0:
        digit = re.search(r"\d", lines[0])
        cut = digit.start() if digit is not None else 0
        lines = [lines[0][:cut]] + lines

    lines = [line.replace("\n", "") for line in lines]

    # A previously disabled "Selected text" special case was dropped here,
    # together with its unused `special_case` local.

    # Remove a name duplicated at the start of the following line and record
    # a marker at every index whose next line is a timestamp.
    markers = []
    for i in range(len(lines) - 1):
        if lines[i + 1].find(lines[i]) >= 0:
            lines[i + 1] = lines[i + 1].replace(lines[i], "")
        if contains_time(lines[i + 1]):
            markers.append(i)
    markers.append(len(lines))

    # An entry is struck when one of its lines EXACTLY equals one of these...
    exact_strikes = frozenset([
        "Suggestion accepted", "Suggestion rejected", "Add space",
        "Made a suggestion", "Marked as resolved", "Add paragraph",
        "Add tab", "Delete space", "Delete paragraph", "Delete tab",
    ])
    # ...or when one of its lines CONTAINS one of these.
    inclusion_strikes = ("Delete:", "Add:", "Replace:")

    threads = []
    thread = []
    for idx in range(len(markers) - 1):
        # Each entry starts one line before its marker. Clamp at 0 so a
        # marker at index 0 does not turn into the wrap-around slice
        # lines[-1:...], which would silently drop the first entry.
        begin = max(markers[idx] - 1, 0)
        entry = lines[begin:markers[idx + 1] - 1]

        valid = True
        for text in entry:
            if text in exact_strikes:
                valid = False
            if any(tag in text for tag in inclusion_strikes):
                valid = False
            # "Marked as resolved" is acceptable when the thread was
            # re-opened afterwards, so toggle the entry back to valid.
            if text == "Re-opened":
                valid = True
        if not valid:
            continue

        prev = ""
        for text in entry:
            if text != "":
                if text != "Selected text" and text != "|" and prev != "|":
                    thread.append(text)
                if text == owner and thread:
                    threads.append(thread)
                    print(thread)
                    thread = []
            prev = text
    return threads
def _unique_sorted_names(master):
    """Collect every distinct user across ``master``, sorted by last name."""
    names = set()
    for thread in master:
        names.update(thread.get_users())
    return sort_alphabetical(list(names))


def _print_stats(master):
    """Print one row per student with their comment and reply counts."""
    names = _unique_sorted_names(master)
    comments = dict.fromkeys(names, 0)
    replies = dict.fromkeys(names, 0)
    for thread in master:
        # Position 0 in a thread's user list is counted as a comment,
        # every later position as a reply.
        for position, user in enumerate(thread.get_users()):
            if position == 0:
                comments[user] += 1
            else:
                replies[user] += 1
    print("Statistics of all students, with number of comments and replies.")
    print('%-40s%-30s%-30s' % ("Name", "Number of Comments", "Number of Replies"))
    for name in names:
        print('%-40s%-30i%-30i' % (str(name), comments[name], replies[name]))


def _timestamp_key(stamp):
    """Return a sortable struct_time for a stamp such as "10:30 AM Mar 3".

    The stamps carry no year. A fixed year is appended before parsing
    because parsing a day of month without a year is ambiguous and
    deprecated in newer Python versions; the relative order of the stamps
    is unaffected.
    """
    return time.strptime(stamp + " 2000", "%I:%M %p %b %d %Y")


def _write_verbose(master, path):
    """Write each student's comment/reply timestamps to ``path``."""
    with open(path, "w") as writer:
        # NOTE(review): the header announces reverse chronology, but the
        # entries below are sorted in ascending order -- confirm which one
        # is intended.
        writer.write("List of all timestamps of comments for each student. Students sorted alphabetically and timestamps"
                     + " sorted in reverse chronology.\n")
        for name in _unique_sorted_names(master):
            times = []
            types = []
            writer.write("NAME: " + name + "\n")
            for thread in master:
                if name in thread.get_users():
                    temp_time, temp_type = thread.get_time_for_name(name)
                    times.extend(temp_time)
                    types.extend(temp_type)
            # Drop anything from the first " (" onwards before parsing.
            cleaned = []
            for stamp in times:
                cut = stamp.find(" (")
                cleaned.append(stamp[:cut] if cut >= 0 else stamp)
            # Iterating the sorted pairs directly (instead of unpacking
            # zip(*...)) keeps an empty list from raising ValueError.
            ordered = sorted(zip(cleaned, types),
                             key=lambda pair: _timestamp_key(pair[0]))
            for stamp, kind in ordered:
                if kind == 1:
                    writer.write(stamp + " - COMMENT\n")
                else:
                    writer.write(stamp + " - REPLY\n")
    print("Output written to file " + path + ".")


def _write_full(master, path):
    """Write every thread, in document order, with its comments to ``path``."""
    with open(path, "w") as writer:
        for thread in master:
            writer.write("****\n")
            writer.write("SELECTED TEXT: " + thread.get_selected() + "\n")
            users = thread.get_users()
            comments = thread.get_comments()
            times = thread.get_times()
            for i, user in enumerate(users):
                writer.write(user + " (" + times[i] + "):\n")
                writer.write(comments[i] + "\n")
    print("Output written to file " + path + ".")


def main():
    """Command-line entry point.

    Parses the arguments, reads the comment dump given by ``--file`` into
    ``Thread`` objects and then runs exactly one report, chosen in this
    order of precedence: ``--names``, ``--stats``, ``--verbose``,
    ``--full``. With none of them, the statistics table is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--file', required=True, help="location of the file")
    parser.add_argument('--names', action='store_true',
                        help="Lists all the names of students who commented, in alphabetical order by last name")
    parser.add_argument('--stats', action='store_true',
                        help="Shows all students in alphabetical order, with number of comments and number of replies")
    parser.add_argument('--verbose', metavar="output", type=str,
                        help="Shows all students in alphabetical order, with sequenced list of each reply. "
                             "No reply contents. Writes it to an output file.")
    parser.add_argument('--full', metavar="output", type=str,
                        help="Writes to output file of the comments on the google doc, cleaned up from automated "
                             "messages, in the order they appear on the document")
    args = parser.parse_args()

    master = []
    for raw_thread in read_path(args.file):
        thread = Thread()
        thread.process_text(raw_thread)
        master.append(thread)

    if args.names:
        names = _unique_sorted_names(master)
        print("Total " + str(len(names)) + " unique participants")
        print(names)
    elif args.stats:
        _print_stats(master)
    elif args.verbose:
        _write_verbose(master, args.verbose)
    elif args.full:
        _write_full(master, args.full)
    else:
        # No report selected: fall back to the statistics table.
        _print_stats(master)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()