-
Notifications
You must be signed in to change notification settings - Fork 0
/
utility_functions.py
244 lines (192 loc) · 7.93 KB
/
utility_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
from email.mime.base import MIMEBase
from email.mime.image import MIMEImage
import math
def df_to_json(df, columns=False):
"""
Returns columns in a Pandas dataframe as a JSON object
with the following structure:
{
"col_name":[val1,val2,val3],
"col_name2":[val1,val2,val3]
}
"""
if columns:
df_columns = columns
else:
df_columns = df.columns
json_dict = {}
for col in df_columns:
json_dict.update({col: list(df[col].values)})
return json_dict
def format_col(df, col_name, rounding=0, currency=False, percent=False):
"""Function to format numerical Pandas Dataframe columns (one at a time). WARNING: This function will convert the column to strings. Apply this function as the last step in your script.
Output is the formatted column.
Parameters:
df = the dataframe object
col_name = the name of the column as a string
rounding = decimal places to round to. ie 0 means round to the nearest whole number
currency = adds the $ symbol
percent = adds the % symbol and multiplies by 100
"""
import locale
import math
locale.setlocale(locale.LC_ALL, 'en_US.utf8')
round_by = '{:,.%sf}' % str(rounding)
if currency == True:
return df[col_name].apply(lambda x: '$' + round_by.format(x) if math.isnan(x) == False else x)
elif percent == True:
return df[col_name].apply(lambda x: round_by.format(x * 100) + '%' if math.isnan(x) == False else x)
else:
return df[col_name].apply(lambda x: round_by.format(x) if math.isnan(x) == False else x)
def format_value(value, rounding=0, currency=False, percent=False):
import locale
locale.setlocale(locale.LC_ALL, 'en_US.utf8')
round_by = '{:,.%sf}' % str(rounding)
if currency == True:
return '$' + round_by.format(value)
elif percent == True:
return round_by.format(value * 100) + '%'
else:
return round_by.format(value)
def format_df(df):
"""Attempts to autoformat numbers based on the column names.
if %: format as percent
if imps: format as integer with commas
if revenue, cost, cpm, rpm, profit: format as currency with 2 decimals
"""
for col in df.columns:
if '%' in col:
df[col] = format_col(
df, col, rounding=0, currency=False, percent=True)
if 'imps' in col:
df[col] = format_col(
df, col, rounding=0, currency=False, percent=False)
if any([x in col for x in ['revenue', 'cost', 'cpm', 'rpm', 'profit']]):
df[col] = format_col(
df, col, rounding=2, currency=True, percent=False)
def sql_list(iterable, format="int"):
"""Function to convert a Pandas dataframe column/series into an SQL friends string of comma separate values.
Parameters
==========================
iterable: can be any type of array, list, or pandas column/series
format: "int" or "string". "int" returns string with no quotes, "string" does.
"""
sql_list = ''
if format == "string":
for i in iterable:
sql_list += "'" + str(i) + "'" + ","
elif format == "int":
for i in iterable:
sql_list += str(i) + ','
else:
print 'Incorrect format parameter. Choose string or int.'
sql_list = sql_list[:-1] # delete last comma
return sql_list
def reorder_df_cols(df, new_order, print_summary=True):
"""
Function reorders columns in a Pandas DataFrame by using the column order index.
Parameters
=======================================
df: the dataframe for which you'd like to reorder columns
new_order: a list of integer values that correspond to the positions of the columns in the new order. Example: if columns in a df are 'a' 'b' 'c', and you
wanted to reorder to be 'b', 'a', 'c', the new_order list would be [1,0,2]
print_summary: if True, prints the original column order and index values and the new column order and index values
"""
orig_cols = df.columns.tolist()
if print_summary:
print "Original Order:\n"
for i in enumerate(orig_cols):
print i
new_cols = [orig_cols[i] for i in new_order]
df = df[new_cols]
if print_summary:
print "\nNew Order:\n"
for i in enumerate(new_cols):
print i
return df
def send_email(to_addr_list,
subject,
body,
from_addr,
smtpserver="mail.adnxs.net",
cc_addr_list=None,
attachments=None,
inline_images=None,
login=None,
password=None
):
"""
Function to send emails using Python.
Parameters
==================================
to_addr_list: Supply a list of recipients (even if only sending to one recipient). Example=['[email protected]','[email protected]']
subject: The email subject as a string
body: Body of the email. This can be HTML or plain text.
from_addr: The name and/or email of the sender. This can be any string value but it is recommend to follow the format:
Display Name <[email protected]>. For example if you are using this for the Manual Exclusions script,
you might set the from_addr to be: Manual Exclusions Alert <[email protected]>
smtpserver: default is mail.adnxs.net. Do not change unless you know what you're doing.
cc_addr_list: Provide cc email recipients in the same format as to_addr_list
attachments: Provide list of attachment locations. If in same directory as script, simply input the filename.
"""
import smtplib
import os
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email import Encoders
from bs4 import BeautifulSoup as bs
msgRoot = MIMEMultipart('mixed')
msg = MIMEMultipart('alternative')
soup = bs(body)
plain_text_body = str(soup.getText())
plain = MIMEText(plain_text_body, 'plain')
html = MIMEText(body, 'html')
msgRoot.add_header('From', from_addr)
msgRoot.add_header('To', ','.join(to_addr_list))
msgRoot.add_header('Subject', subject)
if attachments != None:
for attachment in attachments:
attached_file = MIMEBase('application', "octet-stream")
attached_file.set_payload(open(attachment, "rb").read())
Encoders.encode_base64(attached_file)
attached_file.add_header(
'Content-Disposition', 'attachment', filename=os.path.basename(attachment))
msgRoot.attach(attached_file)
if inline_images:
for inline_image in inline_images:
with open(inline_image['path'], 'rb') as file:
msg_image = MIMEImage(file.read(), name=os.path.basename(inline_image['path']))
msg.attach(msg_image)
msg_image.add_header('Content-ID', '<{}>'.format(inline_image['cid']))
msg.attach(plain)
msg.attach(html)
msgRoot.attach(msg)
server = smtplib.SMTP(smtpserver)
server.starttls()
server.login(login, password)
server.sendmail(from_addr, to_addr_list, msgRoot.as_string())
server.quit()
def add_lists(first, second):
return [x + y for x, y in zip(first, second)]
def format_col_names(input_df):
for col in input_df.columns:
new_col = col.replace('_', ' ')
new_col = new_col.title()
# print new_col
input_df = input_df.rename(columns={col: new_col})
return input_df
def format_trend_col(x):
# format_dict[col] = lambda x: '{0:.0%}'.format(x)
if math.isnan(x) == False:
if x > .2:
return "<span class='positive_trend'>" + \
'{0:.0%}'.format(x) + \
"</span>"
elif x < -.2:
return "<span class='negative_trend'>" + \
'{0:.0%}'.format(x) + \
"</span>"
else:
return '{0:.0%}'.format(x)
else:
return x