This repository has been archived by the owner on Aug 28, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 53
/
package_managers.py
237 lines (211 loc) · 9.32 KB
/
package_managers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import requests
import datetime
import re
from bs4 import BeautifulSoup
from db_connector import DBConnector, PackageManagerData
import sys
# u() normalises text literals used as search strings by the scrapers.
# On Python 3 a literal such as '\u2009' is already the intended character,
# so it must pass through untouched.  Running it through
# codecs.unicode_escape_decode there would re-read its UTF-8 bytes as
# Latin-1 and yield mojibake that never matches the scraped text.
# Only Python 2 byte-string literals still hold the raw escape sequence and
# need decoding.
if sys.version_info[0] >= 3:
    def u(x):
        """Return x unchanged; Python 3 literals are already unicode.

        :param x: a text literal
        :type x: string
        :returns: x itself
        :rtype: string
        """
        return x
else:
    import codecs

    def u(x):
        """Decode the escape sequences in a Python 2 literal.

        :param x: a byte-string literal that may contain unicode escapes
        :type x: string
        :returns: the decoded text
        :rtype: unicode
        """
        return codecs.unicode_escape_decode(x)[0]
class PackageManagers(object):
    """Collect time stamped package manager data from various package managers
    and store in a DB"""

    # Download-count slots, in the positional order update_db() expects.
    _COUNT_FIELDS = (
        'csharp',
        'nodejs',
        'php',
        'python',
        'ruby',
        'python_http_client',
        'python_open_source_library_data_collector',
        'ruby_http_client',
        'csharp_http_client',
        'php_http_client',
        'node_http_client',
    )

    def __init__(self):
        self.db = DBConnector()

    def update_package_manager_data(self, package_manager_urls):
        """Gets data given package manager urls and adds it to the DB

        Every URL is matched against the known package pages (see _route);
        each match is scraped and stored in its slot.  Slots that no URL
        matched stay None.  When several URLs feed the same slot, the last
        one wins.

        :param package_manager_urls: URL(s) to the package you want to obtain
                                     download data from
        :type package_manager_urls: Array of strings
        :returns: Returns the data object that was added to the DB
        :rtype: Data object
        """
        counts = dict.fromkeys(self._COUNT_FIELDS)
        for url in package_manager_urls:
            for field, scraper_name in self._route(url):
                # Look the scraper up on self so subclass overrides apply.
                counts[field] = getattr(self, scraper_name)(url)
        return self.update_db(
            *[counts[field] for field in self._COUNT_FIELDS])

    @staticmethod
    def _route(url):
        """Return the (slot, scraper method name) pairs that apply to url.

        :param url: the URL of the package
        :type url: string
        :returns: matches in the order they must be processed; empty when
                  the URL is not recognised
        :rtype: List of (string, string) tuples
        """
        matches = []
        if url == 'https://www.nuget.org/packages/SendGrid':
            matches.append(('csharp', 'csharp_downloads'))
        if url == ('https://www.nuget.org/packages/'
                   'SendGrid.CSharp.HTTP.Client'):
            matches.append(('csharp_http_client', 'csharp_downloads'))
        if 'https://www.npmjs.com/package/sendgrid' in url and \
                url != 'https://www.npmjs.com/package/sendgrid-rest':
            matches.append(('nodejs', 'nodejs_downloads'))
        if 'https://www.npmjs.com/package/sendgrid-rest' in url:
            matches.append(('node_http_client', 'nodejs_downloads'))
        if url == 'https://packagist.org/packages/sendgrid/sendgrid':
            matches.append(('php', 'php_downloads'))
        if url == 'https://packagist.org/packages/sendgrid/php-http-client':
            matches.append(('php_http_client', 'php_downloads'))
        if 'pypi' in url and 'sendgrid' in url:
            matches.append(('python', 'python_downloads'))
        if 'pypi' in url and 'python_http_client' in url:
            matches.append(('python_http_client', 'python_downloads'))
        if 'pypi' in url and 'open_source_library_data_collector' in url:
            matches.append(('python_open_source_library_data_collector',
                            'python_downloads'))
        if 'rubygems' in url and 'sendgrid' in url:
            matches.append(('ruby', 'ruby_downloads'))
        # Test for 'http' only after the scheme: every URL starts with
        # http(s)://, which would otherwise route the sendgrid gem page
        # into the HTTP client slot as well.
        if 'rubygems' in url and 'http' in url.split('://', 1)[-1]:
            matches.append(('ruby_http_client', 'ruby_downloads'))
        return matches

    def _first_element_text(self, url, tag, css_class):
        """Fetch url and return the text of the first matching element.

        :param url: the URL of the page to fetch
        :param tag: name of the HTML tag to look for
        :param css_class: value of the class attribute to look for
        :type url: string
        :type tag: string
        :type css_class: string
        :returns: all text nodes of the first match, concatenated
        :rtype: string
        :raises IndexError: if the page has no matching element
        """
        r = requests.get(url)
        soup = BeautifulSoup(r.text, "html.parser")
        elements = soup.findAll(tag, {"class": css_class})
        return ''.join(elements[0].findAll(text=True))

    def csharp_downloads(self, url):
        """Gets library download data from nuget.org

        :param url: the URL of the package
        :type url: string
        :returns: The number of total library downloads, as scraped text
                  with the ',' separators removed
        :rtype: string
        """
        r = requests.get(url)
        soup = BeautifulSoup(r.text, "html.parser")
        label = soup.find(text=re.compile(r'total downloads'))
        markup = str(label.parent)
        # Drop the last 39 characters of the parent element's markup and
        # keep the 9 characters before them.
        # NOTE(review): these offsets presumably mirror the nuget.org page
        # layout at the time of writing -- confirm against the live page.
        return markup[:-39][-9:].replace(',', '')

    def nodejs_downloads(self, url):
        """Gets library download data from npmjs.org

        :param url: the URL of the package
        :type url: string
        :returns: The number of library downloads in the last month, as
                  scraped text with the ',' separators removed
        :rtype: string
        """
        text = self._first_element_text(
            url, "strong", "pretty-number monthly-downloads")
        return text.replace(',', '')

    def php_downloads(self, url):
        """Gets library download data from packagist.org

        :param url: the URL of the package
        :type url: string
        :returns: The number of total library downloads, as scraped text
                  with the thin-space separators removed
        :rtype: string
        """
        text = self._first_element_text(
            url, "div", "facts col-xs-12 col-sm-6 col-md-12")
        # Skip the first 11 characters, remove the thin-space separators
        # and keep what is left of the first line.
        # NOTE(review): the 11-character offset presumably skips a label
        # in the packagist markup -- confirm against the live page.
        first_line = text[11:].replace(u('\u2009'), '').split('\n')[0]
        return str(first_line)

    def python_downloads(self, url):
        """Gets library download data from pypi.python.org

        :param url: the URL of the package
        :type url: string
        :returns: The number of library downloads in the last month, as
                  scraped text with the thin-space separators removed
        :rtype: string
        """
        text = self._first_element_text(url, "ul", "nodot")
        # Keep the text between the last 'week' and the last 'downloads',
        # minus its first two characters.
        return text.replace(u('\n'), '') \
            .rpartition('week')[-1] \
            .rpartition('downloads')[0][2:] \
            .replace(u('\u2009'), '')

    def ruby_downloads(self, url):
        """Gets library download data from rubygems.org

        :param url: the URL of the package
        :type url: string
        :returns: The number of total library downloads, as scraped text
                  with the ',' separators removed
        :rtype: string
        """
        text = self._first_element_text(url, "span", "gem__downloads")
        return text.replace(',', '')

    @staticmethod
    def _to_int_or_none(count):
        """Convert a scraped count to an int, passing None through.

        :param count: a download count, or None when it was not collected
        :type count: string, Integer or None
        :returns: int(count), or None when count is None
        :rtype: Integer or None
        :raises ValueError: if count is text that int() cannot parse
        """
        return None if count is None else int(count)

    def update_db(
            self,
            num_total_csharp_downloads,
            num_nodejs_monthly_downloads,
            num_php_downloads,
            num_python_downloads,
            num_ruby_downloads,
            num_python_http_client_downloads,
            num_python_open_source_library_data_collector_downloads,
            num_ruby_http_client_downloads,
            num_csharp_http_client_downloads,
            num_php_http_client_downloads,
            num_node_http_client_downloads
            ):
        """Update the DB with the package manager data

        Every count that is not None is converted with int() before use.

        :param num_total_csharp_downloads: # of total downloads
        :param num_nodejs_monthly_downloads: # of downloads in the last month
        :param num_php_downloads: # of total downloads
        :param num_python_downloads: # of downloads in the last month
        :param num_ruby_downloads: # of total downloads
        :param num_python_http_client_downloads: # of downloads
        :param num_python_open_source_library_data_collector_downloads:
            # of downloads
        :param num_ruby_http_client_downloads: # of downloads
        :param num_csharp_http_client_downloads: # of downloads
        :param num_php_http_client_downloads: # of downloads
        :param num_node_http_client_downloads: # of downloads
        :type num_total_csharp_downloads: Integer
        :type num_nodejs_monthly_downloads: Integer
        :type num_php_downloads: Integer
        :type num_python_downloads: Integer
        :type num_ruby_downloads: Integer
        :returns: Returns the data object that was added to the DB
        :rtype: Data object
        :raises ValueError: if a count is text that int() cannot parse
        """
        # Convert every argument, including the ones that are not stored
        # below, so that a malformed count is rejected in all cases.
        (csharp, nodejs, _php, _python, ruby, _python_http_client,
         _python_data_collector, ruby_http_client, csharp_http_client,
         _php_http_client, node_http_client) = [
            self._to_int_or_none(count) for count in (
                num_total_csharp_downloads,
                num_nodejs_monthly_downloads,
                num_php_downloads,
                num_python_downloads,
                num_ruby_downloads,
                num_python_http_client_downloads,
                num_python_open_source_library_data_collector_downloads,
                num_ruby_http_client_downloads,
                num_csharp_http_client_downloads,
                num_php_http_client_downloads,
                num_node_http_client_downloads,
            )
        ]
        # NOTE(review): the php and python counts are accepted but not
        # handed to PackageManagerData -- confirm against the model's
        # columns whether they should be stored as well.
        packagedata = PackageManagerData(
            date_updated=datetime.datetime.now(),
            csharp_downloads=csharp,
            nodejs_downloads=nodejs,
            ruby_downloads=ruby,
            csharp_http_client_downloads=csharp_http_client,
            ruby_http_client_downloads=ruby_http_client,
            node_http_client_downloads=node_http_client
        )
        return self.db.add_data(packagedata)