-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathhublist.py
322 lines (258 loc) · 11.8 KB
/
hublist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import bz2
import socket
import sys
import urllib
import urllib.request
import urllib.parse
import xml.etree.ElementTree as ET
from subprocess import run, PIPE
##### CAN BE CONFIGURED #####

# Hub list that is inserted at the front of `internet_hublists` before the
# downloads start, so it is always the first source to be read.
own_hublist = "https://dcnf.github.io/Hublist/ownDataHublist.xml"

# Remote hub lists to download. A URL ending in ".bz2" is decompressed with
# bz2 after the download; any other URL is used as-is.
internet_hublists = [
    # list based on:
    # - DC++: [dcpp/SettingsManager.cpp#l197](https://sourceforge.net/p/dcplusplus/code/ci/eb139c8d81a96ed6627b6fda8c94ffb325a0a308/tree/dcpp/SettingsManager.cpp#l197)
    # - AirDC++: [airdcpp/airdcpp/SettingsManager.cpp#L426](https://github.com/airdcpp/airdcpp-windows/blob/8c359424d883ba836b344383c862ba0b386fc30b/airdcpp/airdcpp/SettingsManager.cpp#L426)
    # - FlyLinkDC: [compiled/Settings/flylinkdc-config-r6xx.xml#L19](https://github.com/pavel-pimenov/flylinkdc-r6xx/blob/094f312eb07718f1583a7e08da4abe4557d01835/compiled/Settings/flylinkdc-config-r6xx.xml#L19)
    # - EiskaltDC++: [dcpp/SettingsManager.cpp#L165](https://github.com/eiskaltdcpp/eiskaltdcpp/blob/9b65fdd4f51b93a90a63ac84d638b7ff1f79771d/dcpp/SettingsManager.cpp#L165)
    "https://www.te-home.net/?do=hublist&get=hublist.xml",
    "https://dchublist.org/hublist.xml.bz2",
    "https://dchublist.ru/hublist.xml.bz2",
    "https://hublist.pwiam.com/hublist.xml",
    "http://dchublist.biz/?do=hublist.xml.bz2",
    "https://dcnf.github.io/Hublist/hublist.xml.bz2", # a backup
]

# Hub list files read from the local filesystem (plain XML, or bz2-compressed
# when the path ends in ".bz2"). Example entry:
local_hublists = [
    #"/home/user/file.xml",
]

# Timeout in seconds, installed below as the process-wide default for sockets.
TIMEOUT = 10

##### END OF THE CONFIGURATION #####

# Applies to every socket created afterwards that does not set its own timeout.
socket.setdefaulttimeout(TIMEOUT)
# Hub attributes handled by this script, as (attribute name, type) pairs.
# The order is significant: it is the order of the <Column> elements and of
# the attributes written on every <Hub> element of the generated list.
# The type string is copied verbatim into the Column's "Type" attribute.
attributes = (
    # useful for all
    ('Address', 'string'),
    ('Name', 'string'),
    ('Description', 'string'),
    ('Users', 'int'),
    ('Country', 'string'),
    ('Shared', 'bytes'),
    ('Minshare', 'bytes'),
    ('Minslots', 'int'),
    ('Maxhubs', 'int'),
    ('Maxusers', 'int'),
    ('Reliability', 'string'),
    ('Rating', 'string'),
    # normally useful, for NMDC
    ('Encoding', 'string'),
    # useful for flylinkdc
    ('Software', 'string'),
    ('Website', 'string'),
    ('Email', 'string'),
    ('ASN', 'string'),
    ('Operators', 'int'),
    ('Bots', 'int'),
    ('Infected', 'int'),
    # useful for website
    ('Status', 'string'),
    # useful for this script (alternative address used when detecting duplicates)
    ('Failover', 'string'),
)
# Supported NMDC encodings. Entries must be lowercase: a hub's Encoding
# attribute is lowercased before it is looked up in this list.
supported_encoding = ['utf-8', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1256', 'cp1257','gb18030']
# Supported URL schemes of the NMDC protocol
supported_schemas_dc = ['dchub', 'dchubs', 'nmdc', 'nmdcs']
# Supported URL schemes of the secure NMDC protocol
supported_schemas_dc_secure = ['dchubs', 'nmdcs']
# Supported URL schemes of the unsecure NMDC protocol
supported_schemas_dc_unsecure = ['dchub', 'nmdc']
# Supported URL schemes of the ADC protocol
supported_schemas_adc = ['adc', 'adcs']
# Supported URL schemes of the secure ADC protocol
supported_schemas_adc_secure = ['adcs']
# Supported URL schemes of the unsecure ADC protocol
supported_schemas_adc_unsecure = ['adc']
# In-place pretty-printer, adapted from
# http://effbot.org/zone/element-lib.htm#prettyprint
def indent(elem, level=0):
    """Insert newlines and indentation into *elem* and its descendants.

    Only ``text``/``tail`` values that are missing or whitespace-only are
    overwritten, so real character data is left untouched. The tree is
    modified in place and nothing is returned.

    *level* is the nesting depth of *elem*; callers normally omit it.
    """
    pad = "\n" + level * " "
    children = list(elem)

    if not children:
        # A leaf only needs its tail fixed, and never at the top level.
        if level and not (elem.tail and elem.tail.strip()):
            elem.tail = pad
        return

    if not (elem.text and elem.text.strip()):
        # The first child starts one level deeper than this element.
        elem.text = pad + " "
    if not (elem.tail and elem.tail.strip()):
        elem.tail = pad

    for child in children:
        indent(child, level + 1)

    # The last child is followed by this element's closing tag, so its tail
    # goes back to this element's own indentation.
    last_child = children[-1]
    if not (last_child.tail and last_child.tail.strip()):
        last_child.tail = pad
def addr_complete(addr_hub):
    """Return *addr_hub* completed with a default scheme and default port.

    * An address without a ``scheme://`` prefix gets ``dchub://`` prepended.
      The prefix is detected textually because ``urlparse`` reads a bare
      ``host:411`` as scheme ``host`` on current Python versions.
    * A ``dchub`` address that names a host but no port gets port 411. The
      port is inserted into the network location, so a trailing path or
      query stays after it.

    Any other address is returned unchanged.

    Raises ValueError (from ``urllib.parse``) when the address carries a
    port that is not a valid number.
    """
    if '://' not in addr_hub:
        addr_hub = 'dchub://' + addr_hub

    # Parse only after the scheme was added, so a freshly prefixed address
    # also receives the default port.
    url_info = urllib.parse.urlparse(addr_hub)
    if url_info.scheme == 'dchub' and url_info.hostname and url_info.port is None:
        # rstrip handles "host:" (a colon followed by an empty port).
        netloc = url_info.netloc.rstrip(':') + ':411'
        addr_hub = url_info._replace(netloc=netloc).geturl()
    return addr_hub
def hub_addr_compare(adrr_hub1, adrr_hub2):
    """Return True when both addresses have the same hostname and port.

    Scheme, path and query are ignored. A missing port is ``None`` and only
    matches another missing port.
    """
    first = urllib.parse.urlparse(adrr_hub1)
    second = urllib.parse.urlparse(adrr_hub2)
    # Ports are compared only once the hostnames are known to match.
    return first.hostname == second.hostname and first.port == second.port
def duplicate_hub(hub1, hub2):
    """Tell whether two hub elements describe the same hub.

    The hubs are duplicates when any of their main or failover addresses
    match (host and port). Otherwise they are duplicates only when Name,
    Description and Encoding are all set on both and all equal, and their
    Status values, when both are set, do not differ.
    """
    def is_set(hub, key):
        value = hub.attrib.get(key)
        return value is not None and value != ''

    def set_on_both(key):
        return is_set(hub1, key) and is_set(hub2, key)

    # CHECK ADDR
    ## First check: the two main addresses
    if hub_addr_compare(hub1.attrib['Address'], hub2.attrib['Address']):
        return True

    hub1_has_failover = is_set(hub1, 'Failover')
    hub2_has_failover = is_set(hub2, 'Failover')

    ## Second check: failover of hub1 against address of hub2
    if hub1_has_failover and hub_addr_compare(hub1.attrib['Failover'], hub2.attrib['Address']):
        return True
    ## Third check: failover of hub2 against address of hub1
    if hub2_has_failover and hub_addr_compare(hub2.attrib['Failover'], hub1.attrib['Address']):
        return True
    ## Fourth check: the two failover addresses
    if hub1_has_failover and hub2_has_failover and hub_addr_compare(hub1.attrib['Failover'], hub2.attrib['Failover']):
        return True

    # CHECK STATUS: two known but different statuses rule out a duplicate
    if set_on_both('Status') and hub1.attrib['Status'] != hub2.attrib['Status']:
        return False

    # CHECK SAME NAME, DESCRIPTION AND ENCODING (all three must be set on both)
    identity_keys = ('Name', 'Description', 'Encoding')
    if all(set_on_both(key) for key in identity_keys):
        return all(hub1.attrib[key] == hub2.attrib[key] for key in identity_keys)
    return False
def priorize_hub(hub):
    """Return the sort rank of *hub* based on its address (lower is better).

    Priority:
      1 - ADCS with key (query string starting with ``kp=``)
      2 - ADCS
      3 - ADC
      4 - DCHUBS / NMDCS
      5 - DCHUB / NMDC
      6 - anything else
    """
    address = urllib.parse.urlparse(hub.attrib['Address'])
    scheme = address.scheme

    if scheme in supported_schemas_adc_secure:
        return 1 if address.query.startswith('kp=') else 2

    ranked_schemes = (
        (3, supported_schemas_adc_unsecure),
        (4, supported_schemas_dc_secure),
        (5, supported_schemas_dc_unsecure),
    )
    for rank, schemes in ranked_schemes:
        if scheme in schemes:
            return rank
    return 6
def hub_merge(hub1, hub2):
    """Fill the empty attributes of *hub1* with the values from *hub2*.

    Only the names listed in ``attributes`` are considered. A value already
    present on *hub1* is never overwritten. *hub1* is modified in place and
    returned.
    """
    target = hub1.attrib
    source = hub2.attrib
    for att, _ in attributes:
        donor_value = source.get(att)
        if donor_value is None or donor_value == '':
            # hub2 has nothing to offer for this attribute
            continue
        current_value = target.get(att)
        if current_value is None or current_value == '':
            target[att] = donor_value
    return hub1
# Raw XML documents (bytes or str) collected from every source, in read order
xml_files = []

# Download files (and extract if necessary)
# The project's own list goes first so that it is read before the others.
internet_hublists.insert(0, own_hublist)
for url in internet_hublists:
    print('Will download hub list from', url)
    # The context manager closes the HTTP response as soon as it is read.
    with urllib.request.urlopen(url) as response:
        xml_file = response.read()
    if url.endswith('.bz2'):
        xml_file = bz2.decompress(xml_file)
    xml_files.append(xml_file)
# Import local files
for local_hublist in local_hublists:
    print('Loading hub list from', local_hublist)
    # A path ending in ".bz2" is read through bz2; either way the file is
    # closed as soon as it has been parsed.
    open_hublist = bz2.open if local_hublist.endswith('.bz2') else open
    with open_hublist(local_hublist, 'rb') as hublist_file:
        root = ET.parse(hublist_file).getroot()
    # Stored as text, like the downloads, to be parsed again in the next step
    xml_files.append(ET.tostring(root).decode())
# Parsing XML files: collect every usable <Hub> element into hubs_from_xml
hubs_from_xml = []
for xml_file in xml_files:
    # NOTE(review): these documents come from third-party servers, and the
    # Python documentation warns that xml.etree.ElementTree is not hardened
    # against maliciously constructed data.
    root = ET.fromstring(xml_file)
    for hub_element in root.iter('Hub'):
        # An entry without an address cannot be used; skip it instead of
        # aborting the whole run.
        if not hub_element.attrib.get('Address'):
            print('Missing address:', hub_element.attrib.get('Name'))
            continue

        address = addr_complete(hub_element.attrib['Address'])
        hub_element.attrib['Address'] = address
        # Same for failover
        failover = hub_element.attrib.get('Failover')
        if failover is not None and failover != '':
            hub_element.attrib['Failover'] = addr_complete(failover)

        scheme = urllib.parse.urlparse(address).scheme
        if scheme in supported_schemas_dc:
            encoding = hub_element.attrib.get('Encoding')
            if encoding is None or encoding == '':
                # Delete if no Encoding is set (dropped without a message)
                continue
            if encoding.lower() in supported_encoding:
                hubs_from_xml.append(hub_element)
            else:
                print('Unknown encoding:', encoding, address)
        elif scheme in supported_schemas_adc:
            # ADC hubs are always recorded with UTF-8
            hub_element.attrib['Encoding'] = 'UTF-8'
            hubs_from_xml.append(hub_element)
        else:
            print('Unknown scheme:', scheme, address)
# Best-ranked hubs first (lowest priorize_hub value). list.sort is stable, so
# hubs of equal rank keep the order in which their sources were read.
hubs_from_xml.sort(key=priorize_hub)

# Final, deduplicated hubs that will be written to the output list
clean_hubs = []

# Each pass handles the first remaining hub and removes it, together with
# every duplicate of it, from hubs_from_xml.
while len(hubs_from_xml) != 0:
    hub_from_xml = hubs_from_xml[0]
    HUB_TO_KEEP = True
    if len(sys.argv) >= 2:
        # The first command-line argument is the program to run; it is
        # called with a 'ping' sub-command and the hub address, and its
        # standard output is parsed as a single XML element.
        cmd = [sys.argv[1], 'ping', hub_from_xml.attrib['Address'], '--out=xml-line', '--hubs=2', '--slots=6', '--share=324882100000']
        if urllib.parse.urlparse(hub_from_xml.attrib['Address']).scheme in supported_schemas_dc:
            cmd.append('--encoding=' + hub_from_xml.attrib['Encoding'])
        # check=False: a non-zero exit status does not raise here
        output = run(cmd, check=False, stdout=PIPE).stdout
        hub_response = ET.fromstring(output)
        hub_response.attrib['Address'] = addr_complete(hub_response.attrib['Address'])
        print('URL returned by the hub', hub_from_xml.attrib['Address'], '~', hub_response.attrib['Address'])
        if hub_response.attrib['Status'] == 'Error':
            # NOTE(review): the meaning of error code 226 is not visible in
            # this file; hubs answering with it are dropped from the output.
            if hub_response.attrib.get('ErrCode') == '226':
                HUB_TO_KEEP = False
            # NOTE(review): this branch looks unreachable, because Status is
            # known to be 'Error' at this point. Possibly the status of
            # hub_from_xml was meant - confirm with the author.
            elif hub_response.attrib.get('Status') == 'Offline':
                HUB_TO_KEEP = False
            else:
                # Any other error: keep the entry from the lists, marked Offline
                hub_from_xml.attrib['Status'] = 'Offline'
                hub_response = hub_from_xml
    else:
        # No ping program given: trust the data from the hub lists
        hub_response = hub_from_xml
    # Fold every duplicate into hub_response and drop it from the work list.
    # Iterating over a copy makes removing from hubs_from_xml safe.
    for duplicata_hub in list(hubs_from_xml):
        if (duplicate_hub(duplicata_hub, hub_response)):
            hub_response = hub_merge(hub_response, duplicata_hub)
            hubs_from_xml.remove(duplicata_hub)
    # If the hub answered with a different address (redirection), also remove
    # the entries matching the old address from the work list.
    if hub_from_xml.attrib['Address'] != hub_response.attrib['Address']:
        for duplicata_hub in list(hubs_from_xml):
            if (duplicate_hub(duplicata_hub, hub_from_xml)):
                hubs_from_xml.remove(duplicata_hub)
    if HUB_TO_KEEP:
        clean_hubs.append(hub_response)
# Prepare output file: <Hublist> / <Hubs> / (<Columns>, then one <Hub> per hub)
merge_root = ET.Element('Hublist', Name='The DCNF Hublist', Address='https://dcnf.github.io/Hublist/')
merge_hubs = ET.SubElement(merge_root, 'Hubs')
merge_cols = ET.SubElement(merge_hubs, 'Columns')

# One <Column> per known attribute, in declaration order
for column_name, column_type in attributes:
    ET.SubElement(merge_cols, 'Column', Name=column_name, Type=column_type)

# One <Hub> per kept hub; every known attribute is always written, with an
# empty string standing in for a missing value
for hub_add in clean_hubs:
    known = hub_add.attrib
    attribs = {
        key: '' if known.get(key) is None else known.get(key)
        for key, _ in attributes
    }
    ET.SubElement(merge_hubs, 'Hub', attribs)

indent(merge_root)
merge_tree = ET.ElementTree(merge_root)
merge_tree.write('hublist.xml', encoding='UTF-8', xml_declaration=True)
# bz2: write a compressed copy of the XML file that was just produced
# (compression level 9). Both files are closed by their context managers.
with open('hublist.xml', 'rb') as xml_fh:
    tarbz2contents = bz2.compress(xml_fh.read(), 9)
with open('hublist.xml.bz2', 'wb') as fh:
    fh.write(tarbz2contents)