-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheex-check.py
23 lines (19 loc) · 943 Bytes
/
eex-check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import requests
import re
# quick & dirty script to scan EEX and look for links that may already be in the data catalog or DECDG systems
# CKAN API endpoints of the catalog being scanned.
PACKAGE_LIST_URL = 'https://energydata.info/api/3/action/package_list'
PACKAGE_SHOW_URL = 'https://energydata.info/api/3/action/package_show'

# Only datasets whose organization has this name are inspected.
ORGANIZATION_NAME = 'world-bank-grou'

# Seconds to wait on each HTTP call; without a timeout a stalled connection
# would hang the script indefinitely.
REQUEST_TIMEOUT = 30

# Dots are escaped so that only the literal worldbank.org hosts match
# (an unescaped '.' would also accept e.g. "dataXworldbank.org").
WORLDBANK_URL_RE = re.compile(
    r'^https?://(data|databank|datacatalog)\.worldbank\.org/')


def is_worldbank_link(link):
    """Return True if *link* starts with a data/databank/datacatalog
    worldbank.org URL; None and empty values are never a match."""
    # The API may return null for an unset URL; re.match would raise on None.
    return bool(link) and WORLDBANK_URL_RE.match(link) is not None


def find_worldbank_links(dataset):
    """Collect the World Bank links found in one dataset record.

    *dataset* is the 'result' dict of a package_show response.  Returns a
    list of (label, url) tuples, where label is 'Dataset URL' for the
    dataset's own URL or 'Resource URL' for one of its resources.  Datasets
    that do not belong to ORGANIZATION_NAME yield an empty list.
    """
    # 'organization' can be present but null, so .get()'s default is not
    # enough to guarantee a dict here.
    organization = dataset.get('organization') or {}
    if organization.get('name', '') != ORGANIZATION_NAME:
        return []

    links = []
    if is_worldbank_link(dataset.get('url')):
        links.append(('Dataset URL', dataset['url']))

    # NOTE(review): resources are only scanned for datasets that pass the
    # organization filter above -- confirm this matches the intended nesting.
    for resource in dataset.get('resources') or []:
        resource_url = resource.get('url')
        if is_worldbank_link(resource_url):
            links.append(('Resource URL', resource_url))
    return links


def main():
    """Walk every package in the catalog and print the World Bank links.

    Each hit is printed as '<label>: <package id> <url>'.  Network and JSON
    errors are left to propagate so a failed run is visible.
    """
    listing = requests.get(PACKAGE_LIST_URL, timeout=REQUEST_TIMEOUT).json()
    for package_id in listing['result']:
        response = requests.get(
            PACKAGE_SHOW_URL,
            params={'id': package_id},
            timeout=REQUEST_TIMEOUT,
        ).json()
        for label, link in find_worldbank_links(response['result']):
            # print() with a single pre-formatted string behaves the same on
            # Python 2 and Python 3.
            print('{}: {} {}'.format(label, package_id, link))


if __name__ == '__main__':
    main()