44
55from os import makedirs
66from os import path
7- from urllib .request import urlopen , Request
7+ from urllib .request import Request
88from urllib .error import HTTPError
9- from zipfile import ZipFile , is_zipfile
109
11- from .base import ContentProvider
10+ from .doi import DoiProvider
1211from ..utils import copytree , deep_get
13- from ..utils import normalize_doi , is_doi
14- from .. import __version__
1512
1613
17- class Zenodo (ContentProvider ):
14+ class Zenodo (DoiProvider ):
1815 """Provide contents of a Zenodo deposit."""
1916
20- def _urlopen (self , req , headers = None ):
21- """A urlopen() helper"""
22- # someone passed a string, not a request
23- if not isinstance (req , Request ):
24- req = Request (req )
25-
26- req .add_header ("User-Agent" , "repo2docker {}" .format (__version__ ))
27- if headers is not None :
28- for key , value in headers .items ():
29- req .add_header (key , value )
30-
31- return urlopen (req )
32-
33- def _doi2url (self , doi ):
34- # Transform a DOI to a URL
35- # If not a doi, assume we have a URL and return
36- if is_doi (doi ):
37- doi = normalize_doi (doi )
38-
39- try :
40- resp = self ._urlopen ("https://doi.org/{}" .format (doi ))
41- # If the DOI doesn't resolve, just return URL
42- except HTTPError :
43- return doi
44- return resp .url
45- else :
46- # Just return what is actulally just a URL
47- return doi
48-
49- def detect (self , doi , ref = None , extra_args = None ):
50- """Trigger this provider for things that resolve to a Zenodo/Invenio record"""
17+ def __init__ (self ):
5118 # We need the hostname (url where records are), api url (for metadata),
5219 # filepath (path to files in metadata), filename (path to filename in
5320 # metadata), download (path to file download URL), and type (path to item type in metadata)
54- hosts = [
21+ self . hosts = [
5522 {
5623 "hostname" : ["https://zenodo.org/record/" , "http://zenodo.org/record/" ],
5724 "api" : "https://zenodo.org/api/records/" ,
@@ -73,9 +40,11 @@ def detect(self, doi, ref=None, extra_args=None):
7340 },
7441 ]
7542
76- url = self ._doi2url (doi )
43+ def detect (self , doi , ref = None , extra_args = None ):
44+ """Trigger this provider for things that resolve to a Zenodo/Invenio record"""
45+ url = self .doi2url (doi )
7746
78- for host in hosts :
47+ for host in self . hosts :
7948 if any ([url .startswith (s ) for s in host ["hostname" ]]):
8049 self .record_id = url .rsplit ("/" , maxsplit = 1 )[1 ]
8150 return {"record" : self .record_id , "host" : host }
@@ -90,53 +59,17 @@ def fetch(self, spec, output_dir, yield_output=False):
9059 "{}{}" .format (host ["api" ], record_id ),
9160 headers = {"accept" : "application/json" },
9261 )
93- resp = self ._urlopen (req )
62+ resp = self .urlopen (req )
9463
9564 record = json .loads (resp .read ().decode ("utf-8" ))
9665
97- def _fetch (file_ref , unzip = False ):
98- # the assumption is that `unzip=True` means that this is the only
99- # file related to the zenodo record
100- with self ._urlopen (deep_get (file_ref , host ["download" ])) as src :
101- fname = deep_get (file_ref , host ["filename" ])
102- if path .dirname (fname ):
103- sub_dir = path .join (output_dir , path .dirname (fname ))
104- if not path .exists (sub_dir ):
105- yield "Creating {}\n " .format (sub_dir )
106- makedirs (sub_dir , exist_ok = True )
107-
108- dst_fname = path .join (output_dir , fname )
109- with open (dst_fname , "wb" ) as dst :
110- yield "Fetching {}\n " .format (fname )
111- shutil .copyfileobj (src , dst )
112- # first close the newly written file, then continue
113- # processing it
114- if unzip and is_zipfile (dst_fname ):
115- yield "Extracting {}\n " .format (fname )
116- zfile = ZipFile (dst_fname )
117- zfile .extractall (path = output_dir )
118- zfile .close ()
119-
120- # delete downloaded file ...
121- os .remove (dst_fname )
122- # ... and any directories we might have created,
123- # in which case sub_dir will be defined
124- if path .dirname (fname ):
125- shutil .rmtree (sub_dir )
126-
127- new_subdirs = os .listdir (output_dir )
128- # if there is only one new subdirectory move its contents
129- # to the top level directory
130- if len (new_subdirs ) == 1 :
131- d = new_subdirs [0 ]
132- copytree (path .join (output_dir , d ), output_dir )
133- shutil .rmtree (path .join (output_dir , d ))
134-
13566 is_software = deep_get (record , host ["type" ]).lower () == "software"
13667 files = deep_get (record , host ["filepath" ])
13768 only_one_file = len (files ) == 1
13869 for file_ref in files :
139- for line in _fetch (file_ref , unzip = is_software and only_one_file ):
70+ for line in self .fetch_file (
71+ file_ref , host , output_dir , is_software and only_one_file
72+ ):
14073 yield line
14174
14275 @property
0 commit comments