logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

-icecube_data_base_url = "https://icecube.wisc.edu/data-releases"
+# icecube_data_base_url = "https://icecube.wisc.edu/data-releases"
data_directory = os.path.abspath(os.path.join(os.path.expanduser("~"), ".icecube_data"))

+available_datasets = {
+    "20210126": {
+        "url": "https://dataverse.harvard.edu/api/access/dataset/:persistentId/?persistentId=doi:10.7910/DVN/VKL316",
+        "dir": "20210126_PS-IC40-IC86_VII",
+        "subdir": "icecube_10year_ps",
+    },
+    "20181018": {
+        "url": "https://icecube.wisc.edu/data-releases/20181018_All-sky_point-source_IceCube_data%20_years_2010-2012.zip",
+        "dir": "20181018_All-sky_point-source_IceCube_data%20_years_2010-2012",
+        "subdir": ""
+    },
+    "20150820": {
+        "url": "https://icecube.wisc.edu/data-releases/20150820_Astrophysical_muon_neutrino_flux_in_the_northern_sky_with_2_years_of_IceCube_data.zip",
+        "dir": "20150820_Astrophysical_muon_neutrino_flux_in_the_northern_sky_with_2_years_of_IceCube_data",
+        "subdir": ""
+    },
+    "20131121": {
+        "url": "https://icecube.wisc.edu/data-releases/20131121_Search_for_contained_neutrino_events_at_energies_above_30_TeV_in_2_years_of_data.zip",
+        "dir": "20131121_Search_for_contained_neutrino_events_at_energies_above_30_TeV_in_2_years_of_data",
+        "subdir": "",
+    }
+}
+
available_irf_periods = ["IC40", "IC59", "IC79", "IC86_I", "IC86_II"]

available_data_periods = [
@@ -50,10 +73,10 @@ class IceCubeData:

    def __init__(
        self,
-        base_url=icecube_data_base_url,
+        # base_url=icecube_data_base_url,
        data_directory=data_directory,
        cache_name=".cache",
-        update=False,
+        # update=False,
    ):
        """
        Handle the interface with IceCube's public data
@@ -65,7 +88,7 @@ def __init__(
        :param update: Refresh the cache if true
        """

-        self.base_url = base_url
+        # self.base_url = base_url

        self.data_directory = data_directory

@@ -74,21 +97,21 @@ def __init__(
            expire_after=-1,
        )

-        self.ls(verbose=False, update=update)
-
        # Make data directory if it doesn't exist
        if not os.path.exists(self.data_directory):
            os.makedirs(self.data_directory)

+
    def ls(self, verbose=True, update=False):
        """
        List the available datasets.

        :param verbose: Print the datasets if true
        :param update: Refresh the cache if true
        """
+        raise NotImplementedError()

-        self.datasets = []
+        available_datasets = []

        if update:
            requests_cache.clear()
@@ -104,7 +127,7 @@ def ls(self, verbose=True, update=False):
                href = link.get("href")

                if ".zip" in href:
-                    self.datasets.append(href)
+                    available_datasets.append(href)

                    if verbose:
                        print(href)
@@ -116,7 +139,7 @@ def find(self, search_string):

        found_datasets = []

-        for dataset in self.datasets:
+        for dataset in available_datasets:
            if search_string in dataset:
                found_datasets.append(dataset)

@@ -137,44 +160,49 @@ def fetch(self, datasets, overwrite=False, write_to=None):
            self.data_directory = write_to

        for dataset in datasets:
-            if dataset not in self.datasets:
+            if dataset not in available_datasets:
                raise ValueError(
                    "Dataset %s is not in list of known datasets" % dataset
                )
-
-            url = os.path.join(self.base_url, dataset)
-
-            local_path = os.path.join(self.data_directory, dataset)
-
+
+            ds = available_datasets[dataset]
+            url = ds["url"]
+            dl_dir = ds["dir"]
+            local_path = os.path.join(self.data_directory, dl_dir)
+            subdir = ds["subdir"]
+            file = os.path.join(local_path, dl_dir + ".zip")
            # Only fetch if not already there!
-            if not os.path.exists(os.path.splitext(local_path)[0]) or overwrite:
+            if not os.path.exists(local_path) or overwrite:
+                os.makedirs(local_path, exist_ok=True)
                # Don't cache this as we want to stream
                with requests_cache.disabled():
                    response = requests.get(url, stream=True)

                    if response.ok:
-                        total = int(response.headers["content-length"])

                        # For progress bar description
                        short_name = dataset
                        if len(dataset) > 40:
                            short_name = dataset[0:40] + "..."

                        # Save locally
-                        with open(local_path, "wb") as f, tqdm(
-                            desc=short_name, total=total
+                        with open(file, "wb") as f, tqdm(
+                            desc=short_name,
                        ) as bar:
                            for chunk in response.iter_content(chunk_size=1024 * 1024):
                                size = f.write(chunk)
                                bar.update(size)

                        # Unzip
-                        dataset_dir = os.path.splitext(local_path)[0]
-                        with ZipFile(local_path, "r") as zip_ref:
+                        if subdir:
+                            dataset_dir = os.path.join(local_path, subdir)
+                        else:
+                            dataset_dir = local_path
+                        with ZipFile(file, "r") as zip_ref:
                            zip_ref.extractall(dataset_dir)

                        # Delete zipfile
-                        os.remove(local_path)
+                        os.remove(file)

                        # Check for further compressed files in the extraction
                        tar_files = find_files(dataset_dir, ".tar")
@@ -198,22 +226,28 @@ def fetch_all_to(self, write_to, overwrite=False):
        """
        Download all data to a given location
        """
-
-        self.fetch(self.datasets, write_to=write_to, overwrite=overwrite)
+        raise NotImplementedError()
+        self.fetch(list(available_datasets.keys()), write_to=write_to, overwrite=overwrite)

    def get_path_to(self, dataset):
        """
        Get path to a given dataset
        """

-        if dataset not in self.datasets:
+        if dataset not in available_datasets.keys():
            raise ValueError("Dataset is not available")
+
+        ds = available_datasets[dataset]
+        dl_dir = ds["dir"]
+        local_path = os.path.join(self.data_directory, dl_dir)
+        subdir = ds["subdir"]
+        #file = os.path.join(local_path, dl_dir+".zip")

-        local_zip_loc = os.path.join(self.data_directory, dataset)
+        # local_zip_loc = os.path.join(self.data_directory, dataset)

-        local_path = os.path.splitext(local_zip_loc)[0]
+        path = os.path.join(local_path, subdir)

-        return local_path
+        return path


class ddict(dict):
@@ -942,12 +976,20 @@ def from_event_files(
            else:
                temp = cls(seed=42)
                temp.events = {}
-                temp.events[p] = np.loadtxt(
-                    join(
-                        data_directory,
-                        f"20210126_PS-IC40-IC86_VII/icecube_10year_ps/events/{p}_exp.csv",
+                try:
+                    temp.events[p] = np.loadtxt(
+                        join(
+                            data_directory,
+                            f"20210126_PS-IC40-IC86_VII/icecube_10year_ps/events/{p}_exp.csv",
+                        )
+                    )
+                except FileNotFoundError:
+                    temp.events[p] = np.loadtxt(
+                        join(
+                            data_directory,
+                            f"20210126_PS-IC40-IC86_VII/icecube_10year_ps/events/{p}_exp-1.csv",
+                        )
                    )
-                )
                temp._periods.append(p)
                temp._sort()
                RealEvents.STACK[p] = temp
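
Below is a minimal usage sketch of the refactored interface, kept to names that appear in the diff above (IceCubeData, fetch, get_path_to, and the keys of available_datasets). The import path is an assumption and may differ in the actual package layout.

# Usage sketch -- assumes IceCubeData is importable from this module;
# adjust the import to wherever the class actually lives in the repository.
from icecube_tools.utils.data import IceCubeData  # hypothetical import path

my_data = IceCubeData()  # data_directory defaults to ~/.icecube_data

# "20210126" is one of the keys of available_datasets defined above.
my_data.fetch(["20210126"])  # downloads and unzips; skipped if already present

events_path = my_data.get_path_to("20210126")
print(events_path)  # .../20210126_PS-IC40-IC86_VII/icecube_10year_ps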