Skip to content

Commit 70fda69

Browse files
authored
Merge pull request #58 from cescalara/cescalara-patch-1
Update tests.yml
2 parents d8f8bc2 + b4c5dc3 commit 70fda69

File tree

6 files changed

+83
-45
lines changed

6 files changed

+83
-45
lines changed

.github/workflows/tests.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
run: |
3535
pytest -vv --cov=icecube_tools/ --cov-report=xml
3636
- name: Upload coverage to Codecov
37-
uses: codecov/codecov-action@v3
37+
uses: codecov/codecov-action@v4
3838
with:
3939
token: ${{ secrets.CODECOV_TOKEN }}
4040
files: ./coverage.xml

docs/markdown/detector_model.md

-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ The `IceCubeData` class can be used for a quick check of the available datasets
3434

3535
```python
3636
my_data = IceCubeData()
37-
my_data.datasets
3837
```
3938

4039
<!-- #region -->

docs/markdown/public_data_access.md

+2-5
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,10 @@ IceCube has a bunch of public datasets available at [https://icecube.wisc.edu/sc
2020
from icecube_tools.utils.data import IceCubeData
2121
```
2222

23-
The `IceCubeData` class provides this functionality. Upon initialisation, `IceCubeData` queries the website using HTTP requests to check what datasets are currently available. By default, this request is cached to avoid spamming the IceCube website. However, you can use the keyword argument `update` to override this.
23+
The `IceCubeData` class provides this functionality.
2424

2525
```python
26-
my_data = IceCubeData(update=True)
27-
28-
# The available datasets
29-
my_data.datasets
26+
my_data = IceCubeData()
3027
```
3128

3229
You can use the `find` method to pick out datasets you are interested in.

icecube_tools/utils/data.py

+75-33
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,32 @@
2323
logger = logging.getLogger(__name__)
2424
logger.setLevel(logging.WARNING)
2525

26-
icecube_data_base_url = "https://icecube.wisc.edu/data-releases"
26+
# icecube_data_base_url = "https://icecube.wisc.edu/data-releases"
2727
data_directory = os.path.abspath(os.path.join(os.path.expanduser("~"), ".icecube_data"))
2828

29+
available_datasets = {
30+
"20210126": {
31+
"url": "https://dataverse.harvard.edu/api/access/dataset/:persistentId/?persistentId=doi:10.7910/DVN/VKL316",
32+
"dir": "20210126_PS-IC40-IC86_VII",
33+
"subdir": "icecube_10year_ps",
34+
},
35+
"20181018": {
36+
"url": "https://icecube.wisc.edu/data-releases/20181018_All-sky_point-source_IceCube_data%20_years_2010-2012.zip",
37+
"dir": "20181018_All-sky_point-source_IceCube_data%20_years_2010-2012",
38+
"subdir": ""
39+
},
40+
"20150820": {
41+
"url": "https://icecube.wisc.edu/data-releases/20150820_Astrophysical_muon_neutrino_flux_in_the_northern_sky_with_2_years_of_IceCube_data.zip",
42+
"dir": "20150820_Astrophysical_muon_neutrino_flux_in_the_northern_sky_with_2_years_of_IceCube_data",
43+
"subdir": ""
44+
},
45+
"20131121": {
46+
"url": "https://icecube.wisc.edu/data-releases/20131121_Search_for_contained_neutrino_events_at_energies_above_30_TeV_in_2_years_of_data.zip",
47+
"dir": "20131121_Search_for_contained_neutrino_events_at_energies_above_30_TeV_in_2_years_of_data",
48+
"subdir": "",
49+
}
50+
}
51+
2952
available_irf_periods = ["IC40", "IC59", "IC79", "IC86_I", "IC86_II"]
3053

3154
available_data_periods = [
@@ -50,10 +73,10 @@ class IceCubeData:
5073

5174
def __init__(
5275
self,
53-
base_url=icecube_data_base_url,
76+
#base_url=icecube_data_base_url,
5477
data_directory=data_directory,
5578
cache_name=".cache",
56-
update=False,
79+
# update=False,
5780
):
5881
"""
5982
Handle the interface with IceCube's public data
@@ -65,7 +88,7 @@ def __init__(
6588
:param update: Refresh the cache if true
6689
"""
6790

68-
self.base_url = base_url
91+
#self.base_url = base_url
6992

7093
self.data_directory = data_directory
7194

@@ -74,21 +97,21 @@ def __init__(
7497
expire_after=-1,
7598
)
7699

77-
self.ls(verbose=False, update=update)
78-
79100
# Make data directory if it doesn't exist
80101
if not os.path.exists(self.data_directory):
81102
os.makedirs(self.data_directory)
82103

104+
83105
def ls(self, verbose=True, update=False):
84106
"""
85107
List the available datasets.
86108
87109
:param verbose: Print the datasets if true
88110
:param update: Refresh the cache if true
89111
"""
112+
raise NotImplementedError()
90113

91-
self.datasets = []
114+
available_datasets = []
92115

93116
if update:
94117
requests_cache.clear()
@@ -104,7 +127,7 @@ def ls(self, verbose=True, update=False):
104127
href = link.get("href")
105128

106129
if ".zip" in href:
107-
self.datasets.append(href)
130+
available_datasets.append(href)
108131

109132
if verbose:
110133
print(href)
@@ -116,7 +139,7 @@ def find(self, search_string):
116139

117140
found_datasets = []
118141

119-
for dataset in self.datasets:
142+
for dataset in available_datasets:
120143
if search_string in dataset:
121144
found_datasets.append(dataset)
122145

@@ -137,44 +160,49 @@ def fetch(self, datasets, overwrite=False, write_to=None):
137160
self.data_directory = write_to
138161

139162
for dataset in datasets:
140-
if dataset not in self.datasets:
163+
if dataset not in available_datasets:
141164
raise ValueError(
142165
"Dataset %s is not in list of known datasets" % dataset
143166
)
144-
145-
url = os.path.join(self.base_url, dataset)
146-
147-
local_path = os.path.join(self.data_directory, dataset)
148-
167+
168+
ds = available_datasets[dataset]
169+
url = ds["url"]
170+
dl_dir = ds["dir"]
171+
local_path = os.path.join(self.data_directory, dl_dir)
172+
subdir = ds["subdir"]
173+
file = os.path.join(local_path, dl_dir+".zip")
149174
# Only fetch if not already there!
150-
if not os.path.exists(os.path.splitext(local_path)[0]) or overwrite:
175+
if not os.path.exists(local_path) or overwrite:
176+
os.makedirs(local_path, exist_ok=True)
151177
# Don't cache this as we want to stream
152178
with requests_cache.disabled():
153179
response = requests.get(url, stream=True)
154180

155181
if response.ok:
156-
total = int(response.headers["content-length"])
157182

158183
# For progress bar description
159184
short_name = dataset
160185
if len(dataset) > 40:
161186
short_name = dataset[0:40] + "..."
162187

163188
# Save locally
164-
with open(local_path, "wb") as f, tqdm(
165-
desc=short_name, total=total
189+
with open(file, "wb") as f, tqdm(
190+
desc=short_name,
166191
) as bar:
167192
for chunk in response.iter_content(chunk_size=1024 * 1024):
168193
size = f.write(chunk)
169194
bar.update(size)
170195

171196
# Unzip
172-
dataset_dir = os.path.splitext(local_path)[0]
173-
with ZipFile(local_path, "r") as zip_ref:
197+
if subdir:
198+
dataset_dir = os.path.join(local_path, subdir)
199+
else:
200+
dataset_dir = local_path
201+
with ZipFile(file, "r") as zip_ref:
174202
zip_ref.extractall(dataset_dir)
175203

176204
# Delete zipfile
177-
os.remove(local_path)
205+
os.remove(file)
178206

179207
# Check for further compressed files in the extraction
180208
tar_files = find_files(dataset_dir, ".tar")
@@ -198,22 +226,28 @@ def fetch_all_to(self, write_to, overwrite=False):
198226
"""
199227
Download all data to a given location
200228
"""
201-
202-
self.fetch(self.datasets, write_to=write_to, overwrite=overwrite)
229+
raise NotImplementedError()
230+
self.fetch(list(available_datasets.keys()), write_to=write_to, overwrite=overwrite)
203231

204232
def get_path_to(self, dataset):
205233
"""
206234
Get path to a given dataset
207235
"""
208236

209-
if dataset not in self.datasets:
237+
if dataset not in available_datasets.keys():
210238
raise ValueError("Dataset is not available")
239+
240+
ds = available_datasets[dataset]
241+
dl_dir = ds["dir"]
242+
local_path = os.path.join(self.data_directory, dl_dir)
243+
subdir = ds["subdir"]
244+
#file = os.path.join(local_path, dl_dir+".zip")
211245

212-
local_zip_loc = os.path.join(self.data_directory, dataset)
246+
#local_zip_loc = os.path.join(self.data_directory, dataset)
213247

214-
local_path = os.path.splitext(local_zip_loc)[0]
248+
path = os.path.join(local_path, subdir)
215249

216-
return local_path
250+
return path
217251

218252

219253
class ddict(dict):
@@ -942,12 +976,20 @@ def from_event_files(
942976
else:
943977
temp = cls(seed=42)
944978
temp.events = {}
945-
temp.events[p] = np.loadtxt(
946-
join(
947-
data_directory,
948-
f"20210126_PS-IC40-IC86_VII/icecube_10year_ps/events/{p}_exp.csv",
979+
try:
980+
temp.events[p] = np.loadtxt(
981+
join(
982+
data_directory,
983+
f"20210126_PS-IC40-IC86_VII/icecube_10year_ps/events/{p}_exp.csv",
984+
)
985+
)
986+
except FileNotFoundError:
987+
temp.events[p] = np.loadtxt(
988+
join(
989+
data_directory,
990+
f"20210126_PS-IC40-IC86_VII/icecube_10year_ps/events/{p}_exp-1.csv",
991+
)
949992
)
950-
)
951993
temp._periods.append(p)
952994
temp._sort()
953995
RealEvents.STACK[p] = temp

tests/test_angular_resolution.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_kappa_conversion():
1414

1515
assert theta_1sigma == approx(theta_p)
1616

17-
17+
"""
1818
def test_angular_resolution():
1919
2020
# Load
@@ -33,7 +33,7 @@ def test_angular_resolution():
3333
3434
# Return angular error
3535
assert ang_res.ret_ang_err == ang_res.get_ret_ang_err(Etrue)
36-
36+
"""
3737

3838
def test_r2021_irf():
3939

tests/test_data_interface.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22

33
my_data = IceCubeData()
44

5-
5+
"""
66
def test_data_scan():
77
88
assert my_data.datasets[1] == "20080911_AMANDA_7_Year_Data.zip"
9-
9+
"""
1010

1111
def test_file_download(output_directory):
1212

13-
found_dataset = my_data.find("AMANDA")
13+
found_dataset = ["20181018"]
1414

1515
my_data.fetch(found_dataset, write_to=output_directory)
1616

0 commit comments

Comments
 (0)