Skip to content

Add an option for local search to the Fido client #143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ sg_execution_times.rst

# IDE
.idea
.vscode

# stixpy
stixpy/data/*.fits
1 change: 1 addition & 0 deletions changelog/143.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update the `Fido` client to optionally take a root path or URL as its data source. Also add attributes to enable searching for specific versions, for example the latest version available.
78 changes: 78 additions & 0 deletions stixpy/net/attrs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from sunpy.net.attr import SimpleAttr

# Public API of this module; the version attributes are listed so that they are
# exported by ``from stixpy.net.attrs import *`` alongside the existing attributes.
__all__ = [
    "DataType",
    "DataProduct",
    "Version",
    "VersionU",
    "MinVersion",
    "MinVersionU",
    "MaxVersion",
    "MaxVersionU",
]
Expand All @@ -9,6 +11,82 @@
"""


class Version(SimpleAttr):
    """
    Exact version of the data file.

    A version string is expected to start with ``V`` followed by two digits and an
    optional single letter, e.g. ``V02`` or ``V02U``. Strings carrying a letter are
    rejected unless ``allow_uncompleted`` is set, in which case only ``U`` is accepted.
    """

    PATTERN = re.compile(r"V(\d{2})([a-zA-Z]?)")

    def __init__(self, version: int):
        super().__init__(version)
        # Subclasses override these two attributes to change the matching behaviour.
        self.allow_uncompleted = False
        self.operator = int.__eq__

    def matches(self, ver: str) -> bool:
        """
        Test a version string against this attribute.

        Parameters
        ----------
        ver : str
            Version string, e.g. ``"V02"`` or ``"V02U"``.

        Returns
        -------
        bool
            True if the numeric part satisfies ``self.operator`` against the requested
            version and the letter suffix is acceptable, False otherwise (including
            when ``ver`` does not start with a recognisable version).

        Raises
        ------
        ValueError
            If the stored attribute value cannot be converted to an integer.
        """
        match = Version.PATTERN.match(ver)
        if match is None:
            return False

        number = int(match.group(1))
        suffix = match.group(2)

        # The ``int`` dunder comparisons return ``NotImplemented`` (which is truthy)
        # for non-``int`` operands such as ``str`` or numpy integers, so the stored
        # value is coerced before comparing.
        version_ok = self.operator(number, int(self.value))
        if self.allow_uncompleted:
            suffix_ok = suffix in ("", "U")
        else:
            suffix_ok = suffix == ""

        return version_ok and suffix_ok


class VersionU(Version):
    """
    Exact version of the data file, also accepting versions with the ``U`` suffix.
    """

    def __init__(self, version: int):
        # The parent constructor already installs the equality comparison; only the
        # handling of the ``U`` suffix differs.
        super().__init__(version)
        self.allow_uncompleted = True


class MinVersion(Version):
    """
    Minimum (inclusive) version of the data file.
    """

    def __init__(self, version: int) -> None:
        super().__init__(version)
        # Compared as ``file_version >= version``.
        self.operator = int.__ge__


class MinVersionU(VersionU):
    """
    Minimum (inclusive) version of the data file, with the suffix handling of `VersionU`.
    """

    def __init__(self, version: int) -> None:
        super().__init__(version)
        # Compared as ``file_version >= version``.
        self.operator = int.__ge__


class MaxVersion(Version):
    """
    Maximum (exclusive) version of the data file.
    """

    def __init__(self, version: int) -> None:
        super().__init__(version)
        # Compared as ``file_version < version``; the bound itself does not match.
        self.operator = int.__lt__


class MaxVersionU(VersionU):
    """
    Maximum (exclusive) version of the data file, with the suffix handling of `VersionU`.
    """

    def __init__(self, version: int) -> None:
        super().__init__(version)
        # Compared as ``file_version < version``; the bound itself does not match.
        self.operator = int.__lt__


class DataProduct(SimpleAttr):
"""
Data product
Expand Down
116 changes: 94 additions & 22 deletions stixpy/net/client.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,36 @@
import numpy as np
from sunpy.net import attrs as a
from sunpy.net.attr import SimpleAttr
from sunpy.net.dataretriever import GenericClient
from sunpy.net.dataretriever.client import QueryResponse
from sunpy.time import TimeRange

from stixpy.net.attrs import MaxVersion, MaxVersionU, MinVersion, MinVersionU, Version, VersionU

try:
from sunpy.net.scraper import Scraper
except ModuleNotFoundError:
from sunpy.util.scraper import Scraper

__all__ = ["STIXClient"]
__all__ = ["STIXClient", "StixQueryResponse"]


class StixQueryResponse(QueryResponse):
    """
    Query response that can be reduced in place to one row per group of file versions.
    """

    def filter_for_latest_version(self, allow_uncompleted=False):
        """
        Remove, in place, every row that is not the highest version within its group.

        Rows are grouped by "Start Time", "End Time", "Instrument", "Level",
        "DataType", "DataProduct" and "Request ID". Each group is sorted by its "Ver"
        column and only the last row is kept.

        Parameters
        ----------
        allow_uncompleted : bool, optional
            If False (default), rows whose "Ver" ends with "U" are never kept; a group
            made up only of such rows is removed entirely. If True, those rows compete
            with the others.
        """
        if len(self) == 0:
            # Nothing to filter; also avoids grouping on columns that may not exist.
            return

        # Temporary column remembering the original row position of each entry.
        self["tidx"] = range(len(self))
        try:
            grouped_res = self.group_by(
                ["Start Time", "End Time", "Instrument", "Level", "DataType", "DataProduct", "Request ID"]
            )
            keep = np.zeros(len(self), dtype=bool)
            for group in grouped_res.groups:
                group.sort("Ver")
                if not allow_uncompleted:
                    incomplete = np.char.endswith(group["Ver"].data, "U")
                    group = group[~incomplete]
                    if len(group) == 0:
                        # Only uncompleted files in this group: keep none of them.
                        continue
                keep[group[-1]["tidx"]] = True
        finally:
            # Never leave the helper column behind, even if grouping or sorting fails.
            self.remove_column("tidx")
        self.remove_rows(np.where(~keep)[0])


class STIXClient(GenericClient):
Expand All @@ -35,17 +57,30 @@
<BLANKLINE>
"""

baseurl = r"https://pub099.cs.technik.fhnw.ch/data/fits/{level}/{year:4d}/{month:02d}/{day:02d}/{datatype}/"
ql_filename = r"solo_{level}_stix-{product}_\d{{8}}_V\d{{2}}\D?.fits"
sci_filename = r"solo_{level}_stix-{product}_" r"\d{{8}}T\d{{6}}-\d{{8}}T\d{{6}}_V\d{{2}}\D?_.*.fits"
baseurl = r"https://pub099.cs.technik.fhnw.ch/data/fits"
datapath = r"{level}/{year:4d}/{month:02d}/{day:02d}/{datatype}/"

ql_filename = r"solo_{level}_stix-{product}_[0-9]{{8}}_V.*.fits"
sci_filename = r"solo_{level}_stix-{product}_[0-9]{{8}}T[0-9]{{6}}-[0-9]{{8}}T[0-9]{{6}}_V.*.fits"

base_pattern = r"{}/{Level}/{year:4d}/{month:02d}/{day:02d}/{DataType}/"
ql_pattern = r"solo_{Level}_{descriptor}_{time}_{Ver}.fits"
sci_pattern = r"solo_{Level}_{descriptor}_{start}-{end}_{Ver}_{Request}-{tc}.fits"

required = {a.Time, a.Instrument}

def search(self, *args, **kwargs):
def __init__(self, *, source="https://pub099.cs.technik.fhnw.ch/data/fits") -> None:
    """Create a Fido client to search and download STIX data from the STIX instrument archive.

    Parameters
    ----------
    source : str, optional
        URL-like path to an alternative data source; a local filesystem path can be
        given here. A single trailing slash is ignored.
        By default "https://pub099.cs.technik.fhnw.ch/data/fits".
    """
    super().__init__()
    # Drop one trailing slash so the joined template does not contain "//"
    # (only one is removed so that a bare "file:///" root stays intact).
    root = source[:-1] if source.endswith("/") else source
    self.baseurl = root + r"/{level}/{year:4d}/{month:02d}/{day:02d}/{datatype}/"

def search(self, *args, **kwargs) -> StixQueryResponse:
"""
Query this client for a list of results.

Expand All @@ -63,6 +98,12 @@
matchdict = self._get_match_dict(*args, **kwargs)
levels = matchdict["Level"]

versions = []
for versionAttrType in [Version, VersionU, MinVersion, MinVersionU, MaxVersion, MaxVersionU]:
if versionAttrType.__name__ in matchdict:
for version in matchdict[versionAttrType.__name__]:
versions.append(versionAttrType(int(version)))

metalist = []
tr = TimeRange(matchdict["Start Time"], matchdict["End Time"])
for date in tr.get_dates():
Expand All @@ -85,7 +126,7 @@
elif datatype.lower() == "cal" and product.startswith("cal"):
url = self.baseurl + self.ql_filename
pattern = self.base_pattern + self.ql_pattern
elif datatype.lower() == "asp" and product.startswith("asp"):
elif datatype.lower() in ["asp"] and product.endswith("ephemeris"):
url = self.baseurl + self.ql_filename
pattern = self.base_pattern + self.ql_pattern

Expand All @@ -99,24 +140,54 @@
)

scraper = Scraper(url, regex=True)
filesmeta = scraper._extract_files_meta(tr, extractor=pattern)
for i in filesmeta:
rowdict = self.post_search_hook(i, matchdict)
file_tr = rowdict.pop("tr", None)
if file_tr is not None:
# 4 cases file time full in, fully our start in or ends in
if file_tr.start >= tr.start and file_tr.end <= tr.end:
metalist.append(rowdict)
elif tr.start <= file_tr.start and tr.end >= file_tr.end:
metalist.append(rowdict)
elif file_tr.start <= tr.start <= file_tr.end:
try:
filesmeta = scraper._extract_files_meta(tr, extractor=pattern)

for i in filesmeta:
rowdict = self.post_search_hook(i, matchdict)

versionTest = True
for versionAttr in versions:
versionTest &= versionAttr.matches(rowdict["Ver"])
if not versionTest:
break
if not versionTest:
continue

file_tr = rowdict.pop("tr", None)
if file_tr is not None:
# 4 cases file time full in, fully our start in or end in
if file_tr.start >= tr.start and file_tr.end <= tr.end:
metalist.append(rowdict)
elif tr.start <= file_tr.start and tr.end >= file_tr.end:
metalist.append(rowdict)

Check warning on line 163 in stixpy/net/client.py

View check run for this annotation

Codecov / codecov/patch

stixpy/net/client.py#L163

Added line #L163 was not covered by tests
elif file_tr.start <= tr.start <= file_tr.end:
metalist.append(rowdict)
elif file_tr.start <= tr.end <= file_tr.end:
metalist.append(rowdict)
else:
metalist.append(rowdict)
elif file_tr.start <= tr.end <= file_tr.end:
metalist.append(rowdict)
else:
metalist.append(rowdict)
except FileNotFoundError:
continue
return StixQueryResponse(metalist, client=self)

return QueryResponse(metalist, client=self)
@classmethod
def _can_handle_query(cls, *query):
    """
    Decide whether this client can serve the given query.

    Used by `sunpy.net.fido_factory.UnifiedDownloaderFactory` to dispatch queries
    to this client. The query is rejected when its attribute types do not pass
    ``check_attr_types_in_query``, or when a `SimpleAttr` of a registered type
    carries a value that is not among the values registered for that type
    (compared case-insensitively).
    """
    registered = cls.register_values()
    optional = set(registered) - cls.required
    if not cls.check_attr_types_in_query(query, cls.required, optional):
        return False

    for attr_type, entries in registered.items():
        known_values = [entry[0].lower() for entry in entries]
        for item in query:
            if not isinstance(item, attr_type):
                continue
            if issubclass(attr_type, SimpleAttr) and str(item.value).lower() not in known_values:
                return False

    return True

def post_search_hook(self, exdict, matchdict):
rowdict = super().post_search_hook(exdict, matchdict)
Expand Down Expand Up @@ -152,6 +223,7 @@
attrs.Level: [
("L0", "STIX: commutated, uncompressed, uncalibrated data."),
("L1", "STIX: Engineering and UTC time conversion ."),
("ANC", "STIX: Ancillary Data like aspect."),
("L2", "STIX: Calibrated data."),
("ANC", "STIX: Ancillary data."),
],
Expand Down
Loading
Loading