feat: update nhm plugin to work with vds vNext
BREAKING CHANGE: update nhm plugin to work with vds vNext

Additionally, this removes the record_show action from this plugin and moves it to the vds plugin (where it can still be called as record_show, since that aligns well with CKAN's naming conventions, but can also be called as vds_data_get to match vds's naming conventions). There is also a new action, datastore_search, which chains the datastore_search action from vds (or others!) and adds the include_urls parameter.
jrdh committed Sep 16, 2024
1 parent 3e6b1e4 commit 9d7d176
Showing 28 changed files with 10,735 additions and 363 deletions.
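
For context, the chained datastore_search action described in the commit message could be wired up roughly as in the minimal sketch below, using CKAN's toolkit.chained_action decorator to delegate to whichever plugin provides datastore_search next in the chain (vds or another). The include_urls handling and the _record_url helper are illustrative assumptions, not the plugin's actual implementation.

from ckan.plugins import toolkit


def _record_url(resource_id, record_id):
    # hypothetical URL builder, purely for illustration
    return f'/record/{resource_id}/{record_id}'


@toolkit.chained_action
def datastore_search(next_action, context, data_dict):
    # pop the extra parameter so the downstream datastore_search (provided by
    # vds, or by another plugin further along the chain) never sees it
    include_urls = toolkit.asbool(data_dict.pop('include_urls', False))
    result = next_action(context, data_dict)
    if include_urls:
        resource_id = data_dict['resource_id']
        for record in result.get('records', []):
            record['url'] = _record_url(resource_id, record.get('_id'))
    return result

In the real plugin the action would be registered through IActions; how the URLs are actually built for each record is not shown in this diff.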
9 changes: 4 additions & 5 deletions ckanext/nhm/dcat/specimen_records.py
@@ -14,7 +14,6 @@
Namespaces,
object_uri,
as_dwc_list,
epoch_to_datetime,
)
from ckanext.nhm.lib.dwc import dwc_terms
from ckanext.nhm.lib.helpers import get_department
@@ -95,7 +94,7 @@ def __init__(
self.version = version

# figure out the rounded version of the record
self.rounded_version = toolkit.get_action('datastore_get_rounded_version')(
self.rounded_version = toolkit.get_action('vds_version_round')(
{},
{
'resource_id': self.record.resource_id,
@@ -261,7 +260,7 @@ def _cetaf_cspp(self):
yield (
self.record_ref,
self.namespaces.dc.created,
Literal(epoch_to_datetime(self.record.data['created'])),
Literal(self.record.data['created']),
)
yield self.record_ref, self.namespaces.dc.publisher, URIRef('https://nhm.ac.uk')

@@ -381,15 +380,15 @@ def _dwc(self):
yield (
self.record_ref,
self.namespaces.dc.created,
Literal(epoch_to_datetime(self.record.data['created'])),
Literal(self.record.data['created']),
)

if self.record.data.get('modified', None) is not None:
# yield the modified date in the correct format
yield (
self.record_ref,
self.namespaces.dwc.modified,
Literal(epoch_to_datetime(self.record.data['modified'])),
Literal(self.record.data['modified']),
)

def _version_info(self):
16 changes: 2 additions & 14 deletions ckanext/nhm/dcat/utils.py
@@ -4,11 +4,10 @@
# This file is part of ckanext-nhm
# Created by the Natural History Museum in London, UK

from ckanext.nhm.lib.helpers import get_specimen_resource_id
from datetime import datetime
from rdflib import namespace

from ckan.plugins import toolkit
from ckanext.nhm.lib.helpers import get_specimen_resource_id


def object_uri(uuid, version=None):
@@ -35,18 +34,7 @@ def as_dwc_list(objects):
:param objects: the objects
:return: a | separated string
"""
return ' | '.join(objects)


def epoch_to_datetime(epoch_timestamp):
"""
Converts the given epoch timestamp into a datetime object. The timestamp passed in
is assumed to be the integer number of milliseconds since the UNIX epoch.
:param epoch_timestamp: the integer number of milliseconds since the UNIX epoch
:return: a datetime object
"""
return datetime.fromtimestamp(epoch_timestamp / 1000.0)
return " | ".join(objects)


class Namespaces:
14 changes: 7 additions & 7 deletions ckanext/nhm/lib/filter_options.py
@@ -5,6 +5,8 @@
# Created by the Natural History Museum in London, UK

from elasticsearch_dsl import Q
from splitgill.indexing.fields import DocumentField
from splitgill.search import exists_query, term_query


class FilterOption:
@@ -43,18 +45,16 @@ def as_dict(self):


# define some simple, common filters
has_image = FilterOption(
'_has_image', 'Has image', Q('exists', field='data.associatedMedia')
)
has_image = FilterOption("_has_image", "Has image", exists_query("associatedMedia"))

has_lat_long = FilterOption(
'_has_lat_long', 'Has lat/long', Q('exists', field='meta.geo')
"_has_lat_long", "Has lat/long", Q("exists", field=DocumentField.ALL_POINTS)
)

exclude_mineralogy = FilterOption(
'_exclude_mineralogy',
'Exclude Mineralogy',
"_exclude_mineralogy",
"Exclude Mineralogy",
# note the ~ which inverts the query
~Q('term', **{'data.collectionCode': 'min'}),
~term_query("collectionCode", "min", case_sensitive=False),
hide=True,
)
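
The filter changes above replace raw elasticsearch_dsl queries aimed at the old data.*/meta.* document paths with splitgill's search helpers, which target the vNext document layout. As a rough standalone illustration, reusing only the helper signatures that appear in the diff (the variable names are just examples):

from splitgill.search import exists_query, term_query

# match records that have any value in the associatedMedia field
has_media = exists_query("associatedMedia")

# match records whose collectionCode is "min", ignoring case; prefixing an
# elasticsearch_dsl query with ~ inverts it, as the exclude_mineralogy filter does
is_mineralogy = term_query("collectionCode", "min", case_sensitive=False)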
80 changes: 44 additions & 36 deletions ckanext/nhm/lib/helpers.py
@@ -95,22 +95,28 @@ def get_record_count():


@cache_region('collection_stats', 'record_stats')
def get_record_stats():
    start_version = 1501545600
    end_version = int(time.time())
    count_action = toolkit.get_action('datastore_count')

    record_stats = []
    for v in range(start_version, end_version, 604800):
        record_stats.append(
            {
                'date': datetime.fromtimestamp(v),
                'count': count_action({}, {'version': v * 1000}),
            }
        )

    return record_stats
def get_record_stats() -> List[dict]:
    """
    Returns a list of dictionaries containing statistics about the number of records
    available each week starting from 01/08/2017 and ending now.

    :return: a list of dicts
    """
    # 01/08/2017 as ms epoch
    start_version = 1501545600000
    # now as ms epoch
    end_version = int(time.time() * 1000)
    # 1 week in ms
    step = 604800000
    count_action = toolkit.get_action("vds_multi_count")

    return [
        {
            'date': datetime.fromtimestamp(version / 1000),
            'count': count_action({}, {'version': version})["total"],
        }
        for version in range(start_version, end_version, step)
    ]


def _get_action(action, params):
@@ -156,7 +162,7 @@ def get_record(resource_id, record_id):
:param record_id: the ID of the record
"""
record = _get_action(
'record_show', {'resource_id': resource_id, 'record_id': record_id}
"vds_data_get", {"resource_id": resource_id, "record_id": record_id}
)
return record.get('data', None)

@@ -278,7 +284,7 @@ def get_nhm_organisation_id():
:returns: ID for the NHM organisation
"""
value = toolkit.config.get('ldap.organization.id')
value = toolkit.config.get("ldap.organization.id")
return str(value) if value is not None else None


@@ -294,6 +300,7 @@ def is_collection_resource_id(resource_id: str) -> bool:
get_artefact_resource_id(),
get_indexlot_resource_id(),
get_specimen_resource_id(),
get_sample_resource_id(),
}
return resource_id in resource_ids

@@ -319,32 +326,34 @@ def get_indexlot_resource_id():


def get_artefact_resource_id():
'''
@return: ID for artefact resource
'''
return toolkit.config.get('ckanext.nhm.artefact_resource_id')


def get_sample_resource_id() -> str:
return toolkit.config.get("ckanext.nhm.sample_resource_id")


def get_beetle_iiif_resource_id():
"""
Get the ID for the beetle IIIF resource.
:return: the resource id
"""
value = toolkit.config.get('ckanext.nhm.beetle_iiif_resource_id')
value = toolkit.config.get("ckanext.nhm.beetle_iiif_resource_id")
return str(value) if value is not None else None


@cache_region('collection_stats', 'collection_stats')
@cache_region("collection_stats", "collection_stats")
def collection_stats():
"""
Get collection stats, including collection codes and collection totals.
"""
stats = {}
collections = [
('artefacts', get_artefact_resource_id()),
('indexlots', get_indexlot_resource_id()),
('specimens', get_specimen_resource_id()),
("artefacts", get_artefact_resource_id()),
("indexlots", get_indexlot_resource_id()),
("specimens", get_specimen_resource_id()),
("samples", get_sample_resource_id()),
]

collections_total = 0
Expand All @@ -353,7 +362,7 @@ def collection_stats():
'resource_id': resource_id,
'limit': 0,
}
stats[name] = toolkit.get_action('datastore_search')({}, params)['total']
stats[name] = toolkit.get_action("vds_basic_count")({}, params)
collections_total += stats[name]
stats['total'] = collections_total

@@ -375,7 +384,7 @@ def collection_stats():
}
},
}
total = toolkit.get_action('datastore_multisearch')({}, params)['total']
total = toolkit.get_action("vds_multi_count")({}, params)["total"]
collection_code_counts.append((collection_code, total))

collection_code_counts.sort(key=operator.itemgetter(1), reverse=True)
@@ -553,7 +562,7 @@ def get_resource_fields(resource, version=None, use_request_version=False):
if '__version__' in filters:
data['version'] = int(filters['__version__'][0])

result = toolkit.get_action('datastore_search')({}, data)
result = toolkit.get_action("vds_basic_query")({}, data)
return [field['id'] for field in result.get('fields', [])]


@@ -1031,7 +1040,7 @@ def get_resource_facets(resource):
if toolkit.h.get_param_int('_{}_limit'.format(field_name)) == 0:
search_params.setdefault('facet_limits', {})[field_name] = 50

search = toolkit.get_action('datastore_search')(context, search_params)
search = toolkit.get_action("vds_basic_query")(context, search_params)
facets = []

# dictionary of facet name => formatter function with camel_case_to_string defined
@@ -1249,7 +1258,7 @@ def resource_view_get_filterable_fields(resource):
'resource_id': resource['id'],
'limit': 0,
}
fields = toolkit.get_action('datastore_search')({}, data).get('fields', [])
fields = toolkit.get_action("vds_basic_query")({}, data).get("fields", [])

# sort and filter the fields ensuring we only return string type fields and don't
# return the id
@@ -1280,7 +1289,7 @@ def _get_latest_update(package_or_resource_dicts):
"""
# a list of fields on the resource that should contain update dates
fields = ['last_modified', 'revision_timestamp', 'Created']
get_rounded_version = toolkit.get_action('datastore_get_rounded_version')
get_rounded_version = toolkit.get_action("vds_version_round")

latest_dict = None
latest_date = None
@@ -1398,7 +1407,7 @@ def get_object_url(resource_id, guid, version=None, include_version=True):
:return: the object url
"""
if include_version:
rounded_version = toolkit.get_action('datastore_get_rounded_version')(
rounded_version = toolkit.get_action("vds_version_round")(
{},
{
'resource_id': resource_id,
Expand Down Expand Up @@ -1537,13 +1546,12 @@ def group_resources(resource_list):


def get_resource_size(resource_dict):
prefixes = 'KMGTPEZ' # kilo, mega, giga, etc. I could make this a list but what's the point
if toolkit.get_action('datastore_is_datastore_resource')(
if toolkit.get_action("vds_resource_check")(
{}, {'resource_id': resource_dict['id']}
):
try:
records = toolkit.get_action('datastore_count')(
{}, {'resource_ids': [resource_dict['id']]}
records = toolkit.get_action("vds_basic_count")(
{}, {'resource_id': resource_dict['id']}
)
return f'{records} records'
except:
@@ -1572,7 +1580,7 @@ def get_record_permalink(
'record_id': record_dict['_id'],
}
if include_version:
rounded_version = toolkit.get_action('datastore_get_rounded_version')(
rounded_version = toolkit.get_action("vds_version_round")(
{},
{
'resource_id': resource_dict['id'],
21 changes: 11 additions & 10 deletions ckanext/nhm/lib/record.py
@@ -36,7 +36,7 @@ def get_record_by_uuid(uuid, version=None) -> Optional['Record']:
'version': version,
}
# retrieve datastore record
search_result = toolkit.get_action('datastore_search')(
search_result = toolkit.get_action("vds_basic_query")(
context, search_data_dict
)
records = search_result['records']
@@ -183,7 +183,7 @@ def data(self) -> dict:
data_dict = dict(record_id=self.id, resource_id=self.resource_id)
if self.version is not None:
data_dict['version'] = self.version
self._data = toolkit.get_action('record_show')(self._context, data_dict)[
self._data = toolkit.get_action("vds_data_get")(self._context, data_dict)[
'data'
]
return self._data
@@ -384,7 +384,7 @@ def geojson(self) -> Optional[dict]:
extract the values from the record data and return a GeoJSON compatible Point
where the record is located.
:return: None if the latitude and longitude couldn't be identified or a GeoJSON Point
:return: None if the latitude and longitude couldn't be identified or a GeoJSON
Point
"""
lat_field = self.resource.get(
LATITUDE_FIELD, DWC_LATITUDE if self.is_dwc else None
Expand All @@ -396,12 +397,12 @@ def geojson(self) -> Optional[dict]:
if not lat_field or not lon_field:
return None

latitude = self.data.get(lat_field)
longitude = self.data.get(lon_field)

if latitude is None or longitude is None:
try:
latitude = float(self.data.get(lat_field))
longitude = float(self.data.get(lon_field))
except (ValueError, TypeError):
return None

# create a piece of GeoJSON to point at the specific record location on a map (note the
# longitude then latitude ordering required by GeoJSON)
return dict(type='Point', coordinates=[float(longitude), float(latitude)])
# create a piece of GeoJSON to point at the specific record location on a map
# (note the longitude then latitude ordering required by GeoJSON)
return dict(type="Point", coordinates=[longitude, latitude])