From 5a3291bb70b91ea1afd79d7fd02638c5331af5bf Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 22 Nov 2024 13:26:38 -0500 Subject: [PATCH] expose links to all export formats via Signposting #10542 --- doc/release-notes/10542-signposting.md | 9 ++++ .../source/admin/discoverability.rst | 2 +- doc/sphinx-guides/source/api/native-api.rst | 14 ++++-- .../source/user/dataset-management.rst | 2 + .../export/SchemaDotOrgExporter.java | 6 ++- .../dataverse/util/SignpostingResources.java | 45 ++++++++++++++----- .../iq/dataverse/api/SignpostingIT.java | 22 +++++++++ 7 files changed, 84 insertions(+), 16 deletions(-) create mode 100644 doc/release-notes/10542-signposting.md diff --git a/doc/release-notes/10542-signposting.md b/doc/release-notes/10542-signposting.md new file mode 100644 index 00000000000..b6429310323 --- /dev/null +++ b/doc/release-notes/10542-signposting.md @@ -0,0 +1,9 @@ +# Signposting Output Now Contains Links to All Dataset Metadata Export Formats + +When Signposting was added in Dataverse 5.14 (#8981), it only provided links for the `schema.org` metadata export format. + +The output of HEAD, GET, and the Signposting "linkset" API have all been updated to include links to all available dataset metadata export formats (including any external exporters, such as Croissant, that have been enabled). + +This provides a lightweight machine-readable way to first retrieve a list of links (via an HTTP HEAD request, for example) to each available metadata export format and then follow up with a request for the export format of interest. + +See also [the docs](https://preview.guides.gdcc.io/en/develop/api/native-api.html#retrieve-signposting-information) and #10542. 
diff --git a/doc/sphinx-guides/source/admin/discoverability.rst b/doc/sphinx-guides/source/admin/discoverability.rst index 19ef7250a29..22ff66246f0 100644 --- a/doc/sphinx-guides/source/admin/discoverability.rst +++ b/doc/sphinx-guides/source/admin/discoverability.rst @@ -51,7 +51,7 @@ The Dataverse team has been working with Google on both formats. Google has `ind Signposting +++++++++++ -The Dataverse software supports `Signposting `_. This allows machines to request more information about a dataset through the `Link `_ HTTP header. +The Dataverse software supports `Signposting `_. This allows machines to request more information about a dataset through the `Link `_ HTTP header. Links to all enabled metadata export formats are given. See :ref:`metadata-export-formats` for a list. There are 2 Signposting profile levels, level 1 and level 2. In this implementation, * Level 1 links are shown `as recommended `_ in the "Link" diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 2bf0cd5c635..2bb27677b9b 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1352,9 +1352,16 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/export?exporter=ddi&persistentId=doi:10.5072/FK2/J8SJZB" -.. note:: Supported exporters (export formats) are ``ddi``, ``oai_ddi``, ``dcterms``, ``oai_dc``, ``schema.org`` , ``OAI_ORE`` , ``Datacite``, ``oai_datacite`` and ``dataverse_json``. Descriptive names can be found under :ref:`metadata-export-formats` in the User Guide. +Available Dataset Metadata Exporters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Dataset metadata exporters that ship with Dataverse are ``ddi``, ``oai_ddi``, ``dcterms``, ``oai_dc``, ``schema.org``, ``OAI_ORE``, ``Datacite``, ``oai_datacite`` and ``dataverse_json``. 
These are the strings to pass as ``$METADATA_FORMAT`` in the examples above. Descriptive names for each format can be found under :ref:`metadata-export-formats` in the User Guide. + +Additional exporters can be enabled, as described under :ref:`external-exporters` in the Installation Guide. They are listed under :ref:`inventory-of-external-exporters`. + +To discover the machine-readable name of exporters (e.g. ``ddi``) that have been enabled on the installation of Dataverse you are using, you can use the Signposting "linkset" API documented under :ref:`signposting-api`. -.. note:: Additional exporters can be enabled, as described under :ref:`external-exporters` in the Installation Guide. To discover the machine-readable name of each exporter (e.g. ``ddi``), check :ref:`inventory-of-external-exporters` or ``getFormatName`` in the exporter's source code. +To discover the machine-readable name of exporters generally, check :ref:`inventory-of-external-exporters` or ``getFormatName`` in the exporter's source code. Schema.org JSON-LD ^^^^^^^^^^^^^^^^^^ @@ -1368,6 +1375,7 @@ Both forms are valid according to Google's Structured Data Testing Tool at https The standard has further evolved into a format called Croissant. For details, see :ref:`schema.org-head` in the Admin Guide. +The ``schema.org`` format changed after Dataverse 6.4 as well. Previously its content type was "application/json" but now it is "application/ld+json". 
List Files in a Dataset ~~~~~~~~~~~~~~~~~~~~~~~ @@ -2936,7 +2944,7 @@ Signposting involves the addition of a `Link ;rel="cite-as", ;rel="describedby";type="application/vnd.citationstyles.csl+json",;rel="describedby";type="application/ld+json", ;rel="type",;rel="type", ;rel="license", ; rel="linkset";type="application/linkset+json"`` +``Link: ;rel="cite-as", ;rel="describedby";type="application/vnd.citationstyles.csl+json",;rel="describedby";type="application/json",;rel="describedby";type="application/xml",;rel="describedby";type="application/xml",;rel="describedby";type="application/xml",;rel="describedby";type="application/ld+json",;rel="describedby";type="application/xml",;rel="describedby";type="application/xml",;rel="describedby";type="text/html",;rel="describedby";type="application/json",;rel="describedby";type="application/xml", ;rel="type",;rel="type", ;rel="license", ; rel="linkset";type="application/linkset+json"`` The URL for linkset information is discoverable under the ``rel="linkset";type="application/linkset+json`` entry in the "Link" header, such as in the example above. diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index b3a14554b40..d1acb3294fc 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -43,6 +43,8 @@ Additional formats can be enabled. See :ref:`inventory-of-external-exporters` in Each of these metadata exports contains the metadata of the most recently published version of the dataset. +For each dataset, links to each enabled metadata format are available programmatically via Signposting. For details, see :ref:`discovery-sign-posting` in the Admin Guide and :ref:`signposting-api` in the API Guide. + .. 
_adding-new-dataset: Adding a New Dataset diff --git a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java index 0c4b39fd641..d4f2f95389f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java @@ -111,7 +111,11 @@ public Boolean isAvailableToUsers() { @Override public String getMediaType() { - return MediaType.APPLICATION_JSON; + /** + * Changed from "application/json" to "application/ld+json" because + * that's what Signposting expects. + */ + return "application/ld+json"; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java index b6f8870aa2d..d4e6ba2b39d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java @@ -16,6 +16,7 @@ Two configurable options allow changing the limit for the number of authors or d import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.dataset.DatasetUtil; +import edu.harvard.iq.dataverse.export.ExportService; import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; @@ -28,6 +29,8 @@ Two configurable options allow changing the limit for the number of authors or d import java.util.logging.Logger; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; +import io.gdcc.spi.export.ExportException; +import io.gdcc.spi.export.Exporter; public class SignpostingResources { private static final Logger logger = Logger.getLogger(SignpostingResources.class.getCanonicalName()); @@ -72,8 +75,18 @@ public String getLinks() { } String describedby = "<" + ds.getGlobalId().asURL().toString() + ">;rel=\"describedby\"" + ";type=\"" + 
"application/vnd.citationstyles.csl+json\""; - describedby += ",<" + systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=schema.org&persistentId=" - + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + ">;rel=\"describedby\"" + ";type=\"application/ld+json\""; + ExportService instance = ExportService.getInstance(); + for (String[] labels : instance.getExportersLabels()) { + String formatName = labels[1]; + Exporter exporter; + try { + exporter = ExportService.getInstance().getExporter(formatName); + describedby += ",<" + systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=" + formatName + "&persistentId=" + + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + ">;rel=\"describedby\"" + ";type=\"" + exporter.getMediaType() + "\""; + } catch (ExportException ex) { + logger.warning("Could not look up exporter based on " + formatName + ". Exception: " + ex); + } + } valueList.add(describedby); String type = ";rel=\"type\""; @@ -112,15 +125,25 @@ public JsonArrayBuilder getJsonLinkset() { ) ); - mediaTypes.add( - jsonObjectBuilder().add( - "href", - systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=schema.org&persistentId=" + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() - ).add( - "type", - "application/ld+json" - ) - ); + ExportService instance = ExportService.getInstance(); + for (String[] labels : instance.getExportersLabels()) { + String formatName = labels[1]; + Exporter exporter; + try { + exporter = ExportService.getInstance().getExporter(formatName); + mediaTypes.add( + jsonObjectBuilder().add( + "href", + systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=" + formatName + "&persistentId=" + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + ).add( + "type", + exporter.getMediaType() + ) + ); + } catch (ExportException ex) { + logger.warning("Could not look up exporter based on " + formatName + ". 
Exception: " + ex); + } + } JsonArrayBuilder linksetJsonObj = Json.createArrayBuilder(); JsonObjectBuilder mandatory; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SignpostingIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SignpostingIT.java index 75f514f3398..4327aa26778 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SignpostingIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SignpostingIT.java @@ -58,6 +58,16 @@ public void testSignposting() { Response getHtml = given().get(datasetLandingPage); System.out.println("Link header: " + getHtml.getHeader("Link")); + if (false) { + // Split on commas to make the output more readable. + System.out.println("---"); + String header = getHtml.getHeader("Link"); + for (String string : header.split(",")) { + System.out.println(string + ","); + } + System.out.println("returning early..."); + return; + } getHtml.then().assertThat().statusCode(OK.getStatusCode()); @@ -67,6 +77,8 @@ public void testSignposting() { assertTrue(linkHeader.contains(datasetPid)); assertTrue(linkHeader.contains("cite-as")); assertTrue(linkHeader.contains("describedby")); + // Make sure we get more exporters besides just "schema.org". + assertTrue(linkHeader.contains("oai_datacite")); Response headHtml = given().head(datasetLandingPage); @@ -76,6 +88,7 @@ public void testSignposting() { // Make sure there's Signposting stuff in the "Link" header such as // the dataset PID, cite-as, etc. + // TODO: The comment above is a repeat and so are some of the assertions below. Consolidate? 
linkHeader = getHtml.getHeader("Link"); assertTrue(linkHeader.contains(datasetPid)); assertTrue(linkHeader.contains("cite-as")); @@ -90,8 +103,10 @@ public void testSignposting() { System.out.println("Linkset URL: " + linksetUrl); Response linksetResponse = given().accept(ContentType.JSON).get(linksetUrl); + linksetResponse.prettyPrint(); String responseString = linksetResponse.getBody().asString(); + System.out.println("response string: " + responseString); JsonObject data = JsonUtil.getJsonObject(responseString); JsonObject lso = data.getJsonArray("linkset").getJsonObject(0); @@ -107,6 +122,13 @@ public void testSignposting() { Pattern exporterPattern = Pattern.compile("[<\\[][^()\\[\\]]*?exporter=schema.org[^()\\[\\]]*[>\\]]"); Matcher exporterMatcher = exporterPattern.matcher(linkHeader); exporterMatcher.find(); + // TODO: make an assertion + //assertTrue(exporterMatcher.find()); + + // Test another + Pattern exporterPattern2 = Pattern.compile("exporter=oai_datacite"); + Matcher exporterMatcher2 = exporterPattern2.matcher(linkHeader); + assertTrue(exporterMatcher2.find()); Response exportDataset = UtilIT.exportDataset(datasetPid, "schema.org"); exportDataset.prettyPrint();