Skip to content

Commit 954ce1a

Browse files
authored
Closes #2506: Removal of harvested DOI (#2523)
* Closes #2507: Datacite DOI harvester * Refs #2507: Minor adjustments * Closes #2504: Endpoint to run datacite harvester * Closes #2506: Removal of harvested DOI * review comments * merge conflicts * merge conflicts
1 parent 00d99d8 commit 954ce1a

File tree

5 files changed

+239
-65
lines changed

5 files changed

+239
-65
lines changed

dataverse-webapp/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java

+25
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import edu.harvard.iq.dataverse.api.dto.DatasetDTO;
88
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
99
import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestedDatasetCommand;
10+
import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand;
1011
import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand;
1112
import edu.harvard.iq.dataverse.persistence.datafile.DataFile;
1213
import edu.harvard.iq.dataverse.persistence.dataset.Dataset;
@@ -112,6 +113,30 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
112113
}
113114
}
114115

116+
@TransactionAttribute(REQUIRES_NEW)
117+
public void doDeleteHarvestedDataset(DataverseRequest request, HarvestingClient harvestingClient, String identifier) throws ImportException {
118+
Dataset dataset = datasetDao.getDatasetByHarvestInfo(harvestingClient.getDataverse(), identifier);
119+
if (dataset != null) {
120+
// Purge all the SOLR documents associated with this client from the
121+
// index server:
122+
indexService.deleteHarvestedDocuments(dataset);
123+
124+
// files from harvested datasets are removed unceremoniously,
125+
// directly in the database. no need to bother calling the
126+
// DeleteFileCommand on them.
127+
for (DataFile harvestedFile : dataset.getFiles()) {
128+
DataFile merged = em.merge(harvestedFile);
129+
em.remove(merged);
130+
}
131+
132+
dataset.setFiles(null);
133+
Dataset merged = em.merge(dataset);
134+
engineSvc.submit(new DeleteDatasetCommand(request, merged));
135+
} else {
136+
throw new ImportException("No dataset found for " + identifier + ", skipping delete. ");
137+
}
138+
}
139+
115140
private Dataset importDatasetDTOJson(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String json) throws ImportException {
116141
try {
117142
Dataset ds = harvestedJsonParser.parseDataset(json);

dataverse-webapp/src/main/java/edu/harvard/iq/dataverse/harvest/client/DataciteDOIHarvester.java

+18-7
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
@LocalBean
1919
public class DataciteDOIHarvester implements Harvester<DataciteHarvesterParams> {
2020

21-
2221
@Inject
2322
private DataCiteRestApiClient dataCiteRestApiClient;
2423

@@ -42,7 +41,7 @@ public Class<DataciteHarvesterParams> getParamsClass() {
4241

4342
@Override
4443
public HarvesterResult harvest(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, DataciteHarvesterParams params) throws ImportException {
45-
if (params.getDoiImport().isEmpty()) {
44+
if (params.getDoiImport().isEmpty() && params.getDoiRemove().isEmpty()) {
4645
throw new ImportException("Missing DOI's");
4746
}
4847

@@ -52,24 +51,36 @@ public HarvesterResult harvest(DataverseRequest dataverseRequest, HarvestingClie
5251
importDOI(rs, dataverseRequest, harvestingClient, hdLogger, doi);
5352
}
5453

54+
for (DataciteHarvesterParams.DOIValue doi: params.getDoiRemove()) {
55+
removeDOI(rs, dataverseRequest, harvestingClient, hdLogger, doi);
56+
}
57+
5558
return rs;
5659
}
5760

5861
// -------------------- PRIVATE --------------------
5962

60-
private void importDOI(HarvesterResult rs, DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, DataciteHarvesterParams.DOIValue doi) throws ImportException {
63+
private void importDOI(HarvesterResult rs, DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, DataciteHarvesterParams.DOIValue doi) {
6164
try {
6265
DatasetDTO dto = dataciteDatasetMapper.toDataset(dataCiteRestApiClient.findDoi(doi.getAuthority(), doi.getId()));
6366
importService.doImportHarvestedDataset(dataverseRequest, harvestingClient, doi.getFull(), dto);
6467
rs.incrementHarvested();
6568
} catch (Exception e) {
6669
rs.incrementFailed();
67-
String errorMessage = "Failed to import DOI "
70+
hdLogger.log(Level.SEVERE, "Failed to import DOI (" + doi.getFull() + ") "
6871
+ harvestingClient.getName()
6972
+ "; "
70-
+ e.getMessage();
71-
hdLogger.log(Level.SEVERE, errorMessage);
72-
throw new ImportException(errorMessage, e);
73+
+ e.getMessage());
74+
}
75+
}
76+
77+
private void removeDOI(HarvesterResult rs, DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, DataciteHarvesterParams.DOIValue doi) {
78+
try {
79+
importService.doDeleteHarvestedDataset(dataverseRequest, harvestingClient, doi.getFull());
80+
rs.incrementDeleted();
81+
} catch (Exception e) {
82+
rs.incrementFailed();
83+
hdLogger.log(Level.SEVERE, "Failed to delete DOI " + doi.getFull() + " (" + harvestingClient.getName() + "): " + e.getMessage());
7384
}
7485
}
7586
}

dataverse-webapp/src/main/java/edu/harvard/iq/dataverse/harvest/client/DataciteHarvesterParams.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package edu.harvard.iq.dataverse.harvest.client;
22

3-
import com.fasterxml.jackson.annotation.JsonCreator;
43
import io.vavr.control.Option;
54

65
import java.util.Collections;
@@ -15,18 +14,28 @@ public class DataciteHarvesterParams extends HarvesterParams {
1514

1615
private List<DOIValue> doiImport;
1716

17+
private List<DOIValue> doiRemove;
18+
1819
// -------------------- GETTERS --------------------
1920

2021
public List<DOIValue> getDoiImport() {
2122
return Option.of(doiImport).getOrElse(Collections.emptyList());
2223
}
2324

25+
public List<DOIValue> getDoiRemove() {
26+
return Option.of(doiRemove).getOrElse(Collections.emptyList());
27+
}
28+
2429
// -------------------- SETTERS --------------------
2530

2631
public void setDoiImport(List<DOIValue> doiImport) {
2732
this.doiImport = doiImport;
2833
}
2934

35+
public void setDoiRemove(List<DOIValue> doiRemove) {
36+
this.doiRemove = doiRemove;
37+
}
38+
3039
// -------------------- INNER CLASSES --------------------
3140

3241
static public class DOIValue {

dataverse-webapp/src/main/java/edu/harvard/iq/dataverse/harvest/client/OAIHarvester.java

+3-57
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,14 @@
11
package edu.harvard.iq.dataverse.harvest.client;
22

3-
import edu.harvard.iq.dataverse.DatasetDao;
4-
import edu.harvard.iq.dataverse.EjbDataverseEngine;
53
import edu.harvard.iq.dataverse.api.imports.HarvestImporterType;
64
import edu.harvard.iq.dataverse.api.imports.HarvestImporterTypeResolver;
75
import edu.harvard.iq.dataverse.api.imports.ImportException;
86
import edu.harvard.iq.dataverse.api.imports.ImportServiceBean;
97
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
10-
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
11-
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
12-
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
13-
import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand;
148
import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler;
159
import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandlerException;
16-
import edu.harvard.iq.dataverse.persistence.datafile.DataFile;
17-
import edu.harvard.iq.dataverse.persistence.dataset.Dataset;
1810
import edu.harvard.iq.dataverse.persistence.harvest.HarvestType;
1911
import edu.harvard.iq.dataverse.persistence.harvest.HarvestingClient;
20-
import edu.harvard.iq.dataverse.search.index.IndexServiceBean;
2112
import org.apache.commons.io.FileUtils;
2213
import org.dspace.xoai.model.oaipmh.Header;
2314
import org.dspace.xoai.serviceprovider.exceptions.HarvestException;
@@ -46,21 +37,12 @@ public class OAIHarvester implements Harvester<HarvesterParams.EmptyHarvesterPar
4637
@PersistenceContext(unitName = "VDCNet-ejbPU")
4738
private EntityManager em;
4839

49-
@EJB
50-
DatasetDao datasetDao;
51-
5240
@EJB
5341
ImportServiceBean importService;
5442

55-
@EJB
56-
IndexServiceBean indexService;
57-
5843
@EJB
5944
private HarvestImporterTypeResolver harvestImporterTypeResolver;
6045

61-
@EJB
62-
EjbDataverseEngine engineService;
63-
6446
// -------------------- LOGIC --------------------
6547

6648
@Override
@@ -136,16 +118,9 @@ private void processRecord(HarvesterResult result, DataverseRequest dataverseReq
136118
if (record.isDeleted()) {
137119
hdLogger.info("Deleting harvesting dataset for " + identifier + ", per the OAI server's instructions.");
138120

139-
Dataset dataset = datasetDao.getDatasetByHarvestInfo(oaiHandler.getHarvestingClient().getDataverse(), identifier);
140-
if (dataset != null) {
141-
result.incrementDeleted();
142-
hdLogger.info("Deleting dataset " + dataset.getGlobalIdString());
143-
deleteHarvestedDataset(dataset, dataverseRequest, hdLogger);
144-
// TODO:
145-
// check the status of that Delete - see if it actually succeeded
146-
} else {
147-
hdLogger.info("No dataset found for " + identifier + ", skipping delete. ");
148-
}
121+
importService.doDeleteHarvestedDataset(dataverseRequest, oaiHandler.getHarvestingClient(), identifier);
122+
result.incrementDeleted();
123+
149124
} else {
150125
hdLogger.info("Successfully retrieved GetRecord response.");
151126
HarvestImporterType importType = harvestImporterTypeResolver.resolveImporterType(oaiHandler.getMetadataFormat())
@@ -175,35 +150,6 @@ private void processRecord(HarvesterResult result, DataverseRequest dataverseReq
175150
}
176151
}
177152

178-
private void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Logger hdLogger) {
179-
// Purge all the SOLR documents associated with this client from the
180-
// index server:
181-
indexService.deleteHarvestedDocuments(dataset);
182-
183-
try {
184-
// files from harvested datasets are removed unceremoniously,
185-
// directly in the database. no need to bother calling the
186-
// DeleteFileCommand on them.
187-
for (DataFile harvestedFile : dataset.getFiles()) {
188-
DataFile merged = em.merge(harvestedFile);
189-
em.remove(merged);
190-
harvestedFile = null;
191-
}
192-
dataset.setFiles(null);
193-
Dataset merged = em.merge(dataset);
194-
engineService.submit(new DeleteDatasetCommand(request, merged));
195-
} catch (IllegalCommandException ex) {
196-
// TODO: log the result
197-
} catch (PermissionException ex) {
198-
// TODO: log the result
199-
} catch (CommandException ex) {
200-
// TODO: log the result
201-
}
202-
203-
// TODO: log the success result
204-
}
205-
206-
207153
private void logBeginOaiHarvest(Logger hdLogger, HarvestingClient harvestingClient) {
208154
hdLogger.log(Level.INFO, "BEGIN HARVEST, oaiUrl="
209155
+ harvestingClient.getHarvestingUrl()

0 commit comments

Comments
 (0)