Skip to content

Commit 990fd79

Browse files
committed
Closes #2507: Datacite DOI harvester
1 parent 9c4f81a commit 990fd79

26 files changed

+1560
-416
lines changed

dataverse-persistence/src/main/java/edu/harvard/iq/dataverse/common/DatasetFieldConstant.java

+3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ public class DatasetFieldConstant {
3939
public final static String title = "title";
4040
public final static String subTitle = "subtitle"; //SEK 6-7-2016 to match what is in DB
4141
public final static String alternativeTitle = "alternativeTitle"; //missing from class
42+
public final static String titleTranslation = "titleTranslation";
43+
public final static String titleTranslationText = "titleTranslationText";
44+
public final static String titleTranslationLanguage = "titleTranslationLanguage";
4245
public final static String datasetId = "datasetId";
4346
public final static String authorName = "authorName";
4447
public final static String authorAffiliation = "authorAffiliation";

dataverse-persistence/src/main/java/edu/harvard/iq/dataverse/persistence/dataset/Dataset.java

+9-6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import edu.harvard.iq.dataverse.persistence.datafile.license.FileTermsOfUse;
1212
import edu.harvard.iq.dataverse.persistence.dataverse.link.DatasetLinkingDataverse;
1313
import edu.harvard.iq.dataverse.persistence.guestbook.Guestbook;
14+
import edu.harvard.iq.dataverse.persistence.harvest.HarvestStyle;
1415
import edu.harvard.iq.dataverse.persistence.harvest.HarvestingClient;
1516
import io.vavr.control.Option;
1617
import org.apache.commons.lang3.StringUtils;
@@ -615,22 +616,24 @@ public String getRemoteArchiveURL() {
615616
if (!isHarvested()) {
616617
return null;
617618
}
618-
if (HarvestingClient.HARVEST_STYLE_DATAVERSE.equals(getHarvestedFrom().getHarvestStyle())) {
619+
if (HarvestStyle.DATAVERSE.equals(getHarvestedFrom().getHarvestStyle())) {
619620
return getHarvestedFrom().getArchiveUrl() + "/dataset.xhtml?persistentId=" + getGlobalIdString();
620-
} else if (HarvestingClient.HARVEST_STYLE_VDC.equals(getHarvestedFrom().getHarvestStyle())) {
621+
} else if (HarvestStyle.VDC.equals(getHarvestedFrom().getHarvestStyle())) {
621622
String rootArchiveUrl = getHarvestedFrom().getHarvestingUrl();
622623
int c = rootArchiveUrl.indexOf("/OAIHandler");
623624
return c > 0
624625
? rootArchiveUrl.substring(0, c) + "/faces/study/StudyPage.xhtml?globalId=" + getGlobalIdString()
625626
: null;
626-
} else if (HarvestingClient.HARVEST_STYLE_ICPSR.equals(getHarvestedFrom().getHarvestStyle())) {
627+
} else if (HarvestStyle.ICPSR.equals(getHarvestedFrom().getHarvestStyle())) {
627628
// For the ICPSR, it turns out that the best thing to do is to
628629
// rely on the DOI to send the user to the right landing page for
629630
// the study:
630631
//String icpsrId = identifier;
631632
//return getOwner().getHarvestingClient().getArchiveUrl() + "/icpsrweb/ICPSR/studies/"+icpsrId+"?q="+icpsrId+"&searchSource=icpsr-landing";
632633
return "http://doi.org/" + getAuthority() + "/" + getIdentifier();
633-
} else if (HarvestingClient.HARVEST_STYLE_NESSTAR.equals(getHarvestedFrom().getHarvestStyle())) {
634+
} else if (HarvestStyle.DOI.equals(getHarvestedFrom().getHarvestStyle())) {
635+
return getHarvestedFrom().getArchiveUrl() + "/" + getAuthority() + "/" + getIdentifier();
636+
} else if (HarvestStyle.NESSTAR.equals(getHarvestedFrom().getHarvestStyle())) {
634637
String nServerURL = getHarvestedFrom().getArchiveUrl();
635638
// chop any trailing slashes in the server URL - or they will result
636639
// in multiple slashes in the final URL pointing to the study
@@ -641,9 +644,9 @@ public String getRemoteArchiveURL() {
641644
//SEK 09/13/18
642645
return nServerURL + "/webview/?mode=documentation&submode=abstract&studydoc=" + nServerURLencoded + "%2Fobj%2FfStudy%2F"
643646
+ getIdentifier() + "&top=yes";
644-
} else if (HarvestingClient.HARVEST_STYLE_ROPER.equals(getHarvestedFrom().getHarvestStyle())) {
647+
} else if (HarvestStyle.ROPER.equals(getHarvestedFrom().getHarvestStyle())) {
645648
return getHarvestedFrom().getArchiveUrl() + "/CFIDE/cf/action/catalog/abstract.cfm?archno=" + getIdentifier();
646-
} else if (HarvestingClient.HARVEST_STYLE_HGL.equals(getHarvestedFrom().getHarvestStyle())) {
649+
} else if (HarvestStyle.HGL.equals(getHarvestedFrom().getHarvestStyle())) {
647650
// a bit of a hack, true.
648651
// HGL documents, when turned into Dataverse studies/datasets
649652
// all have 1 datafile; the location ("storage identifier") of the file
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package edu.harvard.iq.dataverse.persistence.harvest;
2+
3+
/**
 * Different harvesting "styles". These define how we format and
 * display metadata harvested from various remote resources.
 *
 * <p>Every style carries a short human-readable description, available
 * through {@link #getDescription()}.
 */
public enum HarvestStyle {
    DATAVERSE("Dataverse v4+"),
    /** Pre-4.0 remote Dataverse. */
    VDC("DVN, v2-3"),
    ICPSR("ICPSR"),
    NESSTAR("Nesstar archive"),
    ROPER("Roper Archive"),
    HGL("HGL"),
    DOI("DOI"),
    DEFAULT("Generic OAI resource (DC)");

    /** Human-readable label of this style; kept private and exposed only via the getter. */
    private final String description;

    HarvestStyle(String description) {
        this.description = description;
    }

    /**
     * Returns the human-readable description of this harvesting style.
     *
     * @return the description text given to the constant at declaration
     */
    public String getDescription() {
        return description;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package edu.harvard.iq.dataverse.persistence.harvest;
2+
3+
/**
 * Harvest types a harvesting client can be configured with.
 */
public enum HarvestType {
    OAI,
    DATACITE_DOI;
}

dataverse-persistence/src/main/java/edu/harvard/iq/dataverse/persistence/harvest/HarvestingClient.java

+12-59
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import javax.persistence.CascadeType;
99
import javax.persistence.Column;
1010
import javax.persistence.Entity;
11+
import javax.persistence.EnumType;
12+
import javax.persistence.Enumerated;
1113
import javax.persistence.GeneratedValue;
1214
import javax.persistence.GenerationType;
1315
import javax.persistence.Id;
@@ -23,13 +25,10 @@
2325
import javax.validation.constraints.Size;
2426
import java.io.Serializable;
2527
import java.text.SimpleDateFormat;
26-
import java.util.Arrays;
2728
import java.util.Calendar;
2829
import java.util.Date;
2930
import java.util.GregorianCalendar;
30-
import java.util.LinkedHashMap;
3131
import java.util.List;
32-
import java.util.Map;
3332

3433
/**
3534
* @author Leonid Andreev
@@ -58,58 +57,14 @@ public void setId(Long id) {
5857
this.id = id;
5958
}
6059

61-
public static final String HARVEST_TYPE_OAI = "oai";
62-
public static final String HARVEST_TYPE_NESSTAR = "nesstar";
63-
64-
65-
/*
66-
* Different harvesting "styles". These define how we format and
67-
* display meatada harvested from various remote resources.
68-
*/
69-
public static final String HARVEST_STYLE_DATAVERSE = "dataverse";
70-
// pre-4.0 remote Dataverse:
71-
public static final String HARVEST_STYLE_VDC = "vdc";
72-
public static final String HARVEST_STYLE_ICPSR = "icpsr";
73-
public static final String HARVEST_STYLE_NESSTAR = "nesstar";
74-
public static final String HARVEST_STYLE_ROPER = "roper";
75-
public static final String HARVEST_STYLE_HGL = "hgl";
76-
public static final String HARVEST_STYLE_DEFAULT = "default";
77-
78-
public static final String HARVEST_STYLE_DESCRIPTION_DATAVERSE = "Dataverse v4+";
79-
// pre-4.0 remote Dataverse:
80-
public static final String HARVEST_STYLE_DESCRIPTION_VDC = "DVN, v2-3";
81-
public static final String HARVEST_STYLE_DESCRIPTION_ICPSR = "ICPSR";
82-
public static final String HARVEST_STYLE_DESCRIPTION_NESSTAR = "Nesstar archive";
83-
public static final String HARVEST_STYLE_DESCRIPTION_ROPER = "Roper Archive";
84-
public static final String HARVEST_STYLE_DESCRIPTION_HGL = "HGL";
85-
public static final String HARVEST_STYLE_DESCRIPTION_DEFAULT = "Generic OAI resource (DC)";
86-
87-
88-
public static final List<String> HARVEST_STYLE_LIST = Arrays.asList(HARVEST_STYLE_DATAVERSE, HARVEST_STYLE_VDC, HARVEST_STYLE_ICPSR, HARVEST_STYLE_NESSTAR, HARVEST_STYLE_ROPER, HARVEST_STYLE_HGL, HARVEST_STYLE_DEFAULT);
89-
public static final List<String> HARVEST_STYLE_DESCRIPTION_LIST = Arrays.asList(HARVEST_STYLE_DESCRIPTION_DATAVERSE, HARVEST_STYLE_DESCRIPTION_VDC, HARVEST_STYLE_DESCRIPTION_ICPSR, HARVEST_STYLE_DESCRIPTION_NESSTAR, HARVEST_STYLE_DESCRIPTION_ROPER, HARVEST_STYLE_DESCRIPTION_HGL, HARVEST_STYLE_DESCRIPTION_DEFAULT);
90-
91-
public static final Map<String, String> HARVEST_STYLE_INFOMAP = new LinkedHashMap<String, String>();
92-
93-
static {
94-
for (int i = 0; i < HARVEST_STYLE_LIST.size(); i++) {
95-
HARVEST_STYLE_INFOMAP.put(HARVEST_STYLE_LIST.get(i), HARVEST_STYLE_DESCRIPTION_LIST.get(i));
96-
}
97-
}
98-
99-
100-
public static final String REMOTE_ARCHIVE_URL_LEVEL_DATAVERSE = "dataverse";
101-
public static final String REMOTE_ARCHIVE_URL_LEVEL_DATASET = "dataset";
102-
public static final String REMOTE_ARCHIVE_URL_LEVEL_FILE = "file";
103-
10460
public static final String SCHEDULE_PERIOD_DAILY = "daily";
10561
public static final String SCHEDULE_PERIOD_WEEKLY = "weekly";
10662

10763
public HarvestingClient() {
108-
this.harvestType = HARVEST_TYPE_OAI; // default harvestType
109-
this.harvestStyle = HARVEST_STYLE_DATAVERSE; // default harvestStyle
64+
this.harvestType = HarvestType.OAI; // default harvestType
65+
this.harvestStyle = HarvestStyle.DATAVERSE; // default harvestStyle
11066
}
11167

112-
11368
@ManyToOne
11469
@JoinColumn(name = "dataverse_id")
11570
private Dataverse dataverse;
@@ -148,27 +103,25 @@ public void setName(String name) {
148103
this.name = name;
149104
}
150105

151-
private String harvestType;
106+
@Enumerated(EnumType.STRING)
107+
private HarvestType harvestType;
152108

153-
public String getHarvestType() {
109+
public HarvestType getHarvestType() {
154110
return harvestType;
155111
}
156112

157-
public void setHarvestType(String harvestType) {
113+
public void setHarvestType(HarvestType harvestType) {
158114
this.harvestType = harvestType;
159115
}
160116

161-
public boolean isOai() {
162-
return HARVEST_TYPE_OAI.equals(harvestType);
163-
}
164-
165-
private String harvestStyle;
117+
@Enumerated(EnumType.STRING)
118+
private HarvestStyle harvestStyle;
166119

167-
public String getHarvestStyle() {
120+
public HarvestStyle getHarvestStyle() {
168121
return harvestStyle;
169122
}
170123

171-
public void setHarvestStyle(String harvestStyle) {
124+
public void setHarvestStyle(HarvestStyle harvestStyle) {
172125
this.harvestStyle = harvestStyle;
173126
}
174127

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- Convert the legacy lower-case harvest type value to the enum constant name.
-- NOTE(review): a legacy 'nesstar' harvest type value is not converted here and
-- has no matching HarvestType constant -- confirm no such rows exist.
UPDATE harvestingclient
SET harvesttype = 'OAI'
WHERE harvesttype = 'oai';

-- Convert the legacy lower-case harvest style values to the enum constant names.
-- Rows holding any other value are left untouched.
UPDATE harvestingclient
SET harveststyle = CASE harveststyle
        WHEN 'dataverse' THEN 'DATAVERSE'
        WHEN 'vdc' THEN 'VDC'
        WHEN 'icpsr' THEN 'ICPSR'
        WHEN 'nesstar' THEN 'NESSTAR'
        WHEN 'roper' THEN 'ROPER'
        WHEN 'hgl' THEN 'HGL'
        WHEN 'default' THEN 'DEFAULT'
    END
WHERE harveststyle IN ('dataverse', 'vdc', 'icpsr', 'nesstar', 'roper', 'hgl', 'default');

dataverse-persistence/src/test/resources/dbinit.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ INSERT INTO builtinuser (id, encryptedpassword, passwordencryptionversion, usern
2121
-------------------- HARVESTING CLIENTS --------------------
2222

2323
INSERT INTO harvestingclient(id, archiveurl, deleted, harveststyle, harvesttype, harvestingnow, harvestingset, harvestingurl, metadataprefix, name, scheduledayofweek, schedulehourofday, scheduleperiod, scheduled, dataverse_id)
24-
VALUES (12, '', false, '', '', false, '', '', '', 'harvest_client_name', 1, 1, '', false, 1);
24+
VALUES (12, '', false, 'DATAVERSE', 'OAI', false, '', '', '', 'harvest_client_name', 1, 1, '', false, 1);
2525

2626
-------------------- DATAVERSES --------------------
2727

dataverse-webapp/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
66
import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand;
77
import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand;
8+
import edu.harvard.iq.dataverse.harvest.client.HarvesterParams;
89
import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean;
910
import edu.harvard.iq.dataverse.harvest.client.HarvestingClientDao;
1011
import edu.harvard.iq.dataverse.persistence.dataverse.Dataverse;
1112
import edu.harvard.iq.dataverse.persistence.harvest.HarvestingClient;
1213
import edu.harvard.iq.dataverse.persistence.user.AuthenticatedUser;
1314
import edu.harvard.iq.dataverse.util.json.JsonParseException;
15+
import io.vavr.control.Option;
1416

1517
import javax.ejb.EJB;
1618
import javax.ejb.Stateless;
@@ -210,7 +212,7 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname,
210212
}
211213

212214
DataverseRequest dataverseRequest = createDataverseRequest(superuser);
213-
harvesterService.doAsyncHarvest(dataverseRequest, harvestingClient);
215+
harvesterService.doAsyncHarvest(dataverseRequest, harvestingClient, HarvesterParams.empty());
214216

215217
} catch (Exception e) {
216218
return error(Response.Status.BAD_REQUEST, "Exception thrown when running harvesting client\"" + clientNickname + "\" via REST API; " + e.getMessage());
@@ -226,7 +228,7 @@ public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvesti
226228

227229
return jsonObjectBuilder().add("nickName", harvestingConfig.getName()).
228230
add("dataverseAlias", harvestingConfig.getDataverse().getAlias()).
229-
add("type", harvestingConfig.getHarvestType()).
231+
add("type", Option.of(harvestingConfig.getHarvestType()).map(Enum::name).getOrNull()).
230232
add("harvestUrl", harvestingConfig.getHarvestingUrl()).
231233
add("archiveUrl", harvestingConfig.getArchiveUrl()).
232234
add("metadataFormat", harvestingConfig.getMetadataPrefix()).

0 commit comments

Comments
 (0)