-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
mEDRA DOI fetcher implementation. #6641
Changes from 14 commits
2d75da2
df31be7
b521e7c
3539079
1b78fd5
81d5cc3
51423cb
a42904c
c0a329f
85fc98f
2ede4ac
a108974
4f96aab
4cfc672
4ae1788
8c7f1b5
5504e52
6fd81c3
9b9b14f
3596442
d17af0f
dd0a665
4fa073e
faa304a
4523f1d
1793870
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,175 @@ | ||||||
package org.jabref.logic.importer.fetcher; | ||||||
|
||||||
import java.io.IOException; | ||||||
import java.io.InputStream; | ||||||
import java.io.PushbackInputStream; | ||||||
import java.net.MalformedURLException; | ||||||
import java.net.URISyntaxException; | ||||||
import java.net.URL; | ||||||
import java.util.ArrayList; | ||||||
import java.util.List; | ||||||
import java.util.Optional; | ||||||
|
||||||
import org.jabref.logic.importer.FetcherException; | ||||||
import org.jabref.logic.importer.IdBasedParserFetcher; | ||||||
import org.jabref.logic.importer.ParseException; | ||||||
import org.jabref.logic.importer.Parser; | ||||||
import org.jabref.logic.importer.util.JsonReader; | ||||||
import org.jabref.logic.importer.util.MediaTypes; | ||||||
import org.jabref.logic.net.URLDownload; | ||||||
import org.jabref.model.entry.AuthorList; | ||||||
import org.jabref.model.entry.BibEntry; | ||||||
import org.jabref.model.entry.field.StandardField; | ||||||
import org.jabref.model.entry.types.EntryType; | ||||||
import org.jabref.model.entry.types.StandardEntryType; | ||||||
|
||||||
import kong.unirest.json.JSONArray; | ||||||
import kong.unirest.json.JSONException; | ||||||
import kong.unirest.json.JSONObject; | ||||||
import org.apache.http.client.utils.URIBuilder; | ||||||
|
||||||
/** | ||||||
* Fetches bibliographic information for a DOI from mEDRA. | ||||||
* | ||||||
* @see <a href="https://data.medra.org">mEDRA Content Negotiation API</a> for an overview of the API | ||||||
* <p> | ||||||
* It requires "Accept" request Header attribute to be set to desired content-type. | ||||||
*/ | ||||||
public class Medra implements IdBasedParserFetcher { | ||||||
|
||||||
public static final String API_URL = "https://data.medra.org"; | ||||||
|
||||||
@Override | ||||||
public String getName() { | ||||||
return "mEDRA"; | ||||||
} | ||||||
|
||||||
@Override | ||||||
public Parser getParser() { | ||||||
return inputStream -> { | ||||||
JSONObject response = JsonReader.toJsonObject(inputStream); | ||||||
|
||||||
List<BibEntry> entries = new ArrayList<>(); | ||||||
BibEntry entry = jsonItemToBibEntry(response); | ||||||
entries.add(entry); | ||||||
|
||||||
return entries; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These 4 lines can be done shorter as it is a single entry:
Suggested change
|
||||||
}; | ||||||
} | ||||||
|
||||||
private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { | ||||||
try { | ||||||
BibEntry entry = new BibEntry(); | ||||||
entry.setType(convertType(item.getString("type"))); | ||||||
koppor marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
entry.setField(StandardField.TITLE, item.getString("title")); | ||||||
entry.setField(StandardField.AUTHOR, toAuthors(item.optJSONArray("author"))); | ||||||
entry.setField(StandardField.YEAR, | ||||||
Optional.ofNullable(item.optJSONObject("issued")) | ||||||
.map(array -> array.optJSONArray("date-parts")) | ||||||
.map(array -> array.optJSONArray(0)) | ||||||
.map(array -> array.optInt(0)) | ||||||
.map(year -> Integer.toString(year)).orElse("")); | ||||||
entry.setField(StandardField.DOI, item.getString("DOI")); | ||||||
entry.setField(StandardField.PAGES, item.optString("page")); | ||||||
entry.setField(StandardField.ISSN, item.optString("ISSN")); | ||||||
entry.setField(StandardField.JOURNAL, item.optString("container-title")); | ||||||
entry.setField(StandardField.PUBLISHER, item.optString("publisher")); | ||||||
entry.setField(StandardField.URL, item.optString("URL")); | ||||||
entry.setField(StandardField.VOLUME, item.optString("volume")); | ||||||
return entry; | ||||||
} catch (JSONException exception) { | ||||||
throw new ParseException("mEdRA API JSON format has changed", exception); | ||||||
} | ||||||
} | ||||||
|
||||||
private EntryType convertType(String type) { | ||||||
switch (type) { | ||||||
case "article-journal": | ||||||
return StandardEntryType.Article; | ||||||
default: | ||||||
return StandardEntryType.Misc; | ||||||
} | ||||||
} | ||||||
|
||||||
private String toAuthors(JSONArray authors) { | ||||||
if (authors == null) { | ||||||
return ""; | ||||||
} | ||||||
|
||||||
// input: list of {"literal":"A."} | ||||||
AuthorList authorsParsed = new AuthorList(); | ||||||
String name = ""; | ||||||
|
||||||
for (int i = 0; i < authors.length(); i++) { | ||||||
JSONObject author = authors.getJSONObject(i); | ||||||
if (author.has("literal")) { | ||||||
name = author.optString("literal", ""); | ||||||
} else { | ||||||
name = author.optString("family", "") + " " + author.optString("given", ""); | ||||||
} | ||||||
|
||||||
authorsParsed.addAuthor( | ||||||
name, | ||||||
"", | ||||||
"", | ||||||
"", | ||||||
""); | ||||||
|
||||||
} | ||||||
return authorsParsed.getAsFirstLastNamesWithAnd(); | ||||||
} | ||||||
|
||||||
@Override | ||||||
public Optional<BibEntry> performSearchById(String identifier) throws FetcherException { | ||||||
koppor marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
try (InputStream stream = getUrlDownload(identifier).asInputStream(); | ||||||
PushbackInputStream pushbackInputStream = new PushbackInputStream(stream)) { | ||||||
|
||||||
List<BibEntry> fetchedEntries = new ArrayList<>(); | ||||||
|
||||||
// check if there is anything to read since mEDRA '404 not found' returns nothing | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, you mean HTTP 404 returns an empty response. In line 259 of URLDownload is is implemented exactly as that. Why do the other fetchers cope well with that and here you have to do some special tweaks? Maybe, the response from the mEDRA thing is different? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right, URLDownload returns empty stream but I thought somewhere I had to check whether it's empty or not before trying to parse it to JSON. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it makes sense to add that to CrossRef as well There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think, this is fixed in https://github.com/mind000/jabref/pull/1/, too. |
||||||
int readByte; | ||||||
readByte = pushbackInputStream.read(); | ||||||
koppor marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
if (readByte != -1) { | ||||||
pushbackInputStream.unread(readByte); | ||||||
fetchedEntries = getParser().parseEntries(pushbackInputStream); | ||||||
} | ||||||
|
||||||
if (fetchedEntries.isEmpty()) { | ||||||
return Optional.empty(); | ||||||
} | ||||||
|
||||||
BibEntry entry = fetchedEntries.get(0); | ||||||
|
||||||
// Post-cleanup | ||||||
doPostCleanup(entry); | ||||||
|
||||||
return Optional.of(entry); | ||||||
} catch (URISyntaxException e) { | ||||||
throw new FetcherException("Search URI is malformed", e); | ||||||
} catch (IOException e) { | ||||||
// TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource. It might be that there is an UnknownHostException (eutils.ncbi.nlm.nih.gov cannot be resolved). | ||||||
throw new FetcherException("A network error occurred", e); | ||||||
} catch (ParseException e) { | ||||||
throw new FetcherException("An internal parser error occurred", e); | ||||||
} | ||||||
} | ||||||
|
||||||
@Override | ||||||
public void doPostCleanup(BibEntry entry) { | ||||||
IdBasedParserFetcher.super.doPostCleanup(entry); | ||||||
} | ||||||
|
||||||
public URLDownload getUrlDownload(String identifier) throws MalformedURLException, FetcherException, URISyntaxException { | ||||||
URLDownload download = new URLDownload(getURLForID(identifier)); | ||||||
download.addHeader("Accept", MediaTypes.APPLICATION_JSON); | ||||||
return download; | ||||||
} | ||||||
|
||||||
@Override | ||||||
public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException { | ||||||
URIBuilder uriBuilder = new URIBuilder(API_URL + "/" + identifier); | ||||||
return uriBuilder.build().toURL(); | ||||||
} | ||||||
|
||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This calls for discussing good and bad design. - Why should a general class handle special cases of a subclass?
The mEDRA
getUrlDownload
should provide a proper stream. We will see it below. First idea: It can be wrapped into a BufferedStream and checked if there is something and reset if everything is OK. If not, an empty stream should be returned, which should lead to an empty list of bib entries, shouldn't it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was the first implementation I did; then I overrode performSearchById inside the Medra class.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then please revert these changes here, as they're no longer necessary, right?