Skip to content

Commit 3bc1f02

Browse files
Fix inspire fetcher (#6258)
* Fix inspire fetcher Use application/x-bibtex header Fixes #6229 * update changelog * extract urldownload method for easier overwriting revert not related changes Co-authored-by: Tobias Diez <[email protected]>
1 parent 940ef9d commit 3bc1f02

File tree

4 files changed

+52
-59
lines changed

4 files changed

+52
-59
lines changed

CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
3434
- We fixed an issue with inconsistent capitalization of file extensions when downloading files. [#6115](https://github.com/JabRef/jabref/issues/6115)
3535
- We fixed the display of language and encoding in the preferences dialog. [#6130](https://github.com/JabRef/jabref/pull/6130)
3636
- We fixed an issue where searching for full-text documents downloaded files with the same name, overwriting existing files. [#6174](https://github.com/JabRef/jabref/pull/6174)
37-
- We fixed an issue where when importing into current library an erroneous message "import cancelled" is displayed even though import is successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
37+
- We fixed an issue where, when importing into the current library, an erroneous message "import cancelled" was displayed even though the import was successful. [#6266](https://github.com/JabRef/jabref/issues/6266)
3838
- We fixed an issue where custom jstyles for Open/LibreOffice were not saved correctly. [#6170](https://github.com/JabRef/jabref/issues/6170)
39+
- We fixed an issue where the INSPIRE fetcher was no longer working. [#6229](https://github.com/JabRef/jabref/issues/6229)
3940

4041

4142
### Removed

src/main/java/org/jabref/logic/importer/SearchBasedParserFetcher.java

+12-1
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,24 @@ default void doPostCleanup(BibEntry entry) {
4949
// Do nothing by default
5050
}
5151

52+
/**
53+
* Gets the {@link URLDownload} object for downloading content. Override this method if you need to send additional headers with the download.
54+
* @param query The search query
55+
* @throws MalformedURLException
56+
* @throws FetcherException
57+
* @throws URISyntaxException
58+
*/
59+
default URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
60+
return new URLDownload(getURLForQuery(query));
61+
}
62+
5263
@Override
5364
default List<BibEntry> performSearch(String query) throws FetcherException {
5465
if (StringUtil.isBlank(query)) {
5566
return Collections.emptyList();
5667
}
5768

58-
try (InputStream stream = new URLDownload(getURLForQuery(query)).asInputStream()) {
69+
try (InputStream stream = getUrlDownload(query).asInputStream()) {
5970
List<BibEntry> fetchedEntries = getParser().parseEntries(stream);
6071

6172
// Post-cleanup
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,9 @@
11
package org.jabref.logic.importer.fetcher;
22

3-
import java.io.BufferedReader;
4-
import java.io.InputStreamReader;
53
import java.net.MalformedURLException;
64
import java.net.URISyntaxException;
75
import java.net.URL;
8-
import java.util.ArrayList;
9-
import java.util.List;
106
import java.util.Optional;
11-
import java.util.stream.Collectors;
127

138
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
149
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
@@ -18,32 +13,28 @@
1813
import org.jabref.logic.importer.Parser;
1914
import org.jabref.logic.importer.SearchBasedParserFetcher;
2015
import org.jabref.logic.importer.fileformat.BibtexParser;
21-
import org.jabref.logic.util.OS;
16+
import org.jabref.logic.importer.util.MediaTypes;
17+
import org.jabref.logic.net.URLDownload;
2218
import org.jabref.model.cleanup.FieldFormatterCleanup;
2319
import org.jabref.model.entry.BibEntry;
2420
import org.jabref.model.entry.field.StandardField;
2521
import org.jabref.model.entry.field.UnknownField;
2622
import org.jabref.model.util.DummyFileUpdateMonitor;
2723

2824
import org.apache.http.client.utils.URIBuilder;
29-
import org.jsoup.Jsoup;
30-
import org.jsoup.nodes.Document;
31-
import org.jsoup.nodes.Element;
32-
import org.jsoup.select.Elements;
3325

3426
/**
3527
* Fetches data from the INSPIRE database.
3628
*
37-
* @implNote We just use the normal search interface since it provides direct BibTeX export while the API (http://inspirehep.net/info/hep/api) currently only supports JSON and XML
3829
*/
3930
public class INSPIREFetcher implements SearchBasedParserFetcher {
4031

41-
private static final String INSPIRE_HOST = "https://inspirehep.net/search";
32+
private static final String INSPIRE_HOST = "https://inspirehep.net/api/literature/";
4233

43-
private final ImportFormatPreferences preferences;
34+
private final ImportFormatPreferences importFormatPreferences;
4435

4536
public INSPIREFetcher(ImportFormatPreferences preferences) {
46-
this.preferences = preferences;
37+
this.importFormatPreferences = preferences;
4738
}
4839

4940
@Override
@@ -59,33 +50,15 @@ public Optional<HelpFile> getHelpPage() {
5950
@Override
6051
public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
6152
URIBuilder uriBuilder = new URIBuilder(INSPIRE_HOST);
62-
uriBuilder.addParameter("p", query); // Query
63-
// uriBuilder.addParameter("jrec", "1"); // Start index (not needed at the moment)
64-
uriBuilder.addParameter("rg", "100"); // Should return up to 100 items (instead of default 25)
65-
uriBuilder.addParameter("of", "hx"); // BibTeX format
53+
uriBuilder.addParameter("q", query); // Query
6654
return uriBuilder.build().toURL();
6755
}
6856

6957
@Override
70-
public Parser getParser() {
71-
// Inspire returns the BibTeX result embedded in HTML
72-
// So we extract the BibTeX string from the <pre>bibtex</pre> tags and pass the content to the BibTeX parser
73-
return inputStream -> {
74-
String response = new BufferedReader(new InputStreamReader(inputStream)).lines().collect(Collectors.joining(OS.NEWLINE));
75-
76-
List<BibEntry> entries = new ArrayList<>();
77-
78-
Document doc = Jsoup.parse(response);
79-
Elements preElements = doc.getElementsByTag("pre");
80-
81-
for (Element elem : preElements) {
82-
// We have to use a new instance here, because otherwise only the first entry gets parsed
83-
BibtexParser bibtexParser = new BibtexParser(preferences, new DummyFileUpdateMonitor());
84-
List<BibEntry> entry = bibtexParser.parseEntries(elem.text());
85-
entries.addAll(entry);
86-
}
87-
return entries;
88-
};
58+
public URLDownload getUrlDownload(String query) throws MalformedURLException, FetcherException, URISyntaxException {
59+
URLDownload download = new URLDownload(getURLForQuery(query));
60+
download.addHeader("Accept", MediaTypes.APPLICATION_BIBTEX);
61+
return download;
8962
}
9063

9164
@Override
@@ -96,4 +69,9 @@ public void doPostCleanup(BibEntry entry) {
9669
// Remove braces around content of "title" field
9770
new FieldFormatterCleanup(StandardField.TITLE, new RemoveBracesFormatter()).cleanup(entry);
9871
}
72+
73+
@Override
74+
public Parser getParser() {
75+
return new BibtexParser(importFormatPreferences, new DummyFileUpdateMonitor());
76+
}
9977
}

src/test/java/org/jabref/logic/importer/fetcher/INSPIREFetcherTest.java

+23-20
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package org.jabref.logic.importer.fetcher;
22

3-
import java.util.Arrays;
3+
import java.util.Collections;
44
import java.util.List;
55

66
import org.jabref.logic.bibtex.FieldContentFormatterPreferences;
@@ -32,24 +32,6 @@ void setUp() {
3232

3333
@Test
3434
void searchByQueryFindsEntry() throws Exception {
35-
BibEntry phd = new BibEntry(StandardEntryType.PhdThesis);
36-
phd.setCiteKey("Diez:2019pkg");
37-
phd.setField(StandardField.AUTHOR, "Diez, Tobias");
38-
phd.setField(StandardField.TITLE, "Normal Form of Equivariant Maps and Singular Symplectic Reduction in Infinite Dimensions with Applications to Gauge Field Theory");
39-
phd.setField(StandardField.YEAR, "2019");
40-
phd.setField(StandardField.EPRINT, "1909.00744");
41-
phd.setField(new UnknownField("reportnumber"), "urn:nbn:de:bsz:15-qucosa2-352179");
42-
phd.setField(StandardField.ARCHIVEPREFIX, "arXiv");
43-
phd.setField(StandardField.PRIMARYCLASS, "math.SG");
44-
45-
BibEntry article = new BibEntry(StandardEntryType.Article);
46-
article.setCiteKey("Diez:2018gjz");
47-
article.setField(StandardField.AUTHOR, "Diez, Tobias and Rudolph, Gerd");
48-
article.setField(StandardField.TITLE, "Singular symplectic cotangent bundle reduction of gauge field theory");
49-
article.setField(StandardField.YEAR, "2018");
50-
article.setField(StandardField.EPRINT, "1812.04707");
51-
article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
52-
article.setField(StandardField.PRIMARYCLASS, "math-ph");
5335

5436
BibEntry master = new BibEntry(StandardEntryType.MastersThesis);
5537
master.setCiteKey("Diez:2014ppa");
@@ -63,6 +45,27 @@ void searchByQueryFindsEntry() throws Exception {
6345

6446
List<BibEntry> fetchedEntries = fetcher.performSearch("Fr\\'echet group actions field");
6547

66-
assertEquals(Arrays.asList(phd, article, master), fetchedEntries);
48+
assertEquals(Collections.singletonList(master), fetchedEntries);
49+
}
50+
51+
@Test
52+
public void searchByIdentifierFindsEntry() throws Exception {
53+
BibEntry article = new BibEntry(StandardEntryType.Article);
54+
article.setCiteKey("Melnikov:1998pr");
55+
article.setField(StandardField.AUTHOR, "Melnikov, Kirill and Yelkhovsky, Alexander");
56+
article.setField(StandardField.TITLE, "Top quark production at threshold with O(alpha-s**2) accuracy");
57+
article.setField(StandardField.DOI, "10.1016/S0550-3213(98)00348-4");
58+
article.setField(StandardField.JOURNAL, "Nucl.\\ Phys.\\ B");
59+
article.setField(StandardField.PAGES, "59--72");
60+
article.setField(StandardField.VOLUME, "528");
61+
article.setField(StandardField.YEAR, "1998");
62+
article.setField(StandardField.EPRINT, "hep-ph/9802379");
63+
article.setField(StandardField.ARCHIVEPREFIX, "arXiv");
64+
article.setField(new UnknownField("reportnumber"), "BUDKER-INP-1998-7, TTP-98-10");
65+
66+
List<BibEntry> fetchedEntries = fetcher.performSearch("hep-ph/9802379");
67+
68+
assertEquals(Collections.singletonList(article), fetchedEntries);
69+
6770
}
6871
}

0 commit comments

Comments
 (0)