From de790f92227efe01a80350638b78a5a3460e71ab Mon Sep 17 00:00:00 2001
From: Stefan Kolb <stefan-kolb@web.de>
Date: Sun, 15 Mar 2020 15:06:04 +0100
Subject: [PATCH 1/3] Improve ACS fetcher

---
 .../jabref/logic/importer/fetcher/ACS.java    | 25 ++++++++++---------
 .../logic/importer/fetcher/ACSTest.java       | 11 +++++++-
 2 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ACS.java b/src/main/java/org/jabref/logic/importer/fetcher/ACS.java
index 2fa068e1eb4..b2fff10f38f 100644
--- a/src/main/java/org/jabref/logic/importer/fetcher/ACS.java
+++ b/src/main/java/org/jabref/logic/importer/fetcher/ACS.java
@@ -26,7 +26,7 @@ public class ACS implements FulltextFetcher {
 
     /**
      * Tries to find a fulltext URL for a given BibTex entry.
-     *
+     * <p>
      * Currently only uses the DOI if found.
      *
      * @param entry The Bibtex entry
@@ -37,23 +37,24 @@ public class ACS implements FulltextFetcher {
     @Override
     public Optional<URL> findFullText(BibEntry entry) throws IOException {
         Objects.requireNonNull(entry);
-        Optional<URL> pdfLink = Optional.empty();
 
         // DOI search
         Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);
 
-        if (doi.isPresent()) {
-            String source = String.format(SOURCE, doi.get().getDOI());
-            // Retrieve PDF link
-            Document html = Jsoup.connect(source).ignoreHttpErrors(true).get();
-            Element link = html.select("a.button_primary").first();
+        if (!doi.isPresent()) {
+            return Optional.empty();
+        }
+
+        String source = String.format(SOURCE, doi.get().getDOI());
+        // Retrieve PDF link
+        Document html = Jsoup.connect(source).ignoreHttpErrors(true).get();
+        Element link = html.select("a.button_primary").first();
 
-            if (link != null) {
-                LOGGER.info("Fulltext PDF found @ ACS.");
-                pdfLink = Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/")));
-            }
+        if (link != null) {
+            LOGGER.info("Fulltext PDF found @ ACS.");
+            return Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/")));
         }
-        return pdfLink;
+        return Optional.empty();
     }
 
     @Override
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ACSTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ACSTest.java
index 8a58fd47819..2552b5cc735 100644
--- a/src/test/java/org/jabref/logic/importer/fetcher/ACSTest.java
+++ b/src/test/java/org/jabref/logic/importer/fetcher/ACSTest.java
@@ -16,7 +16,6 @@
 
 @FetcherTest
 class ACSTest {
-
     private ACS finder;
     private BibEntry entry;
 
@@ -44,4 +43,14 @@ void notFoundByDOI() throws IOException {
 
         assertEquals(Optional.empty(), finder.findFullText(entry));
     }
+
+    @Test
+    void entityWithoutDoi() throws IOException {
+        assertEquals(Optional.empty(), finder.findFullText(entry));
+    }
+
+    @Test
+    void trustLevel() {
+        assertEquals(TrustLevel.PUBLISHER, finder.getTrustLevel());
+    }
 }

From 8f68de937e53213bb3e778230c9b91a515f29ac9 Mon Sep 17 00:00:00 2001
From: Stefan Kolb <stefan-kolb@web.de>
Date: Sun, 15 Mar 2020 15:13:05 +0100
Subject: [PATCH 2/3] Improve arXiv fetcher

---
 .../java/org/jabref/logic/importer/fetcher/ArXiv.java |  4 +---
 .../org/jabref/logic/importer/fetcher/ArXivTest.java  | 11 ++++++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
index de3539d8184..28a521a8022 100644
--- a/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
+++ b/src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
@@ -74,10 +74,8 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
                                                           .filter(Optional::isPresent)
                                                           .map(Optional::get)
                                                           .findFirst();
+            pdfUrl.ifPresent(url -> LOGGER.info("Fulltext PDF found @ arXiv."));
 
-            if (pdfUrl.isPresent()) {
-                LOGGER.info("Fulltext PDF found @ arXiv.");
-            }
             return pdfUrl;
         } catch (FetcherException e) {
             LOGGER.warn("arXiv API request failed", e);
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java
index 523cf9f651a..8bee0365c5b 100644
--- a/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java
+++ b/src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java
@@ -23,7 +23,6 @@
 
 @FetcherTest
 class ArXivTest {
-
     private ArXiv finder;
     private BibEntry entry;
     private BibEntry sliceTheoremPaper;
@@ -121,6 +120,16 @@ void findFullTextByDOINotAvailableInCatalog() throws IOException {
         assertEquals(Optional.empty(), finder.findFullText(entry));
     }
 
+    @Test
+    void findFullTextEntityWithoutDoi() throws IOException {
+        assertEquals(Optional.empty(), finder.findFullText(entry));
+    }
+
+    @Test
+    void findFullTextTrustLevel() {
+        assertEquals(TrustLevel.PREPRINT, finder.getTrustLevel());
+    }
+
     @Test
     void searchEntryByPartOfTitle() throws Exception {
         assertEquals(Collections.singletonList(sliceTheoremPaper),

From 0f93e2244d6500f1a6154c8433f2e2ad605b7939 Mon Sep 17 00:00:00 2001
From: Stefan Kolb <stefan-kolb@web.de>
Date: Sun, 15 Mar 2020 18:15:44 +0100
Subject: [PATCH 3/3] Minor improvements for Google Scholar fetcher

Detect the Google captcha div.
Follow-up PRs might show this to the user or give more information.
---
 .../logic/importer/fetcher/GoogleScholar.java | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
index 4e28444ff28..a8850bd9822 100644
--- a/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
+++ b/src/main/java/org/jabref/logic/importer/fetcher/GoogleScholar.java
@@ -35,7 +35,7 @@
 
 /**
  * FulltextFetcher implementation that attempts to find a PDF URL at GoogleScholar.
- *
+ * <p>
  * Search String infos: https://scholar.google.com/intl/en/scholar/help.html#searching
  */
 public class GoogleScholar implements FulltextFetcher, SearchBasedFetcher {
@@ -58,11 +58,10 @@ public GoogleScholar(ImportFormatPreferences importFormatPreferences) {
     @Override
     public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherException {
         Objects.requireNonNull(entry);
-        Optional<URL> pdfLink = Optional.empty();
 
         // Search in title
         if (!entry.hasField(StandardField.TITLE)) {
-            return pdfLink;
+            return Optional.empty();
         }
 
         try {
@@ -74,12 +73,10 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherExc
             // as_occt field to search in
             uriBuilder.addParameter("as_occt", "title");
 
-            pdfLink = search(uriBuilder.toString());
+            return search(uriBuilder.toString());
         } catch (URISyntaxException e) {
             throw new FetcherException("Building URI failed.", e);
         }
-
-        return pdfLink;
     }
 
     @Override
@@ -91,6 +88,11 @@ private Optional<URL> search(String url) throws IOException {
         Optional<URL> pdfLink = Optional.empty();
 
         Document doc = Jsoup.connect(url).userAgent(URLDownload.USER_AGENT).get();
+
+        if (needsCaptcha(doc.body().html())) {
+            LOGGER.warn("Hit Google traffic limitation. Captcha prevents automatic fetching.");
+            return Optional.empty();
+        }
         // Check results for PDF link
         // TODO: link always on first result or none?
         for (int i = 0; i < NUM_RESULTS; i++) {
@@ -111,6 +113,10 @@ private Optional<URL> search(String url) throws IOException {
         return pdfLink;
     }
 
+    private boolean needsCaptcha(String body) {
+        return body.contains("id=\"gs_captcha_ccl\"");
+    }
+
     @Override
     public String getName() {
         return "Google Scholar";
@@ -158,6 +164,11 @@ public List<BibEntry> performSearch(String query) throws FetcherException {
     private void addHitsFromQuery(List<BibEntry> entryList, String queryURL) throws IOException, FetcherException {
         String content = new URLDownload(queryURL).asString();
 
+        if (needsCaptcha(content)) {
+            throw new FetcherException("Fetching from Google Scholar failed.",
+                    Localization.lang("This might be caused by reaching the traffic limitation of Google Scholar (see 'Help' for details)."), null);
+        }
+
         Matcher matcher = LINK_TO_BIB_PATTERN.matcher(content);
         while (matcher.find()) {
             String citationsPageURL = matcher.group().replace("&amp;", "&");