diff --git a/jablib/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java b/jablib/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java
index 59527031e4c..0975c25ae1d 100644
--- a/jablib/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java
+++ b/jablib/src/main/java/org/jabref/logic/importer/util/FileFieldParser.java
@@ -173,7 +173,7 @@ static LinkedFile convert(List entry) {
                 // there is no Path.isValidPath(String) method
                 field = new LinkedFile(entry.getFirst(), Path.of(pathStr), entry.get(2));
             } catch (InvalidPathException e) {
-                // Ignored
+                // Use string if path is invalid
                 LOGGER.debug("Invalid path object, continuing with string", e);
                 field = new LinkedFile(entry.getFirst(), pathStr, entry.get(2));
             }
diff --git a/jablib/src/main/java/org/jabref/logic/util/URLUtil.java b/jablib/src/main/java/org/jabref/logic/util/URLUtil.java
index 2e1e586911a..8a8c7772254 100644
--- a/jablib/src/main/java/org/jabref/logic/util/URLUtil.java
+++ b/jablib/src/main/java/org/jabref/logic/util/URLUtil.java
@@ -17,6 +17,8 @@
  * For GUI-oriented URL utilities see {@link org.jabref.gui.fieldeditors.URLUtil}.
  */
 public class URLUtil {
+    private static final String PROTOCOL_SEPARATOR = "://";
+    private static final Pattern SCHEME_PREFIX = Pattern.compile("^[a-zA-Z]+://.*");
 
     private static final String URL_REGEX = "(?i)\\b((?:https?|ftp)://[^\\s]+)";
 
@@ -79,31 +81,46 @@ public static String cleanGoogleSearchURL(String url) {
     /**
      * Checks whether the given String is a URL.
      * <p>
-     * Currently only checks for a protocol String.
+     * A valid URL must have a scheme (e.g., "http", "https",
+     * "ftp") and an authority (e.g., "example.com").
+     * See
+     * URL Syntax for details.
      *
      * @param url the String to check for a URL
      * @return true if url contains a valid URL
      */
     public static boolean isURL(String url) {
+        if (!SCHEME_PREFIX.matcher(url).matches()) {
+            return false;
+        }
         try {
-            create(url);
+            createUri(url);
             return true;
-        } catch (MalformedURLException | IllegalArgumentException e) {
+        } catch (IllegalArgumentException e) {
             return false;
         }
     }
 
     /**
      * Creates a {@link URL} object from the given string URL.
+     * <p>
+     * If the given URL string does not contain a protocol (e.g., "example.com"),
+     * "https://" is automatically added to ensure a valid URL format.
+     * This prevents errors when handling URLs without explicit protocols.
      *
      * @param url the URL string to be converted into a {@link URL}.
      * @return the {@link URL} object created from the string URL.
      * @throws MalformedURLException if the URL is malformed and cannot be converted to a {@link URL}.
      */
     public static URL create(String url) throws MalformedURLException {
-        return createUri(url).toURL();
-    }
+        if (!url.contains(PROTOCOL_SEPARATOR)) {
+            url = "https://" + url;
+        }
 
+        URI uri = createUri(url);
+        return uri.toURL();
+    }
+
     /**
      * Creates a {@link URI} object from the given string URL.
      * This method attempts to convert the given URL string into a {@link URI} object.
diff --git a/jablib/src/test/java/org/jabref/logic/net/URLUtilTest.java b/jablib/src/test/java/org/jabref/logic/net/URLUtilTest.java
index 1728c8c898e..104709407ea 100644
--- a/jablib/src/test/java/org/jabref/logic/net/URLUtilTest.java
+++ b/jablib/src/test/java/org/jabref/logic/net/URLUtilTest.java
@@ -1,10 +1,13 @@
 package org.jabref.logic.net;
 
 import java.net.URI;
+import java.net.URL;
 
 import org.jabref.logic.util.URLUtil;
 
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -87,4 +90,29 @@ void createUriShouldHandlePipeCharacter() {
         URI uri = URLUtil.createUri(input);
         assertEquals("http://example.com/test%7Cfile", uri.toString());
     }
+
+    @ParameterizedTest
+    @CsvSource({
+            // Relative URLs (should default to HTTPS)
+            "www.example.com, https://www.example.com",
+            "example.com, https://example.com",
+
+            // Absolute URLs (should remain unchanged)
+            "http://www.example.com, http://www.example.com",
+            "https://www.example.com, https://www.example.com"
+    })
+    void createShouldHandleURLs(String input, String expected) throws Exception {
+        URL url = URLUtil.create(input);
+        assertEquals(expected, url.toString());
+    }
+
+    @ParameterizedTest
+    @CsvSource({
+            "ftp://example.com, ftp://example.com",
+            "file:///path/to/file, file:/path/to/file"
+    })
+    void createShouldHandleOtherProtocols(String inputUrl, String expectedUrl) throws Exception {
+        URL actualUrl = URLUtil.create(inputUrl);
+        assertEquals(expectedUrl, actualUrl.toString());
+    }
 }
diff --git a/src/test/java/org/jabref/logic/net/URLUtilTest.java b/src/test/java/org/jabref/logic/net/URLUtilTest.java
new file mode 100644
index 00000000000..c7617e34a17
--- /dev/null
+++ b/src/test/java/org/jabref/logic/net/URLUtilTest.java
@@ -0,0 +1,119 @@
+package org.jabref.logic.net;
+
+import java.net.URI;
+import java.net.URL;
+
+import org.jabref.logic.util.URLUtil;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class URLUtilTest {
+
+    @Test
+    void cleanGoogleSearchURL() throws Exception {
+        // empty text
+        assertEquals("", URLUtil.cleanGoogleSearchURL(""));
+        assertEquals(" ", URLUtil.cleanGoogleSearchURL(" "));
+        // no URL
+        assertEquals("this is no url!", URLUtil.cleanGoogleSearchURL("this is no url!"));
+        // no Google search URL
+        assertEquals("http://dl.acm.org/citation.cfm?id=321811", URLUtil.cleanGoogleSearchURL("http://dl.acm.org/citation.cfm?id=321811"));
+        // malformed Google URL
+        assertEquals("https://www.google.de/url♥", URLUtil.cleanGoogleSearchURL("https://www.google.de/url♥"));
+        // no queries
+        assertEquals("https://www.google.de/url", URLUtil.cleanGoogleSearchURL("https://www.google.de/url"));
+        assertEquals("https://www.google.de/url?", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?"));
+        // no multiple queries
+        assertEquals("https://www.google.de/url?key=value", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?key=value"));
+        // no key values
+        assertEquals("https://www.google.de/url?key", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?key"));
+        assertEquals("https://www.google.de/url?url", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?url"));
+        assertEquals("https://www.google.de/url?key=", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?key="));
+        // no url param
+        assertEquals("https://www.google.de/url?key=value&key2=value2", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?key=value&key2=value2"));
+        // no url param value
+        assertEquals("https://www.google.de/url?url=", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?url="));
+        // url param value no URL
+        assertEquals("https://www.google.de/url?url=this+is+no+url", URLUtil.cleanGoogleSearchURL("https://www.google.de/url?url=this+is+no+url"));
+        // Http
+        assertEquals(
+                "http://moz.com/ugc/the-ultimate-guide-to-the-google-search-parameters",
+                URLUtil.cleanGoogleSearchURL("http://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CCEQFjAAahUKEwjJurHd2sfHAhWBsxQKHSrEAaM&url=http%3A%2F%2Fmoz.com%2Fugc%2Fthe-ultimate-guide-to-the-google-search-parameters&ei=0THeVYmOJIHnUqqIh5gK&usg=AFQjCNHnid_r_d2LP8_MqvI7lQnTC3lB_g&sig2=ICzxDroG2ENTJSUGmdhI2w"));
+        // Https
+        assertEquals(
+                "https://moz.com/ugc/the-ultimate-guide-to-the-google-search-parameters",
+                URLUtil.cleanGoogleSearchURL("https://www.google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CCEQFjAAahUKEwjJurHd2sfHAhWBsxQKHSrEAaM&url=https%3A%2F%2Fmoz.com%2Fugc%2Fthe-ultimate-guide-to-the-google-search-parameters&ei=0THeVYmOJIHnUqqIh5gK&usg=AFQjCNHnid_r_d2LP8_MqvI7lQnTC3lB_g&sig2=ICzxDroG2ENTJSUGmdhI2w"));
+        // root domain
+        assertEquals(
+                "https://moz.com/ugc/the-ultimate-guide-to-the-google-search-parameters",
+                URLUtil.cleanGoogleSearchURL("https://google.de/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CCEQFjAAahUKEwjJurHd2sfHAhWBsxQKHSrEAaM&url=https%3A%2F%2Fmoz.com%2Fugc%2Fthe-ultimate-guide-to-the-google-search-parameters&ei=0THeVYmOJIHnUqqIh5gK&usg=AFQjCNHnid_r_d2LP8_MqvI7lQnTC3lB_g&sig2=ICzxDroG2ENTJSUGmdhI2w"));
+        // foreign domain
+        assertEquals(
+                "https://moz.com/ugc/the-ultimate-guide-to-the-google-search-parameters",
+                URLUtil.cleanGoogleSearchURL("https://www.google.fr/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CCEQFjAAahUKEwjJurHd2sfHAhWBsxQKHSrEAaM&url=https%3A%2F%2Fmoz.com%2Fugc%2Fthe-ultimate-guide-to-the-google-search-parameters&ei=0THeVYmOJIHnUqqIh5gK&usg=AFQjCNHnid_r_d2LP8_MqvI7lQnTC3lB_g&sig2=ICzxDroG2ENTJSUGmdhI2w"));
+        // foreign domain co.uk
+        assertEquals(
+                "https://moz.com/ugc/the-ultimate-guide-to-the-google-search-parameters",
+                URLUtil.cleanGoogleSearchURL("https://www.google.co.uk/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CCEQFjAAahUKEwjJurHd2sfHAhWBsxQKHSrEAaM&url=https%3A%2F%2Fmoz.com%2Fugc%2Fthe-ultimate-guide-to-the-google-search-parameters&ei=0THeVYmOJIHnUqqIh5gK&usg=AFQjCNHnid_r_d2LP8_MqvI7lQnTC3lB_g&sig2=ICzxDroG2ENTJSUGmdhI2w"));
+        // accept ftp results
+        assertEquals(
+                "ftp://moz.com/ugc/the-ultimate-guide-to-the-google-search-parameters",
+                URLUtil.cleanGoogleSearchURL("https://www.google.fr/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CCEQFjAAahUKEwjJurHd2sfHAhWBsxQKHSrEAaM&url=ftp%3A%2F%2Fmoz.com%2Fugc%2Fthe-ultimate-guide-to-the-google-search-parameters&ei=0THeVYmOJIHnUqqIh5gK&usg=AFQjCNHnid_r_d2LP8_MqvI7lQnTC3lB_g&sig2=ICzxDroG2ENTJSUGmdhI2w"));
+    }
+
+    @Test
+    void isURLshouldAcceptValidURL() {
+        assertTrue(URLUtil.isURL("http://www.google.com"));
+        assertTrue(URLUtil.isURL("https://www.google.com"));
+    }
+
+    @Test
+    void isURLshouldRejectInvalidURL() {
+        assertFalse(URLUtil.isURL("www.google.com"));
+        assertFalse(URLUtil.isURL("google.com"));
+    }
+
+    @Test
+    void isURLshouldRejectEmbeddedURL() {
+        boolean result = URLUtil.isURL("dblp computer science bibliography, http://dblp.org");
+        assertEquals(false, result);
+    }
+
+    @Test
+    void createUriShouldHandlePipeCharacter() {
+        String input = "http://example.com/test|file";
+        URI uri = URLUtil.createUri(input);
+        assertEquals("http://example.com/test%7Cfile", uri.toString());
+    }
+
+    @ParameterizedTest
+    @CsvSource({
+            // Relative URLs (should default to HTTPS)
+            "www.example.com, https://www.example.com",
+            "example.com, https://example.com",
+
+            // Absolute URLs (should remain unchanged)
+            "http://www.example.com, http://www.example.com",
+            "https://www.example.com, https://www.example.com"
+    })
+    void createShouldHandleURLs(String input, String expected) throws Exception {
+        URL url = URLUtil.create(input);
+        assertEquals(expected, url.toString());
+    }
+
+    @ParameterizedTest
+    @CsvSource({
+            "ftp://example.com, ftp://example.com",
+            "file:///path/to/file, file:/path/to/file"
+    })
+    void createShouldHandleOtherProtocols(String inputUrl, String expectedUrl) throws Exception {
+        URL actualUrl = URLUtil.create(inputUrl);
+        assertEquals(expectedUrl, actualUrl.toString());
+    }
+}