Skip to content

Commit

Permalink
Fix file /api/search endpoint when collections not configured as a po…
Browse files Browse the repository at this point in the history
…stgres view.

Fixes FAIRSPC-106
  • Loading branch information
ewelinagr committed Aug 30, 2024
1 parent 952178a commit a2496ef
Show file tree
Hide file tree
Showing 15 changed files with 567 additions and 173 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import io.fairspace.saturn.services.metadata.MetadataPermissions;
import io.fairspace.saturn.services.metadata.MetadataService;
import io.fairspace.saturn.services.metadata.validation.*;
import io.fairspace.saturn.services.search.FileSearchService;
import io.fairspace.saturn.services.search.JdbcFileSearchService;
import io.fairspace.saturn.services.search.SearchService;
import io.fairspace.saturn.services.search.SparqlFileSearchService;
import io.fairspace.saturn.services.users.UserService;
import io.fairspace.saturn.services.views.*;
import io.fairspace.saturn.services.workspaces.WorkspaceService;
Expand All @@ -31,6 +34,7 @@
import io.fairspace.saturn.webdav.blobstore.LocalBlobStore;

import static io.fairspace.saturn.config.ConfigLoader.CONFIG;
import static io.fairspace.saturn.services.views.ViewStoreClientFactory.protectedResources;
import static io.fairspace.saturn.vocabulary.Vocabularies.VOCABULARY;

@Log4j2
Expand All @@ -48,6 +52,7 @@ public class Services {
private final ViewService viewService;
private final QueryService queryService;
private final SearchService searchService;
private final FileSearchService fileSearchService;
private final BlobStore blobStore;
private final DavFactory davFactory;
private final HttpServlet davServlet;
Expand Down Expand Up @@ -115,6 +120,18 @@ public Services(
? new SparqlQueryService(config.search, viewsConfig, filteredDataset)
: new JdbcQueryService(
config.search, viewsConfig, viewStoreClientFactory, transactions, davFactory.root);

// File search should be done using JDBC for performance reasons. However, if the view store is not available,
// or the collections and files view is not configured, we fall back to running SPARQL queries directly on
// the RDF database.
boolean useSparqlFileSearchService = viewStoreClientFactory == null
|| viewsConfig.views.stream().noneMatch(view -> protectedResources.containsAll(view.types));

fileSearchService = useSparqlFileSearchService
? new SparqlFileSearchService(filteredDataset)
: new JdbcFileSearchService(
config.search, viewsConfig, viewStoreClientFactory, transactions, davFactory.root);

viewService =
new ViewService(config, viewsConfig, filteredDataset, viewStoreClientFactory, metadataPermissions);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public static Filter createSparkFilter(String apiPathPrefix, Services svc, Confi
new WorkspaceApp(apiPathPrefix + "/workspaces", svc.getWorkspaceService()),
new MetadataApp(apiPathPrefix + "/metadata", svc.getMetadataService()),
new ViewApp(apiPathPrefix + "/views", svc.getViewService(), svc.getQueryService()),
new SearchApp(apiPathPrefix + "/search", svc.getSearchService(), svc.getQueryService()),
new SearchApp(apiPathPrefix + "/search", svc.getSearchService(), svc.getFileSearchService()),
new VocabularyApp(apiPathPrefix + "/vocabulary"),
new UserApp(apiPathPrefix + "/users", svc.getUserService()),
new FeaturesApp(apiPathPrefix + "/features", config.features),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package io.fairspace.saturn.services.search;

import java.util.List;

/**
 * Contract for the file search that backs the search API's {@code /files} endpoint.
 *
 * <p>Two implementations accompany this interface: {@code JdbcFileSearchService}, which delegates to the
 * view store, and {@code SparqlFileSearchService}, which queries the RDF dataset directly.
 */
public interface FileSearchService {
/**
 * Runs a file search for the given request.
 *
 * @param request the search request; the implementations read its query text and optional parent IRI
 * @return the matching search results
 */
List<SearchResultDTO> searchFiles(FileSearchRequest request);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package io.fairspace.saturn.services.search;

import java.util.List;
import java.util.stream.Collectors;

import io.milton.resource.CollectionResource;
import lombok.SneakyThrows;
import lombok.extern.log4j.Log4j2;

import io.fairspace.saturn.config.Config;
import io.fairspace.saturn.config.ViewsConfig;
import io.fairspace.saturn.rdf.transactions.Transactions;
import io.fairspace.saturn.services.views.ViewStoreClientFactory;
import io.fairspace.saturn.services.views.ViewStoreReader;

import static io.fairspace.saturn.webdav.PathUtils.getCollectionNameByUri;

@Log4j2
public class JdbcFileSearchService implements FileSearchService {
private final Transactions transactions;
private final CollectionResource rootSubject;
private final Config.Search searchConfig;
private final ViewsConfig viewsConfig;
private final ViewStoreClientFactory viewStoreClientFactory;

public JdbcFileSearchService(
Config.Search searchConfig,
ViewsConfig viewsConfig,
ViewStoreClientFactory viewStoreClientFactory,
Transactions transactions,
CollectionResource rootSubject) {
this.searchConfig = searchConfig;
this.viewStoreClientFactory = viewStoreClientFactory;
this.transactions = transactions;
this.rootSubject = rootSubject;
this.viewsConfig = viewsConfig;
}

@SneakyThrows
public List<SearchResultDTO> searchFiles(FileSearchRequest request) {
var collectionsForUser = transactions.calculateRead(m -> rootSubject.getChildren().stream()
.map(collection -> getCollectionNameByUri(rootSubject.getUniqueId(), collection.getUniqueId()))
.collect(Collectors.toList()));

try (var viewStoreReader = new ViewStoreReader(searchConfig, viewsConfig, viewStoreClientFactory)) {
return viewStoreReader.searchFiles(request, collectionsForUser);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
package io.fairspace.saturn.services.search;

import io.fairspace.saturn.services.BaseApp;
import io.fairspace.saturn.services.views.QueryService;

import static org.eclipse.jetty.http.MimeTypes.Type.APPLICATION_JSON;
import static spark.Spark.post;

public class SearchApp extends BaseApp {
private final SearchService searchService;
private final QueryService queryService;
private final FileSearchService fileSearchService;

public SearchApp(String basePath, SearchService searchService, QueryService queryService) {
public SearchApp(String basePath, SearchService searchService, FileSearchService fileSearchService) {
super(basePath);
this.searchService = searchService;
this.queryService = queryService;
this.fileSearchService = fileSearchService;
}

@Override
protected void initApp() {
post("/files", (req, res) -> {
res.type(APPLICATION_JSON.asString());
var request = mapper.readValue(req.body(), FileSearchRequest.class);
var searchResult = queryService.searchFiles(request);
var searchResult = fileSearchService.searchFiles(request);

SearchResultsDTO resultDto = SearchResultsDTO.builder()
.results(searchResult)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package io.fairspace.saturn.services.search;

import java.util.List;

import lombok.extern.log4j.Log4j2;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QuerySolutionMap;

import io.fairspace.saturn.rdf.SparqlUtils;
import io.fairspace.saturn.vocabulary.FS;

import static io.fairspace.saturn.util.ValidationUtils.validateIRI;

import static org.apache.jena.rdf.model.ResourceFactory.createStringLiteral;

@Log4j2
public class SparqlFileSearchService implements FileSearchService {
private final Dataset ds;

public SparqlFileSearchService(Dataset ds) {
this.ds = ds;
}

public List<SearchResultDTO> searchFiles(FileSearchRequest request) {
var query = getSearchForFilesQuery(request.getParentIRI());
var binding = new QuerySolutionMap();
binding.add("regexQuery", createStringLiteral(SparqlUtils.getQueryRegex(request.getQuery())));
return SparqlUtils.getByQuery(query, binding, ds);
}

private Query getSearchForFilesQuery(String parentIRI) {
var builder = new StringBuilder("PREFIX fs: <")
.append(FS.NS)
.append(">\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n\n")
.append("SELECT ?id ?label ?comment ?type\n")
.append("WHERE {\n");

if (parentIRI != null && !parentIRI.trim().isEmpty()) {
validateIRI(parentIRI);
builder.append("?id fs:belongsTo* <").append(parentIRI).append("> .\n");
}

builder.append("?id rdfs:label ?label ; a ?type .\n")
.append("FILTER (?type in (fs:File, fs:Directory, fs:Collection))\n")
.append("OPTIONAL { ?id rdfs:comment ?comment }\n")
.append("FILTER NOT EXISTS { ?id fs:dateDeleted ?anydate }\n")
.append("FILTER (regex(?label, ?regexQuery, \"i\") || regex(?comment, ?regexQuery, \"i\"))\n")
.append("}\nLIMIT 10000");

return QueryFactory.create(builder.toString());
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
package io.fairspace.saturn.services.views;

import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.sql.SQLTimeoutException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;

import io.milton.resource.CollectionResource;
Expand All @@ -19,8 +13,8 @@
import io.fairspace.saturn.config.ViewsConfig;
import io.fairspace.saturn.rdf.transactions.Transactions;
import io.fairspace.saturn.rdf.transactions.TxnIndexDatasetGraph;
import io.fairspace.saturn.services.search.FileSearchRequest;
import io.fairspace.saturn.services.search.SearchResultDTO;

import static io.fairspace.saturn.webdav.PathUtils.getCollectionNameByUri;

import static java.lang.Integer.min;

Expand Down Expand Up @@ -52,12 +46,6 @@ public JdbcQueryService(
this.viewsConfig = viewsConfig;
}

public String getCollectionName(String uri) {
var rootLocation = rootSubject.getUniqueId() + "/";
var location = uri.substring(rootLocation.length());
return URLDecoder.decode(location.split("/")[0], StandardCharsets.UTF_8);
}

ViewStoreReader getViewStoreReader() throws SQLException {
return new ViewStoreReader(searchConfig, viewsConfig, viewStoreClientFactory);
}
Expand All @@ -71,14 +59,14 @@ protected void applyCollectionsFilterIfRequired(String view, List<ViewFilter> fi
return;
}
var collections = transactions.calculateRead(m -> rootSubject.getChildren().stream()
.map(collection -> (Object) getCollectionName(collection.getUniqueId()))
.map(collection -> (Object) getCollectionNameByUri(rootSubject.getUniqueId(), collection.getUniqueId()))
.collect(Collectors.toList()));
if (filters.stream().anyMatch(filter -> filter.getField().equalsIgnoreCase("Resource_collection"))) {
// Update existing filters in place
filters.stream()
.filter(filter -> filter.getField().equalsIgnoreCase("Resource_collection"))
.forEach(filter -> filter.setValues(filter.values.stream()
.map(value -> getCollectionName(value.toString()))
.map(value -> getCollectionNameByUri(rootSubject.getUniqueId(), value.toString()))
.filter(collections::contains)
.collect(Collectors.toList())));
return;
Expand Down Expand Up @@ -131,15 +119,4 @@ public CountDTO count(CountRequest request) {
return new CountDTO(0, true);
}
}

@SneakyThrows
public List<SearchResultDTO> searchFiles(FileSearchRequest request) {
var collectionsForUser = transactions.calculateRead(m -> rootSubject.getChildren().stream()
.map(collection -> getCollectionName(collection.getUniqueId()))
.collect(Collectors.toList()));

try (var viewStoreReader = getViewStoreReader()) {
return viewStoreReader.searchFiles(request, collectionsForUser);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
package io.fairspace.saturn.services.views;

import java.util.List;

import io.fairspace.saturn.services.search.FileSearchRequest;
import io.fairspace.saturn.services.search.SearchResultDTO;

/**
* High-level interface for fetching metadata view pages and counts.
* Implemented using Sparql queries on the RDF database directly
Expand All @@ -22,6 +17,4 @@ public interface QueryService {
ViewPageDTO retrieveViewPage(ViewRequest request);

CountDTO count(CountRequest request);

List<SearchResultDTO> searchFiles(FileSearchRequest request);
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.apache.jena.query.QueryCancelledException;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QuerySolutionMap;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
Expand All @@ -38,9 +37,6 @@
import io.fairspace.saturn.config.ViewsConfig;
import io.fairspace.saturn.config.ViewsConfig.ColumnType;
import io.fairspace.saturn.config.ViewsConfig.View;
import io.fairspace.saturn.rdf.SparqlUtils;
import io.fairspace.saturn.services.search.FileSearchRequest;
import io.fairspace.saturn.services.search.SearchResultDTO;
import io.fairspace.saturn.vocabulary.FS;

import static io.fairspace.saturn.rdf.ModelUtils.getResourceProperties;
Expand All @@ -54,7 +50,6 @@
import static java.util.stream.Collectors.toSet;
import static org.apache.jena.graph.NodeFactory.createURI;
import static org.apache.jena.rdf.model.ResourceFactory.createProperty;
import static org.apache.jena.rdf.model.ResourceFactory.createStringLiteral;
import static org.apache.jena.sparql.expr.NodeValue.makeBoolean;
import static org.apache.jena.sparql.expr.NodeValue.makeDate;
import static org.apache.jena.sparql.expr.NodeValue.makeDecimal;
Expand Down Expand Up @@ -158,13 +153,6 @@ private Map<String, Set<ValueDTO>> fetch(Resource resource, String viewName) {
return result;
}

public List<SearchResultDTO> searchFiles(FileSearchRequest request) {
var query = getSearchForFilesQuery(request.getParentIRI());
var binding = new QuerySolutionMap();
binding.add("regexQuery", createStringLiteral(SparqlUtils.getQueryRegex(request.getQuery())));
return SparqlUtils.getByQuery(query, binding, ds);
}

private Set<ValueDTO> getValues(Resource resource, View.Column column) {
return new TreeSet<>(resource.listProperties(createProperty(column.source))
.mapWith(Statement::getObject)
Expand Down Expand Up @@ -384,28 +372,6 @@ private View.Column getColumn(String name) {
});
}

private Query getSearchForFilesQuery(String parentIRI) {
var builder = new StringBuilder("PREFIX fs: <")
.append(FS.NS)
.append(">\nPREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n\n")
.append("SELECT ?id ?label ?comment ?type\n")
.append("WHERE {\n");

if (parentIRI != null && !parentIRI.trim().isEmpty()) {
validateIRI(parentIRI);
builder.append("?id fs:belongsTo* <").append(parentIRI).append("> .\n");
}

builder.append("?id rdfs:label ?label ; a ?type .\n")
.append("FILTER (?type in (fs:File, fs:Directory, fs:Collection))\n")
.append("OPTIONAL { ?id rdfs:comment ?comment }\n")
.append("FILTER NOT EXISTS { ?id fs:dateDeleted ?anydate }\n")
.append("FILTER (regex(?label, ?regexQuery, \"i\") || regex(?comment, ?regexQuery, \"i\"))\n")
.append("}\nLIMIT 10000");

return QueryFactory.create(builder.toString());
}

private static Calendar convertDateValue(String value) {
var calendar = Calendar.getInstance();
calendar.setTimeInMillis(Instant.parse(value).toEpochMilli());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ private FacetDTO getFacetInfo(View view, View.Column column) {
}
case Number, Date -> {
if (viewStoreClientFactory != null) {
var range = getColumnRange(view, column);
var range = getViewStoreColumnRange(view, column);
if (range != null) {
min = range.getStart();
max = range.getEnd();
Expand Down Expand Up @@ -278,7 +278,7 @@ private Object convertLiteralValue(Object value) {
}

@SneakyThrows
private Range getColumnRange(View view, View.Column column) {
private Range getViewStoreColumnRange(View view, View.Column column) {
if (!EnumSet.of(ColumnType.Date, ColumnType.Number).contains(column.type)) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package io.fairspace.saturn.webdav;

import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;

import io.milton.http.exceptions.BadRequestException;

import static org.apache.commons.lang3.StringUtils.strip;
Expand Down Expand Up @@ -37,4 +40,10 @@ public static void validateCollectionName(String name) throws BadRequestExceptio
throw new BadRequestException("The collection name contains an illegal character (\\)");
}
}

/**
 * Extracts the URL-decoded collection name from a resource URI located under the root.
 *
 * <p>The collection name is the first path segment following the root URI. The root may be given with or
 * without a trailing slash.
 *
 * @param rootSubjectUri URI of the root resource
 * @param uri            URI of a collection, or of a resource inside a collection
 * @return the decoded first path segment below the root, or an empty string when {@code uri} is the root
 *         itself or is not located under the root
 * @throws IllegalArgumentException if the segment contains a malformed percent-encoded sequence
 */
public static String getCollectionNameByUri(String rootSubjectUri, String uri) {
    // Do not double the separator when the root already ends with one.
    var rootLocation = rootSubjectUri.endsWith("/") ? rootSubjectUri : rootSubjectUri + "/";
    if (!uri.startsWith(rootLocation)) {
        // Covers URIs shorter than the root prefix (previously a StringIndexOutOfBoundsException) and
        // URIs outside the root (previously an arbitrary slice of the input).
        return "";
    }
    var location = uri.substring(rootLocation.length());
    // indexOf instead of split("/")[0]: split yields an empty array for a path made only of slashes.
    var separatorIndex = location.indexOf('/');
    var firstSegment = separatorIndex < 0 ? location : location.substring(0, separatorIndex);
    return URLDecoder.decode(firstSegment, StandardCharsets.UTF_8);
}
}
Loading

0 comments on commit a2496ef

Please sign in to comment.