Skip to content

Commit

Permalink
Harvester / CSW / Add XPath filter (#4066)
Browse files Browse the repository at this point in the history
  • Loading branch information
fxprunayre committed Oct 3, 2019
1 parent 3fe9f7d commit 68ae2e1
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 57 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -295,17 +295,17 @@ public void destroy() throws Exception {

final IMetadataUtils metadataRepository = context.getBean(IMetadataUtils.class);
final SourceRepository sourceRepository = context.getBean(SourceRepository.class);

final Specifications<? extends AbstractMetadata> ownedByHarvester = Specifications.where(MetadataSpecs.hasHarvesterUuid(getParams().getUuid()));
Set<String> sources = new HashSet<String>();
for (Integer id : metadataRepository.findAllIdsBy(ownedByHarvester)) {
sources.add(metadataUtils.findOne(id).getSourceInfo().getSourceId());
metadataManager.deleteMetadata(context, "" + id);
}

// Remove all sources related to the harvestUuid if they are not linked to any record anymore
for (String sourceUuid : sources) {
Long ownedBySource =
Long ownedBySource =
metadataRepository.count(Specifications.where(MetadataSpecs.hasSource(sourceUuid)));
if (ownedBySource == 0 && !sourceUuid.equals(params.getUuid()) && sourceRepository.exists(sourceUuid)) {
removeIcon(sourceUuid);
Expand Down Expand Up @@ -865,7 +865,7 @@ protected void storeNode(AbstractParams params, String path) throws SQLException

harvesterSettingsManager.add(ID_PREFIX + contentId, "importxslt", params.getImportXslt());
harvesterSettingsManager.add(ID_PREFIX + contentId, "validate", params.getValidate());

//--- setup stats node ----------------------------------------

harvesterSettingsManager.add(ID_PREFIX + infoId, "lastRun", "");
Expand Down Expand Up @@ -942,6 +942,7 @@ public Element getResult() {
add(res, "collectionDatasetRecords", result.collectionDatasetRecords);
add(res, "datasetUuidExist", result.datasetUuidExist);
add(res, "doesNotValidate", result.doesNotValidate);
add(res, "xpathFilterExcluded", result.xpathFilterExcluded);
add(res, "duplicatedResource", result.duplicatedResource);
add(res, "fragmentsMatched", result.fragmentsMatched);
add(res, "fragmentsReturned", result.fragmentsReturned);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public class HarvestResult {
public int couldNotInsert;
public int datasetUuidExist; // = uuid already in catalogue
public int doesNotValidate; // = 0 cos' not validated
public int xpathFilterExcluded;
public int duplicatedResource;
public int fragmentsMatched; // = fragments matched in md templates
public int fragmentsReturned; // = fragments generated
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -40,6 +41,7 @@
import javax.transaction.Transactional;
import javax.transaction.Transactional.TxType;

import org.apache.commons.lang.StringUtils;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
Expand Down Expand Up @@ -72,6 +74,7 @@
import org.fao.geonet.repository.OperationAllowedRepository;
import org.fao.geonet.utils.Xml;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.xpath.XPath;

import jeeves.server.context.ServiceContext;
Expand All @@ -87,11 +90,7 @@ public class Aligner extends BaseAligner<CswParams> {
private IMetadataUtils metadataUtils;
private IMetadataManager metadataManager;
private IMetadataIndexer metadataIndexer;
//--------------------------------------------------------------------------
//---
//--- Variables
//---
//--------------------------------------------------------------------------

private HarvestResult result;
private GetRecordByIdRequest request;
private String processName;
Expand All @@ -113,6 +112,7 @@ public Aligner(AtomicBoolean cancelMonitor, ServiceContext sc, CswServer server,
result.unretrievable = 0;
result.uuidSkipped = 0;
result.couldNotInsert = 0;
result.xpathFilterExcluded = 0;

//--- setup get-record-by-id request

Expand Down Expand Up @@ -183,11 +183,11 @@ public HarvestResult align(Collection<RecordInfo> records, Collection<HarvestErr

private void insertOrUpdate(Collection<RecordInfo> records, Collection<HarvestError> errors) {
for (RecordInfo ri : records) {

if (cancelMonitor.get()) {
return;
}

try {

String id = metadataUtils.getMetadataId(ri.uuid);
Expand Down Expand Up @@ -221,9 +221,9 @@ private void insertOrUpdate(Collection<RecordInfo> records, Collection<HarvestEr
updateMetadata(ri, id, false);

}

context.getBean(LuceneIndexLanguageTracker.class).commit();

result.totalMetadata++;
} catch (Throwable t) {
errors.add(new HarvestError(this.context, t));
Expand All @@ -238,13 +238,13 @@ private void insertOrUpdate(Collection<RecordInfo> records, Collection<HarvestEr

/**
* Remove records no longer on the remote CSW server
*
*
* @param records
* @throws Exception
*/
@Transactional(value=TxType.REQUIRES_NEW)
public HarvestResult cleanupRemovedRecords(Set<String> records) throws Exception {

if (cancelMonitor.get()) {
return result;
}
Expand All @@ -258,7 +258,7 @@ public HarvestResult cleanupRemovedRecords(Set<String> records) throws Exception
}
}
dataMan.forceIndexChanges();

return result;
}

Expand All @@ -276,14 +276,21 @@ private void addMetadata(RecordInfo ri, String uuid) throws Exception {
}

String schema = dataMan.autodetectSchema(md, null);

if (schema == null) {
log.debug(" - Metadata skipped due to unknown schema. uuid:" + ri.uuid);
result.unknownSchema++;

return;
}

if (StringUtils.isNotEmpty(params.xpathFilter)) {
Object xpathResult = Xml.selectSingle(md, params.xpathFilter, new ArrayList<Namespace>(dataMan.getSchema(schema).getNamespaces()));
boolean match = xpathResult instanceof Boolean && ((Boolean) xpathResult).booleanValue();
if(!match) {
result.xpathFilterExcluded ++;
return;
}
}

log.debug(" - Adding metadata with remote uuid:" + ri.uuid + " schema:" + schema);

String mdUuid = ri.uuid;
Expand All @@ -295,7 +302,7 @@ private void addMetadata(RecordInfo ri, String uuid) throws Exception {
mdUuid = ri.uuid;
}
}

//
// insert metadata
//
Expand Down Expand Up @@ -362,7 +369,7 @@ private boolean updatingLocalMetadata(RecordInfo ri, String id, Boolean force) t
result.unchangedMetadata++;
return false;
}

if (!params.xslfilter.equals("")) {
md = processMetadata(context, md, processName, processParams);
}
Expand All @@ -376,7 +383,7 @@ private boolean updatingLocalMetadata(RecordInfo ri, String id, Boolean force) t
String language = context.getLanguage();

final AbstractMetadata metadata = metadataManager.updateMetadata(context, id, md, validate, ufo, index, language, ri.changeDate, true);

if(force) {
//change ownership of metadata to new harvester
metadata.getHarvestInfo().setUuid(params.getUuid());
Expand All @@ -392,10 +399,10 @@ private boolean updatingLocalMetadata(RecordInfo ri, String id, Boolean force) t

metadata.getCategories().clear();
addCategories(metadata, params.getCategories(), localCateg, context, null, true);

return true;
}

/**
* Does CSW GetRecordById request. If validation is requested and the metadata does not
* validate, null is returned.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ protected void storeNodeExtra(AbstractParams p, String path, String siteId, Stri
harvesterSettingsManager.add("id:" + siteId, "rejectDuplicateResource", params.rejectDuplicateResource);
harvesterSettingsManager.add("id:" + siteId, "queryScope", params.queryScope);
harvesterSettingsManager.add("id:" + siteId, "hopCount", params.hopCount);
harvesterSettingsManager.add("id:" + siteId, "xpathFilter", params.xpathFilter);
harvesterSettingsManager.add("id:" + siteId, "xslfilter", params.xslfilter);
harvesterSettingsManager.add("id:" + siteId, "outputSchema", params.outputSchema);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,32 +36,21 @@
*
*/
public class CswParams extends AbstractParams {
//--------------------------------------------------------------------------
//---
//--- Constructor
//---
//--------------------------------------------------------------------------

public String capabUrl;

public String icon;
public String outputSchema;

//---------------------------------------------------------------------------
//---
//--- Other API methods
//---
//---------------------------------------------------------------------------
public String outputSchema;

//public Iterable<Element> getSearchElements() { return eltSearches; }
public boolean rejectDuplicateResource;

//---------------------------------------------------------------------------
//---
//--- Variables
//---
//---------------------------------------------------------------------------
public String queryScope;

public String xpathFilter;

public Integer hopCount;

/**
* The filter is a process (see schema/process folder) which depends on the schema. It could be
* composed of parameter which will be sent to XSL transformation using the following syntax :
Expand Down Expand Up @@ -90,6 +79,7 @@ public void create(Element node) throws BadInputEx {
queryScope = Util.getParam(site, "queryScope", "local");
hopCount = Util.getParam(site, "hopCount", 2);
xslfilter = Util.getParam(site, "xslfilter", "");
xpathFilter = Util.getParam(site, "xpathFilter", "");
outputSchema = Util.getParam(site, "outputSchema", outputSchema);
icon = Util.getParam(site, "icon", "default.gif");

Expand Down Expand Up @@ -119,6 +109,7 @@ public void update(Element node) throws BadInputEx {
rejectDuplicateResource = Util.getParam(site, "rejectDuplicateResource", rejectDuplicateResource);
queryScope = Util.getParam(site, "queryScope", queryScope);
hopCount = Util.getParam(site, "hopCount", hopCount);
xpathFilter = Util.getParam(site, "xpathFilter", "");
xslfilter = Util.getParam(site, "xslfilter", "");
outputSchema = Util.getParam(site, "outputSchema", outputSchema);

Expand Down Expand Up @@ -150,6 +141,7 @@ public CswParams copy() {
copy.rejectDuplicateResource = rejectDuplicateResource;
copy.queryScope = queryScope;
copy.hopCount = hopCount;
copy.xpathFilter = xpathFilter;
copy.xslfilter = xslfilter;
copy.outputSchema = outputSchema;

Expand Down
2 changes: 2 additions & 0 deletions web-ui/src/main/resources/catalog/locales/en-admin.json
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@
"csw-dublinCore": "Dublin Core",
"csw-iso19139": "ISO 19139",
"csw-recommendedValues": "Recommended values",
"xpathFilter": "XPath filter",
"xpathFilter-help": "When record is retrived from remote server, check an XPath expression to accept or not the record. The XPath must use namespaces of the schema of the record (eg. gmd, gco, srv for ISO19139) and must return a boolean value. For example, to filter record with status = completed 'count(.//gmd:status/*[@codeListValue = 'completed']) > 0'.",
"currentCatalogLogo": "Current catalog logo",
"customConfiguration": "Custom ...",
"customizeElementSet": "Customize element set",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@
</div>
</fieldset>


<hr/>
<!-- XPath filter row of the CSW harvester settings form: a label, a text input
     bound to harvesterSelected.site.xpathFilter, and a help paragraph. The label
     and help texts ("xpathFilter", "xpathFilter-help") carry data-translate and
     are presumably resolved as translation keys. -->
<div id="gn-harvest-settings-csw-xpath-filter-title-row">
<label id="gn-harvest-settings-csw-xpath-filter-title-label" class="control-label col-lg-4" data-translate="">xpathFilter</label>
<div class="col-lg-8">
<input id="gn-harvest-settings-csw-xpath-filter-title-input"
type="text"
class="form-control"
data-ng-model="harvesterSelected.site.xpathFilter"/>
</div>
<p class="help-block" data-translate="">xpathFilter-help</p>
</div>

</fieldset>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ var gnHarvestercsw = {
"password" : []
},
"capabilitiesUrl" : "http://",
"xpathFilter" : "",
"rejectDuplicateResource" : false,
"xslfilter": [],
"outputSchema": "",
Expand Down Expand Up @@ -71,7 +72,7 @@ var gnHarvestercsw = {
var body = '<node id="' + h['@id'] + '" '
+ ' type="' + h['@type'] + '">'
+ ' <ownerGroup><id>' + h.ownerGroup[0] + '</id></ownerGroup>'
+ ' <ownerUser><id>' + h.ownerUser[0] + '</id></ownerUser>'
+ ' <ownerUser><id>' + h.ownerUser[0] + '</id></ownerUser>'
+ ' <site>'
+ ' <name>' + h.site.name + '</name>'
+ ' <rejectDuplicateResource>' + h.site.rejectDuplicateResource + '</rejectDuplicateResource>'
Expand All @@ -82,6 +83,7 @@ var gnHarvestercsw = {
+ ' <username>' + h.site.account.username + '</username>'
+ ' <password>' + h.site.account.password + '</password>'
+ ' </account>'
+ ' <xpathFilter>' + h.site.xpathFilter + '</xpathFilter>'
+ ' <xslfilter>' + h.site.xslfilter + '</xslfilter>'
+ ' <outputSchema>' + h.site.outputSchema + '</outputSchema>'
+ ' <queryScope>' + h.site.queryScope + '</queryScope>'
Expand Down
19 changes: 3 additions & 16 deletions web/src/main/webapp/xsl/xml/harvesting/csw.xsl
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

<!-- ============================================================================================= -->

<xsl:import href="common.xsl"/>

<!-- ============================================================================================= -->
<!-- === CSW harvesting node -->
<!-- ============================================================================================= -->

<xsl:template match="*" mode="site">
<capabilitiesUrl>
<xsl:value-of select="capabUrl/value"/>
Expand All @@ -23,6 +16,9 @@
<hopCount>
<xsl:value-of select="hopCount/value"/>
</hopCount>
<xpathFilter>
<xsl:value-of select="xpathFilter/value"/>
</xpathFilter>
<xslfilter>
<xsl:value-of select="xslfilter/value"/>
</xslfilter>
Expand All @@ -34,28 +30,19 @@
</outputSchema>
</xsl:template>

<!-- ============================================================================================= -->

<xsl:template match="*" mode="options"/>

<!-- ============================================================================================= -->


<!-- "searches" mode: for any matched element, emits one <searches>/<search>
     wrapper and fills it by applying templates to the context node's
     "children" child elements. -->
<xsl:template match="*" mode="searches">

<searches>
<search>
<xsl:apply-templates select="children"/>
</search>
</searches>

</xsl:template>

<!-- Copies, unchanged, every child element located at search/children
     beneath the matched "children" element. -->
<xsl:template match="children">
<xsl:copy-of select="search/children/child::*"/>
</xsl:template>


<!-- ============================================================================================= -->

</xsl:stylesheet>

0 comments on commit 68ae2e1

Please sign in to comment.