From a4e215106dc13403f5908922d870c20444661aaf Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 1 Jun 2025 08:01:26 +0000 Subject: [PATCH] Jules was unable to complete the task in time. Please review the work done so far and provide feedback for Jules to continue. --- lucene/pom.xml | 150 +++++++ .../ArcadeLuceneIndexFactoryHandler.java | 34 ++ .../lucene/ArcadeLuceneLifecycleManager.java | 46 ++ .../analyzer/OLuceneAnalyzerFactory.java | 141 +++++++ .../OLucenePerFieldAnalyzerWrapper.java | 89 ++++ .../lucene/builder/OLuceneIndexType.java | 207 +++++++++ .../engine/OLuceneCrossClassIndexEngine.java | 399 ++++++++++++++++++ .../engine/OLuceneFullTextIndexEngine.java | 304 +++++++++++++ .../lucene/engine/OLuceneIndexEngine.java | 68 +++ .../OLuceneCrossClassFunctionsFactory.java | 25 ++ .../OLuceneCrossClassSearchFunction.java | 181 ++++++++ .../functions/OLuceneFunctionsFactory.java | 27 ++ .../functions/OLuceneFunctionsUtils.java | 60 +++ .../OLuceneSearchFunctionTemplate.java | 90 ++++ .../OLuceneSearchMoreLikeThisFunction.java | 396 +++++++++++++++++ .../OLuceneSearchOnClassFunction.java | 184 ++++++++ .../OLuceneSearchOnFieldsFunction.java | 200 +++++++++ .../OLuceneSearchOnIndexFunction.java | 198 +++++++++ .../index/ArcadeLuceneFullTextIndex.java | 362 ++++++++++++++++ .../lucene/index/OLuceneFullTextIndex.java | 118 ++++++ .../lucene/query/LuceneIndexCursor.java | 113 +++++ .../lucene/query/OLuceneQueryContext.java | 138 ++++++ .../arcadedb/lucene/tx/OLuceneTxChanges.java | 52 +++ .../lucene/tx/OLuceneTxChangesAbstract.java | 74 ++++ .../lucene/tx/OLuceneTxChangesMultiRid.java | 108 +++++ .../lucene/tx/OLuceneTxChangesSingleRid.java | 92 ++++ .../com.arcadedb.database.index.OIndexFactory | 21 + ...database.sql.functions.OSQLFunctionFactory | 21 + ...atabase.sql.operator.OQueryOperatorFactory | 20 + .../com.arcadedb.index.IndexFactoryHandler | 1 + 
lucene/src/main/resources/plugin.json | 8 + pom.xml | 1 + 32 files changed, 3928 insertions(+) create mode 100644 lucene/pom.xml create mode 100644 lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java create mode 100644 
lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java create mode 100644 lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory create mode 100644 lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler create mode 100644 lucene/src/main/resources/plugin.json diff --git a/lucene/pom.xml b/lucene/pom.xml new file mode 100644 index 0000000000..16d6341c39 --- /dev/null +++ b/lucene/pom.xml @@ -0,0 +1,150 @@ + + + 4.0.0 + + + com.arcadedb + arcadedb-parent + 25.6.1-SNAPSHOT + ../pom.xml + + + arcadedb-lucene + jar + ArcadeDB Lucene + Lucene full-text search engine integration for ArcadeDB. 
+ + + + + + 10.2.1 + 0.8 + 1.20.0 + + + + + + com.arcadedb + arcadedb-engine + ${project.version} + + + + + + org.apache.lucene + lucene-core + ${lucene.version} + + + org.apache.lucene + lucene-analysis-common + ${lucene.version} + + + org.apache.lucene + lucene-queryparser + ${lucene.version} + + + org.apache.lucene + lucene-queries + ${lucene.version} + + + org.apache.lucene + lucene-misc + ${lucene.version} + + + org.apache.lucene + lucene-facet + ${lucene.version} + + + org.apache.lucene + lucene-memory + ${lucene.version} + + + org.apache.lucene + lucene-highlighter + ${lucene.version} + + + org.apache.lucene + lucene-codecs + ${lucene.version} + + + org.apache.lucene + lucene-backward-codecs + ${lucene.version} + + + org.apache.lucene + lucene-spatial-extras + ${lucene.version} + + + + + + org.locationtech.spatial4j + spatial4j + ${spatial4j.version} + + + org.locationtech.jts + jts-core + ${jts-core.version} + + + + + org.slf4j + slf4j-api + 1.7.36 + + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + true + + + + + + + + diff --git a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java new file mode 100644 index 0000000000..3edb15da20 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneIndexFactoryHandler.java @@ -0,0 +1,34 @@ +package com.arcadedb.lucene; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.index.IndexFactoryHandler; +import com.arcadedb.index.IndexInternal; +import com.arcadedb.schema.IndexBuilder; +import com.arcadedb.schema.Type; +import com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex; +import java.util.Map; + +public class ArcadeLuceneIndexFactoryHandler implements IndexFactoryHandler { + + @Override + public IndexInternal create(IndexBuilder builder) { + DatabaseInternal database = builder.getDatabase(); + String indexName = builder.getIndexName(); + boolean 
unique = builder.isUnique(); + // Schema.INDEX_TYPE indexType = builder.getIndexType(); // This is implicitly "FULL_TEXT" for this handler + Type[] keyTypes = builder.getKeyTypes(); + Map properties = builder.getProperties(); + String filePath = builder.getFilePath(); + + + String analyzerClassName = org.apache.lucene.analysis.standard.StandardAnalyzer.class.getName(); + if (properties != null && properties.containsKey("analyzer")) { + analyzerClassName = properties.get("analyzer"); + } + + // The actual ArcadeLuceneFullTextIndex will need to be instantiated here. + // Its constructor will need to be defined to accept these parameters. + // Adding filePath and keyTypes to the constructor call. + return new com.arcadedb.lucene.index.ArcadeLuceneFullTextIndex(database, indexName, unique, analyzerClassName, filePath, keyTypes); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java new file mode 100644 index 0000000000..35e0947279 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/ArcadeLuceneLifecycleManager.java @@ -0,0 +1,46 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.arcadedb.lucene; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +// This class might serve as the main plugin class listed in plugin.json for initialization purposes, +// or handle lifecycle events if ArcadeDB's plugin API expects a specific class for that. +// For now, it's minimal. +public class ArcadeLuceneLifecycleManager { + private static final Logger logger = LoggerFactory.getLogger(ArcadeLuceneLifecycleManager.class); + + // This constant might be better placed in ArcadeLuceneIndexFactoryHandler or a shared constants class. + public static final String LUCENE_ALGORITHM = "LUCENE"; + + public ArcadeLuceneLifecycleManager() { + this(false); + } + + public ArcadeLuceneLifecycleManager(boolean manual) { + if (!manual) { + logger.info("ArcadeLuceneLifecycleManager initialized (manual: {}).", manual); + // Further initialization or listener registration logic specific to ArcadeDB's plugin system + // would go here if this class is the entry point. + } + } + + // Any necessary lifecycle methods (e.g., from a specific ArcadeDB plugin interface) would be here. + // For now, assuming it does not need to implement DatabaseListener directly. + // Drop logic for indexes of this type should be handled by the Index.drop() method. 
+} diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java new file mode 100644 index 0000000000..3641f43536 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLuceneAnalyzerFactory.java @@ -0,0 +1,141 @@ +package com.arcadedb.lucene.analyzer; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.database.index.OIndexDefinition; +import com.arcadedb.database.index.OIndexException; +import com.arcadedb.database.metadata.schema.OType; +import com.arcadedb.database.record.impl.ODocument; +import java.lang.reflect.Constructor; +import java.util.Collection; +import java.util.Locale; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.standard.StandardAnalyzer; + +/** Created by frank on 30/10/2015. 
*/ +public class OLuceneAnalyzerFactory { + private static final OLogger logger = OLogManager.instance().logger(OLuceneAnalyzerFactory.class); + + public Analyzer createAnalyzer( + final OIndexDefinition index, final AnalyzerKind kind, final ODocument metadata) { + if (index == null) { + throw new IllegalArgumentException("Index must not be null"); + } + if (kind == null) { + throw new IllegalArgumentException("Analyzer kind must not be null"); + } + if (metadata == null) { + throw new IllegalArgumentException("Metadata must not be null"); + } + final String defaultAnalyzerFQN = metadata.field("default"); + final String prefix = index.getClassName() + "."; + + final OLucenePerFieldAnalyzerWrapper analyzer = + geLucenePerFieldPresetAnalyzerWrapperForAllFields(defaultAnalyzerFQN); + setDefaultAnalyzerForRequestedKind(index, kind, metadata, prefix, analyzer); + setSpecializedAnalyzersForEachField(index, kind, metadata, prefix, analyzer); + return analyzer; + } + + private OLucenePerFieldAnalyzerWrapper geLucenePerFieldPresetAnalyzerWrapperForAllFields( + final String defaultAnalyzerFQN) { + if (defaultAnalyzerFQN == null) { + return new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); + } else { + return new OLucenePerFieldAnalyzerWrapper(buildAnalyzer(defaultAnalyzerFQN)); + } + } + + private void setDefaultAnalyzerForRequestedKind( + final OIndexDefinition index, + final AnalyzerKind kind, + final ODocument metadata, + final String prefix, + final OLucenePerFieldAnalyzerWrapper analyzer) { + final String specializedAnalyzerFQN = metadata.field(kind.toString()); + if (specializedAnalyzerFQN != null) { + for (final String field : index.getFields()) { + analyzer.add(field, buildAnalyzer(specializedAnalyzerFQN)); + analyzer.add(prefix + field, buildAnalyzer(specializedAnalyzerFQN)); + } + } + } + + private void setSpecializedAnalyzersForEachField( + final OIndexDefinition index, + final AnalyzerKind kind, + final ODocument metadata, + final String prefix, + final 
OLucenePerFieldAnalyzerWrapper analyzer) { + for (final String field : index.getFields()) { + final String analyzerName = field + "_" + kind.toString(); + final String analyzerStopwords = analyzerName + "_stopwords"; + + if (metadata.containsField(analyzerName) && metadata.containsField(analyzerStopwords)) { + final Collection stopWords = metadata.field(analyzerStopwords, OType.EMBEDDEDLIST); + analyzer.add(field, buildAnalyzer(metadata.field(analyzerName), stopWords)); + analyzer.add(prefix + field, buildAnalyzer(metadata.field(analyzerName), stopWords)); + } else if (metadata.containsField(analyzerName)) { + analyzer.add(field, buildAnalyzer(metadata.field(analyzerName))); + analyzer.add(prefix + field, buildAnalyzer(metadata.field(analyzerName))); + } + } + } + + private Analyzer buildAnalyzer(final String analyzerFQN) { + try { + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = classAnalyzer.getConstructor(); + return (Analyzer) constructor.newInstance(); + } catch (final ClassNotFoundException e) { + throw OException.wrapException( + new OIndexException("Analyzer: " + analyzerFQN + " not found"), e); + } catch (final NoSuchMethodException e) { + Class classAnalyzer = null; + try { + classAnalyzer = Class.forName(analyzerFQN); + return (Analyzer) classAnalyzer.newInstance(); + } catch (Exception e1) { + logger.error("Exception is suppressed, original exception is ", e); + //noinspection ThrowInsideCatchBlockWhichIgnoresCaughtException + throw OException.wrapException( + new OIndexException("Couldn't instantiate analyzer: public constructor not found"), + e1); + } + } catch (Exception e) { + logger.error( + "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + return new StandardAnalyzer(); + } + } + + private Analyzer buildAnalyzer(final String analyzerFQN, final Collection stopwords) { + try { + final Class classAnalyzer = Class.forName(analyzerFQN); + final Constructor constructor = 
classAnalyzer.getDeclaredConstructor(CharArraySet.class); + return (Analyzer) constructor.newInstance(new CharArraySet(stopwords, true)); + } catch (final ClassNotFoundException e) { + throw OException.wrapException( + new OIndexException("Analyzer: " + analyzerFQN + " not found"), e); + } catch (final NoSuchMethodException e) { + throw OException.wrapException( + new OIndexException("Couldn't instantiate analyzer: public constructor not found"), e); + } catch (final Exception e) { + logger.error( + "Error on getting analyzer for Lucene index (continuing with StandardAnalyzer)", e); + return new StandardAnalyzer(); + } + } + + public enum AnalyzerKind { + INDEX, + QUERY; + + @Override + public String toString() { + return name().toLowerCase(Locale.ENGLISH); + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java new file mode 100644 index 0000000000..53237815ff --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/analyzer/OLucenePerFieldAnalyzerWrapper.java @@ -0,0 +1,89 @@ +package com.arcadedb.lucene.analyzer; + +import static com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract.RID; + +import com.arcadedb.lucene.builder.OLuceneIndexType; +import java.util.HashMap; +import java.util.Map; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; +import org.apache.lucene.analysis.core.KeywordAnalyzer; + +/** + * Created by frank on 10/12/15. + * + *

Doesn't allow to wrap components or readers. Thread local resources can be + delegated to the + * delegate analyzer, but not allocated on this analyzer (limit memory consumption). Uses a per + * field reuse strategy. + */ +public class OLucenePerFieldAnalyzerWrapper extends DelegatingAnalyzerWrapper { + private final Analyzer defaultDelegateAnalyzer; + private final Map fieldAnalyzers; + + /** + * Constructs with default analyzer. + * + * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use + * the one provided here. + */ + public OLucenePerFieldAnalyzerWrapper(final Analyzer defaultAnalyzer) { + this(defaultAnalyzer, new HashMap<>()); + } + + /** + * Constructs with default analyzer and a map of analyzers to use for specific fields. + * + * @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use + * the one provided here. + * @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields + */ + public OLucenePerFieldAnalyzerWrapper( + final Analyzer defaultAnalyzer, final Map fieldAnalyzers) { + super(PER_FIELD_REUSE_STRATEGY); + this.defaultDelegateAnalyzer = defaultAnalyzer; + this.fieldAnalyzers = new HashMap<>(); + + this.fieldAnalyzers.putAll(fieldAnalyzers); + + this.fieldAnalyzers.put(RID, new KeywordAnalyzer()); + this.fieldAnalyzers.put(OLuceneIndexType.RID_HASH, new KeywordAnalyzer()); + this.fieldAnalyzers.put("_CLASS", new KeywordAnalyzer()); + this.fieldAnalyzers.put("_CLUSTER", new KeywordAnalyzer()); + this.fieldAnalyzers.put("_JSON", new KeywordAnalyzer()); + } + + @Override + protected Analyzer getWrappedAnalyzer(final String fieldName) { + final Analyzer analyzer = fieldAnalyzers.get(fieldName); + return (analyzer != null) ? 
analyzer : defaultDelegateAnalyzer; + } + + @Override + public String toString() { + return "PerFieldAnalyzerWrapper(" + + fieldAnalyzers + + ", default=" + + defaultDelegateAnalyzer + + ")"; + } + + public OLucenePerFieldAnalyzerWrapper add(final String field, final Analyzer analyzer) { + fieldAnalyzers.put(field, analyzer); + return this; + } + + public OLucenePerFieldAnalyzerWrapper add(final OLucenePerFieldAnalyzerWrapper analyzer) { + fieldAnalyzers.putAll(analyzer.getAnalyzers()); + return this; + } + + public OLucenePerFieldAnalyzerWrapper remove(final String field) { + fieldAnalyzers.remove(field); + return this; + } + + protected Map getAnalyzers() { + return fieldAnalyzers; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java b/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java new file mode 100644 index 0000000000..8459173f22 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/builder/OLuceneIndexType.java @@ -0,0 +1,207 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.arcadedb.lucene.builder; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.engine.OLuceneIndexEngineAbstract; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.index.OCompositeKey; +import com.arcadedb.database.index.OIndexDefinition; +import com.arcadedb.database.record.impl.ODocument; +import java.io.UnsupportedEncodingException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; + +/** Created by enricorisa on 21/03/14. 
*/ +public class OLuceneIndexType { + public static final String RID_HASH = "_RID_HASH"; + + public static Field createField( + final String fieldName, final Object value, final Field.Store store /*,Field.Index index*/) { + // metadata fields: _CLASS, _CLUSTER + if (fieldName.startsWith("_CLASS") || fieldName.startsWith("_CLUSTER")) { + return new StringField(fieldName, value.toString(), store); + } + return new TextField(fieldName, value.toString(), Field.Store.YES); + } + + public static String extractId(Document doc) { + String value = doc.get(RID_HASH); + if (value != null) { + int pos = value.indexOf("|"); + if (pos > 0) { + return value.substring(0, pos); + } else { + return value; + } + } else { + return null; + } + } + + public static Field createIdField(final OIdentifiable id, final Object key) { + return new StringField(RID_HASH, genValueId(id, key), Field.Store.YES); + } + + public static Field createOldIdField(final OIdentifiable id) { + return new StringField( + OLuceneIndexEngineAbstract.RID, id.getIdentity().toString(), Field.Store.YES); + } + + public static String genValueId(final OIdentifiable id, final Object key) { + String value = id.getIdentity().toString() + "|"; + value += hashKey(key); + return value; + } + + public static List createFields( + String fieldName, Object value, Field.Store store, Boolean sort) { + List fields = new ArrayList<>(); + if (value instanceof Number) { + Number number = (Number) value; + if (value instanceof Long) { + fields.add(new NumericDocValuesField(fieldName, number.longValue())); + fields.add(new LongPoint(fieldName, number.longValue())); + return fields; + } else if (value instanceof Float) { + fields.add(new FloatDocValuesField(fieldName, number.floatValue())); + fields.add(new FloatPoint(fieldName, number.floatValue())); + return fields; + } else if (value instanceof Double) { + fields.add(new DoubleDocValuesField(fieldName, number.doubleValue())); + fields.add(new DoublePoint(fieldName, 
number.doubleValue())); + return fields; + } + fields.add(new NumericDocValuesField(fieldName, number.longValue())); + fields.add(new IntPoint(fieldName, number.intValue())); + return fields; + } else if (value instanceof Date) { + Date date = (Date) value; + fields.add(new NumericDocValuesField(fieldName, date.getTime())); + fields.add(new LongPoint(fieldName, date.getTime())); + return fields; + } + if (Boolean.TRUE.equals(sort)) { + fields.add(new SortedDocValuesField(fieldName, new BytesRef(value.toString()))); + } + fields.add(new TextField(fieldName, value.toString(), Field.Store.YES)); + return fields; + } + + public static Query createExactQuery(OIndexDefinition index, Object key) { + Query query = null; + if (key instanceof String) { + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + if (index.getFields().size() > 0) { + for (String idx : index.getFields()) { + queryBuilder.add( + new TermQuery(new Term(idx, key.toString())), BooleanClause.Occur.SHOULD); + } + } else { + queryBuilder.add( + new TermQuery(new Term(OLuceneIndexEngineAbstract.KEY, key.toString())), + BooleanClause.Occur.SHOULD); + } + query = queryBuilder.build(); + } else if (key instanceof OCompositeKey) { + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + int i = 0; + OCompositeKey keys = (OCompositeKey) key; + for (String idx : index.getFields()) { + String val = (String) keys.getKeys().get(i); + queryBuilder.add(new TermQuery(new Term(idx, val)), BooleanClause.Occur.MUST); + i++; + } + query = queryBuilder.build(); + } + return query; + } + + public static Query createQueryId(OIdentifiable value) { + return new TermQuery(new Term(OLuceneIndexEngineAbstract.RID, value.getIdentity().toString())); + } + + public static Query createQueryId(OIdentifiable value, Object key) { + return new TermQuery(new Term(RID_HASH, genValueId(value, key))); + } + + public static String hashKey(Object key) { + try { + String keyString; + if (key instanceof 
ODocument) { + keyString = ((ODocument) key).toJSON(); + } else { + keyString = key.toString(); + } + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + byte[] bytes = sha256.digest(keyString.getBytes("UTF-8")); + return Base64.getEncoder().encodeToString(bytes); + } catch (NoSuchAlgorithmException e) { + throw OException.wrapException(new OLuceneIndexException("fail to find sha algorithm"), e); + + } catch (UnsupportedEncodingException e) { + throw OException.wrapException(new OLuceneIndexException("fail to find utf-8 encoding"), e); + } + } + + public static Query createDeleteQuery( + OIdentifiable value, List fields, Object key, ODocument metadata) { + + // TODO Implementation of Composite keys with Collection + final BooleanQuery.Builder filter = new BooleanQuery.Builder(); + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + // TODO: Condition on Id and field key only for backward compatibility + if (value != null) { + builder.add(createQueryId(value), BooleanClause.Occur.MUST); + } + String field = fields.iterator().next(); + builder.add( + new TermQuery(new Term(field, key.toString().toLowerCase(Locale.ENGLISH))), + BooleanClause.Occur.MUST); + + filter.add(builder.build(), BooleanClause.Occur.SHOULD); + if (value != null) { + filter.add(createQueryId(value, key), BooleanClause.Occur.SHOULD); + } + + return filter.build(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java new file mode 100644 index 0000000000..31a8811ddd --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneCrossClassIndexEngine.java @@ -0,0 +1,399 @@ +package com.arcadedb.lucene.engine; + +import static com.arcadedb.lucene.OLuceneIndexFactory.LUCENE_ALGORITHM; + +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.common.util.ORawPair; +import 
com.arcadedb.lucene.analyzer.OLucenePerFieldAnalyzerWrapper; +import com.arcadedb.lucene.collections.OLuceneResultSet; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.parser.OLuceneMultiFieldQueryParser; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.lucene.query.OLuceneQueryContext; +import com.arcadedb.lucene.tx.OLuceneTxChanges; +import com.arcadedb.database.config.IndexEngineData; +import com.arcadedb.database.ODatabaseRecordThreadLocal; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.OContextualRecordId; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.index.OIndex; +import com.arcadedb.database.index.OIndexDefinition; +import com.arcadedb.database.index.OIndexKeyUpdater; +import com.arcadedb.database.index.OIndexMetadata; +import com.arcadedb.database.index.engine.IndexEngineValidator; +import com.arcadedb.database.index.engine.IndexEngineValuesTransformer; +import com.arcadedb.database.metadata.schema.OClass; +import com.arcadedb.database.metadata.schema.OType; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.storage.OStorage; +import com.arcadedb.database.storage.impl.local.paginated.atomicoperations.OAtomicOperation; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.IndexSearcher; +import 
org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** + * Created by frank on 03/11/2016. + */ +public class OLuceneCrossClassIndexEngine implements OLuceneIndexEngine { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneCrossClassIndexEngine.class); + private final OStorage storage; + private final String indexName; + private final int indexId; + + public OLuceneCrossClassIndexEngine(int indexId, OStorage storage, String indexName) { + this.indexId = indexId; + + this.storage = storage; + this.indexName = indexName; + } + + @Override + public void init(OIndexMetadata metadata) {} + + @Override + public void flush() {} + + @Override + public int getId() { + return indexId; + } + + @Override + public void create(OAtomicOperation atomicOperation, IndexEngineData data) throws IOException {} + + @Override + public void delete(OAtomicOperation atomicOperation) {} + + @Override + public void load(IndexEngineData data) {} + + @Override + public boolean remove(OAtomicOperation atomicOperation, Object key) { + return false; + } + + @Override + public void clear(OAtomicOperation atomicOperation) {} + + @Override + public void close() {} + + @Override + public Object get(Object key) { + + final OLuceneKeyAndMetadata keyAndMeta = (OLuceneKeyAndMetadata) key; + final ODocument metadata = keyAndMeta.metadata; + final List excludes = + Optional.ofNullable(metadata.>getProperty("excludes")) + .orElse(Collections.emptyList()); + final List includes = + Optional.ofNullable(metadata.>getProperty("includes")) + .orElse(Collections.emptyList()); + + final Collection indexes = + ODatabaseRecordThreadLocal.instance() + .get() + .getMetadata() + .getIndexManager() + .getIndexes() + .stream() + .filter(i -> !excludes.contains(i.getName())) + .filter(i -> includes.isEmpty() || includes.contains(i.getName())) + 
.collect(Collectors.toList()); + + final OLucenePerFieldAnalyzerWrapper globalAnalyzer = + new OLucenePerFieldAnalyzerWrapper(new StandardAnalyzer()); + + final List globalFields = new ArrayList(); + + final List globalReaders = new ArrayList(); + final Map types = new HashMap<>(); + + try { + for (OIndex index : indexes) { + + if (index.getAlgorithm().equalsIgnoreCase(LUCENE_ALGORITHM) + && index.getType().equalsIgnoreCase(OClass.INDEX_TYPE.FULLTEXT.toString())) { + + final OIndexDefinition definition = index.getDefinition(); + final String className = definition.getClassName(); + + String[] indexFields = + definition.getFields().toArray(new String[definition.getFields().size()]); + + for (int i = 0; i < indexFields.length; i++) { + String field = indexFields[i]; + + types.put(className + "." + field, definition.getTypes()[i]); + globalFields.add(className + "." + field); + } + + OLuceneFullTextIndex fullTextIndex = (OLuceneFullTextIndex) index.getInternal(); + + globalAnalyzer.add((OLucenePerFieldAnalyzerWrapper) fullTextIndex.queryAnalyzer()); + + globalReaders.add(fullTextIndex.searcher().getIndexReader()); + } + } + + IndexReader indexReader = new MultiReader(globalReaders.toArray(new IndexReader[] {})); + + IndexSearcher searcher = new IndexSearcher(indexReader); + + Map boost = + Optional.ofNullable(metadata.>getProperty("boost")) + .orElse(new HashMap<>()); + + OLuceneMultiFieldQueryParser p = + new OLuceneMultiFieldQueryParser( + types, globalFields.toArray(new String[] {}), globalAnalyzer, boost); + + p.setAllowLeadingWildcard( + Optional.ofNullable(metadata.getProperty("allowLeadingWildcard")).orElse(false)); + + p.setSplitOnWhitespace( + Optional.ofNullable(metadata.getProperty("splitOnWhitespace")).orElse(true)); + + Object params = keyAndMeta.key.getKeys().get(0); + + Query query = p.parse(params.toString()); + + final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata); + + OLuceneQueryContext ctx = new OLuceneQueryContext(null, searcher, 
query, fields); + return new OLuceneResultSet(this, ctx, metadata); + } catch (IOException e) { + logger.error("unable to create multi-reader", e); + } catch (ParseException e) { + logger.error("unable to parse query", e); + } + + return null; + } + + @Override + public void put(OAtomicOperation atomicOperation, Object key, Object value) {} + + @Override + public void put(OAtomicOperation atomicOperation, Object key, ORID value) {} + + @Override + public boolean remove(OAtomicOperation atomicOperation, Object key, ORID value) { + return false; + } + + @Override + public void update( + OAtomicOperation atomicOperation, Object key, OIndexKeyUpdater updater) {} + + @Override + public boolean validatedPut( + OAtomicOperation atomicOperation, + Object key, + ORID value, + IndexEngineValidator validator) { + return false; + } + + @Override + public Stream> iterateEntriesBetween( + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean toInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMajor( + Object fromKey, + boolean isInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> iterateEntriesMinor( + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return Stream.empty(); + } + + @Override + public Stream> stream(IndexEngineValuesTransformer valuesTransformer) { + return Stream.empty(); + } + + @Override + public Stream> descStream(IndexEngineValuesTransformer valuesTransformer) { + return Stream.empty(); + } + + @Override + public Stream keyStream() { + return Stream.empty(); + } + + @Override + public long size(IndexEngineValuesTransformer transformer) { + return 0; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public String getName() { + return indexName; + } 
+ + @Override + public boolean acquireAtomicExclusiveLock(Object key) { + return false; + } + + @Override + public String getIndexNameByKey(Object key) { + return null; + } + + @Override + public String indexName() { + return indexName; + } + + @Override + public void onRecordAddedToResultSet( + OLuceneQueryContext queryContext, + OContextualRecordId recordId, + Document ret, + final ScoreDoc score) { + + recordId.setContext( + new HashMap() { + { + Map frag = queryContext.getFragments(); + + frag.entrySet().stream() + .forEach( + f -> { + TextFragment[] fragments = f.getValue(); + StringBuilder hlField = new StringBuilder(); + for (int j = 0; j < fragments.length; j++) { + if ((fragments[j] != null) && (fragments[j].getScore() > 0)) { + hlField.append(fragments[j].toString()); + } + } + put("$" + f.getKey() + "_hl", hlField.toString()); + }); + + put("$score", score.score); + } + }); + } + + @Override + public Document buildDocument(Object key, OIdentifiable value) { + return null; + } + + @Override + public Query buildQuery(Object query) { + return null; + } + + @Override + public Analyzer indexAnalyzer() { + return null; + } + + @Override + public Analyzer queryAnalyzer() { + return null; + } + + @Override + public boolean remove(Object key, OIdentifiable value) { + return false; + } + + @Override + public IndexSearcher searcher() { + return null; + } + + @Override + public void release(IndexSearcher searcher) {} + + @Override + public Set getInTx(Object key, OLuceneTxChanges changes) { + return null; + } + + @Override + public long sizeInTx(OLuceneTxChanges changes) { + return 0; + } + + @Override + public OLuceneTxChanges buildTxChanges() throws IOException { + return null; + } + + @Override + public Query deleteQuery(Object key, OIdentifiable value) { + return null; + } + + @Override + public boolean isCollectionIndex() { + return false; + } + + @Override + public void freeze(boolean throwException) {} + + @Override + public void release() {} + + @Override + 
public void updateUniqueIndexVersion(Object key) {} + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; + } + + @Override + public boolean remove(Object key) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java new file mode 100644 index 0000000000..646ac57992 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneFullTextIndexEngine.java @@ -0,0 +1,304 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.arcadedb.lucene.engine; + +import static com.arcadedb.lucene.builder.OLuceneQueryBuilder.EMPTY_METADATA; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.common.util.ORawPair; +import com.arcadedb.lucene.builder.OLuceneDocumentBuilder; +import com.arcadedb.lucene.builder.OLuceneIndexType; +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.LuceneIndexTransformer; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.collections.OLuceneResultSet; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.lucene.query.OLuceneQueryContext; +import com.arcadedb.lucene.tx.OLuceneTxChanges; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.OContextualRecordId; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.index.OCompositeKey; +import com.arcadedb.database.index.OIndexEngineException; +import com.arcadedb.database.index.OIndexKeyUpdater; +import com.arcadedb.database.index.OIndexMetadata; +import com.arcadedb.database.index.engine.IndexEngineValidator; +import com.arcadedb.database.index.engine.IndexEngineValuesTransformer; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.parser.ParseException; +import com.arcadedb.database.storage.OStorage; +import com.arcadedb.database.storage.impl.local.paginated.atomicoperations.OAtomicOperation; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import 
org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.store.Directory; + +public class OLuceneFullTextIndexEngine extends OLuceneIndexEngineAbstract { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneFullTextIndexEngine.class); + + private final OLuceneDocumentBuilder builder; + private OLuceneQueryBuilder queryBuilder; + private final AtomicLong bonsayFileId = new AtomicLong(0); + + public OLuceneFullTextIndexEngine(OStorage storage, String idxName, int id) { + super(id, storage, idxName); + builder = new OLuceneDocumentBuilder(); + } + + @Override + public void init(OIndexMetadata im) { + super.init(im); + queryBuilder = new OLuceneQueryBuilder(im.getMetadata()); + } + + @Override + public IndexWriter createIndexWriter(Directory directory) throws IOException { + + OLuceneIndexWriterFactory fc = new OLuceneIndexWriterFactory(); + + logger.debug("Creating Lucene index in '%s'...", directory); + + return fc.createIndexWriter(directory, metadata, indexAnalyzer()); + } + + @Override + public void onRecordAddedToResultSet( + final OLuceneQueryContext queryContext, + final OContextualRecordId recordId, + final Document ret, + final ScoreDoc score) { + HashMap data = new HashMap(); + + final Map frag = queryContext.getFragments(); + frag.forEach( + (key, fragments) -> { + final StringBuilder hlField = new StringBuilder(); + for (final TextFragment fragment : fragments) { + if ((fragment != null) && (fragment.getScore() > 0)) { + hlField.append(fragment.toString()); + } + } + data.put("$" + key + "_hl", hlField.toString()); + }); + data.put("$score", score.score); + + recordId.setContext(data); + } + + @Override + public boolean remove(final OAtomicOperation atomicOperation, final Object key) { + return remove(key); + } + + @Override + public boolean 
remove(OAtomicOperation atomicOperation, Object key, ORID value) { + return remove(key, value); + } + + @Override + public Object get(final Object key) { + return getInTx(key, null); + } + + @Override + public void update( + final OAtomicOperation atomicOperation, + final Object key, + final OIndexKeyUpdater updater) { + put(atomicOperation, key, updater.update(null, bonsayFileId).getValue()); + } + + @Override + public void put(final OAtomicOperation atomicOperation, final Object key, final Object value) { + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, (OIdentifiable) value); + addDocument(doc); + } + + @Override + public void put(OAtomicOperation atomicOperation, Object key, ORID value) { + updateLastAccess(); + openIfClosed(); + final Document doc = buildDocument(key, value); + addDocument(doc); + } + + @Override + public boolean validatedPut( + OAtomicOperation atomicOperation, + Object key, + ORID value, + IndexEngineValidator validator) { + throw new UnsupportedOperationException( + "Validated put is not supported by OLuceneFullTextIndexEngine"); + } + + @Override + public Stream> iterateEntriesBetween( + Object rangeFrom, + boolean fromInclusive, + Object rangeTo, + boolean toInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return LuceneIndexTransformer.transformToStream((OLuceneResultSet) get(rangeFrom), rangeFrom); + } + + private Set getResults( + final Query query, + final OCommandContext context, + final OLuceneTxChanges changes, + final ODocument metadata) { + // sort + final List fields = OLuceneIndexEngineUtils.buildSortFields(metadata); + final IndexSearcher luceneSearcher = searcher(); + final OLuceneQueryContext queryContext = + new OLuceneQueryContext(context, luceneSearcher, query, fields).withChanges(changes); + return new OLuceneResultSet(this, queryContext, metadata); + } + + @Override + public Stream> iterateEntriesMajor( + Object fromKey, + boolean isInclusive, + boolean 
ascSortOrder, + IndexEngineValuesTransformer transformer) { + return null; + } + + @Override + public Stream> iterateEntriesMinor( + Object toKey, + boolean isInclusive, + boolean ascSortOrder, + IndexEngineValuesTransformer transformer) { + return null; + } + + @Override + public boolean hasRangeQuerySupport() { + return false; + } + + @Override + public void updateUniqueIndexVersion(Object key) { + // not implemented + } + + @Override + public int getUniqueIndexVersion(Object key) { + return 0; // not implemented + } + + @Override + public Document buildDocument(Object key, OIdentifiable value) { + if (indexDefinition.isAutomatic()) { + // builder.newBuild(index, key, value); + + return builder.build(indexDefinition, key, value, collectionFields, metadata); + } else { + return putInManualindex(key, value); + } + } + + private static Document putInManualindex(Object key, OIdentifiable oIdentifiable) { + Document doc = new Document(); + doc.add(OLuceneIndexType.createOldIdField(oIdentifiable)); + doc.add(OLuceneIndexType.createIdField(oIdentifiable, key)); + + if (key instanceof OCompositeKey) { + + List keys = ((OCompositeKey) key).getKeys(); + + int k = 0; + for (Object o : keys) { + doc.add(OLuceneIndexType.createField("k" + k, o, Field.Store.YES)); + k++; + } + } else if (key instanceof Collection) { + @SuppressWarnings("unchecked") + Collection keys = (Collection) key; + + int k = 0; + for (Object o : keys) { + doc.add(OLuceneIndexType.createField("k" + k, o, Field.Store.YES)); + k++; + } + } else { + doc.add(OLuceneIndexType.createField("k0", key, Field.Store.NO)); + } + return doc; + } + + @Override + public Query buildQuery(final Object maybeQuery) { + try { + if (maybeQuery instanceof String) { + return queryBuilder.query(indexDefinition, maybeQuery, EMPTY_METADATA, queryAnalyzer()); + } else { + OLuceneKeyAndMetadata q = (OLuceneKeyAndMetadata) maybeQuery; + return queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + } + } catch 
(final ParseException e) { + throw OException.wrapException(new OIndexEngineException("Error parsing query"), e); + } + } + + @Override + public Set getInTx(Object key, OLuceneTxChanges changes) { + updateLastAccess(); + openIfClosed(); + try { + if (key instanceof OLuceneKeyAndMetadata) { + OLuceneKeyAndMetadata q = (OLuceneKeyAndMetadata) key; + Query query = queryBuilder.query(indexDefinition, q.key, q.metadata, queryAnalyzer()); + + OCommandContext commandContext = q.key.getContext(); + return getResults(query, commandContext, changes, q.metadata); + + } else { + Query query = queryBuilder.query(indexDefinition, key, EMPTY_METADATA, queryAnalyzer()); + + OCommandContext commandContext = null; + if (key instanceof OLuceneCompositeKey) { + commandContext = ((OLuceneCompositeKey) key).getContext(); + } + return getResults(query, commandContext, changes, EMPTY_METADATA); + } + } catch (ParseException e) { + throw OException.wrapException(new OIndexEngineException("Error parsing lucene query"), e); + } + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java new file mode 100644 index 0000000000..7336b359b9 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/engine/OLuceneIndexEngine.java @@ -0,0 +1,68 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ *
+ */
+
+package com.arcadedb.lucene.engine;
+
+import com.arcadedb.lucene.query.OLuceneQueryContext;
+import com.arcadedb.lucene.tx.OLuceneTxChanges;
+import com.arcadedb.database.OIdentifiable;
+import com.arcadedb.database.id.OContextualRecordId;
+import com.arcadedb.database.index.engine.OIndexEngine;
+import com.arcadedb.database.storage.impl.local.OFreezableStorageComponent;
+import java.io.IOException;
+import java.util.Set;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+
+/** Operations a Lucene-backed index engine exposes. Created by Enrico Risa on 04/09/15. */
+public interface OLuceneIndexEngine extends OIndexEngine, OFreezableStorageComponent {
+  /** Name of the index this engine serves. */
+  String indexName();
+  /** Per-hit hook; engines in this patch put "$score" and highlights into recordId's context. */
+  void onRecordAddedToResultSet(
+      OLuceneQueryContext queryContext, OContextualRecordId recordId, Document ret, ScoreDoc score);
+  /** Builds the Lucene document to index for the given key and record. */
+  Document buildDocument(Object key, OIdentifiable value);
+  /** Translates a query object (e.g. a query string) into a Lucene query. */
+  Query buildQuery(Object query);
+  /** Analyzer applied when writing to the index. */
+  Analyzer indexAnalyzer();
+  /** Analyzer applied when parsing queries. */
+  Analyzer queryAnalyzer();
+  /** Presumably removes the entry for key/value; NOTE(review): result semantics not shown here. */
+  boolean remove(Object key, OIdentifiable value);
+  /** Presumably removes by key alone; NOTE(review): result semantics not shown here. */
+  boolean remove(Object key);
+  /** Returns a searcher over this index; presumably paired with release(IndexSearcher). */
+  IndexSearcher searcher();
+  /** Hands back a searcher obtained from searcher() (assumed from the name; confirm). */
+  void release(IndexSearcher searcher);
+  /** Looks up key, optionally applying pending transaction changes (changes may be null). */
+  Set getInTx(Object key, OLuceneTxChanges changes);
+  /** Entry count as seen with the given transaction changes (assumed from the name; confirm). */
+  long sizeInTx(OLuceneTxChanges changes);
+  /** Creates the holder for this engine's transaction changes (assumed from the name; confirm). */
+  OLuceneTxChanges buildTxChanges() throws IOException;
+  /** Query selecting the documents to delete for key/value (assumed from the name; confirm). */
+  Query deleteQuery(Object key, OIdentifiable value);
+  /** Whether the index covers a collection field (assumed from the name; confirm). */
+  boolean isCollectionIndex();
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java
new file mode 100644
index 0000000000..cd2ac218fb
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassFunctionsFactory.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.database.sql.functions.OSQLFunctionFactoryTemplate;
+/** SQL function factory whose constructor registers {@link OLuceneCrossClassSearchFunction}. */
+public class OLuceneCrossClassFunctionsFactory extends OSQLFunctionFactoryTemplate {
+  /** Registers a new instance of the cross-class search function. */
+  public OLuceneCrossClassFunctionsFactory() {
+    register(new OLuceneCrossClassSearchFunction());
+  }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java
new file mode 100644
index 0000000000..484616e332
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneCrossClassSearchFunction.java
@@ -0,0 +1,181 @@
+package com.arcadedb.lucene.functions;
+
+import static com.arcadedb.lucene.OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS;
+
+import com.arcadedb.common.log.OLogManager;
+import com.arcadedb.common.log.OLogger;
+import com.arcadedb.lucene.builder.OLuceneQueryBuilder;
+import com.arcadedb.lucene.collections.OLuceneCompositeKey;
+import com.arcadedb.lucene.index.OLuceneFullTextIndex;
+import com.arcadedb.lucene.query.OLuceneKeyAndMetadata;
+import com.arcadedb.database.OCommandContext;
+import com.arcadedb.database.ODatabaseDocumentInternal;
+import com.arcadedb.database.OIdentifiable;
+import com.arcadedb.database.id.ORID;
+import com.arcadedb.database.index.OIndex;
+import
com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.functions.OIndexableSQLFunction; +import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * This function uses the CrossClassIndex to search documents across all the Lucene indexes defined in a database + *

+ * Created by frank on 19/02/2016. + */ +public class OLuceneCrossClassSearchFunction extends OSQLFunctionAbstract + implements OIndexableSQLFunction { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneCrossClassSearchFunction.class); + + public static final String NAME = "SEARCH_CROSS"; + + public OLuceneCrossClassSearchFunction() { + super(NAME, 1, 2); + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex fullTextIndex = searchForIndex(ctx); + + OExpression expression = args[0]; + String query = (String) expression.execute((OResult) null, ctx); + + if (fullTextIndex != null) { + + ODocument metadata = getMetadata(args); + List luceneResultSet; + try (Stream rids = + fullTextIndex + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata))) { + luceneResultSet = rids.collect(Collectors.toList()); + } + return luceneResultSet; + } + return Collections.emptySet(); + } + + @Override + public long estimate( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return 1L; + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } + + @Override + public boolean allowsIndexedExecution( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return true; + } + + @Override + public boolean shouldExecuteAfterSearch( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... 
args) { + return false; + } + + protected OLuceneFullTextIndex searchForIndex(OCommandContext ctx) { + + Collection indexes = + ((ODatabaseDocumentInternal) ctx.getDatabase()) + .getMetadata() + .getIndexManager() + .getIndexes(); + for (OIndex index : indexes) { + if (index.getInternal() instanceof OLuceneFullTextIndex) { + if (index.getAlgorithm().equalsIgnoreCase(LUCENE_CROSS_CLASS)) { + return (OLuceneFullTextIndex) index; + } + } + } + return null; + } + + private ODocument getMetadata(OExpression[] args) { + if (args.length == 2) { + return new ODocument().fromJSON(args[1].toString()); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable currentRecord, + Object currentResult, + Object[] params, + OCommandContext ctx) { + + OLuceneFullTextIndex fullTextIndex = searchForIndex(ctx); + + String query = (String) params[0]; + + if (fullTextIndex != null) { + + ODocument metadata = getMetadata(params); + + Collection luceneResultSet = + fullTextIndex.get( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata)); + + return luceneResultSet; + } + return Collections.emptySet(); + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 2) { + return new ODocument().fromMap((Map) params[1]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + logger.debug("syntax"); + return "SEARCH_CROSS('', {metadata})"; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java new file mode 100644 index 0000000000..2251ba9e31 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsFactory.java @@ -0,0 +1,27 @@ +/* + * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.database.sql.functions.OSQLFunctionFactoryTemplate;
+/** SQL function factory registering the on-index, on-fields, on-class and more-like-this functions. */
+public class OLuceneFunctionsFactory extends OSQLFunctionFactoryTemplate {
+  public OLuceneFunctionsFactory() { // one register(...) call per function instance
+    register(new OLuceneSearchOnIndexFunction());
+    register(new OLuceneSearchOnFieldsFunction());
+    register(new OLuceneSearchOnClassFunction());
+    register(new OLuceneSearchMoreLikeThisFunction());
+  }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java
new file mode 100644
index 0000000000..f7d2c33646
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneFunctionsUtils.java
@@ -0,0 +1,60 @@
+package com.arcadedb.lucene.functions;
+
+import com.arcadedb.lucene.index.OLuceneFullTextIndex;
+import com.arcadedb.database.OCommandContext;
+import com.arcadedb.database.ODatabaseDocumentInternal;
+import com.arcadedb.database.metadata.OMetadataInternal;
+import com.arcadedb.database.sql.executor.OResult;
+import com.arcadedb.database.sql.parser.OExpression;
+import org.apache.lucene.index.memory.MemoryIndex;
+
+/** Created by frank on 13/02/2017.
*/ +public class OLuceneFunctionsUtils { + public static final String MEMORY_INDEX = "_memoryIndex"; + + protected static OLuceneFullTextIndex searchForIndex(OExpression[] args, OCommandContext ctx) { + final String indexName = (String) args[0].execute((OResult) null, ctx); + return getLuceneFullTextIndex(ctx, indexName); + } + + protected static OLuceneFullTextIndex getLuceneFullTextIndex( + final OCommandContext ctx, final String indexName) { + final ODatabaseDocumentInternal documentDatabase = + (ODatabaseDocumentInternal) ctx.getDatabase(); + documentDatabase.activateOnCurrentThread(); + final OMetadataInternal metadata = documentDatabase.getMetadata(); + + final OLuceneFullTextIndex index = + (OLuceneFullTextIndex) + metadata.getIndexManagerInternal().getIndex(documentDatabase, indexName); + if (!(index instanceof OLuceneFullTextIndex)) { + throw new IllegalArgumentException("Not a valid Lucene index:: " + indexName); + } + return index; + } + + public static MemoryIndex getOrCreateMemoryIndex(OCommandContext ctx) { + MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); + if (memoryIndex == null) { + memoryIndex = new MemoryIndex(); + ctx.setVariable(MEMORY_INDEX, memoryIndex); + } + memoryIndex.reset(); + return memoryIndex; + } + + public static String doubleEscape(final String s) { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); ++i) { + final char c = s.charAt(i); + if (c == 92 || c == 43 || c == 45 || c == 33 || c == 40 || c == 41 || c == 58 || c == 94 + || c == 91 || c == 93 || c == 34 || c == 123 || c == 125 || c == 126 || c == 42 || c == 63 + || c == 124 || c == 38 || c == 47) { + sb.append('\\'); + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java new file mode 100644 index 
0000000000..774e252023 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchFunctionTemplate.java @@ -0,0 +1,90 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.lucene.collections.OLuceneResultSet; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.functions.OIndexableSQLFunction; +import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import java.util.Map; + +/** Created by frank on 25/05/2017. */ +public abstract class OLuceneSearchFunctionTemplate extends OSQLFunctionAbstract + implements OIndexableSQLFunction { + + public OLuceneSearchFunctionTemplate(String iName, int iMinParams, int iMaxParams) { + super(iName, iMinParams, iMaxParams); + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return allowsIndexedExecution(target, operator, rightValue, ctx, args); + } + + @Override + public boolean allowsIndexedExecution( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + OLuceneFullTextIndex index = searchForIndex(target, ctx, args); + return index != null; + } + + @Override + public boolean shouldExecuteAfterSearch( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... 
args) { + return false; + } + + @Override + public long estimate( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + Iterable a = searchFromTarget(target, operator, rightValue, ctx, args); + if (a instanceof OLuceneResultSet) { + return ((OLuceneResultSet) a).size(); + } + long count = 0; + for (Object o : a) { + count++; + } + + return count; + } + + protected ODocument getMetadata(OExpression metadata, OCommandContext ctx) { + final Object md = metadata.execute((OResult) null, ctx); + if (md instanceof ODocument) { + return (ODocument) md; + } else if (md instanceof Map) { + return new ODocument().fromMap((Map) md); + } else if (md instanceof String) { + return new ODocument().fromJSON((String) md); + } else { + return new ODocument().fromJSON(metadata.toString()); + } + } + + protected abstract OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... args); +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java new file mode 100644 index 0000000000..813ee9f0e1 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchMoreLikeThisFunction.java @@ -0,0 +1,396 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.io.OIOException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.ODatabaseSession; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.id.ORecordId; +import 
com.arcadedb.database.metadata.OMetadataInternal; +import com.arcadedb.database.record.OElement; +import com.arcadedb.database.record.ORecord; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.functions.OIndexableSQLFunction; +import com.arcadedb.database.sql.functions.OSQLFunctionAbstract; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.mlt.MoreLikeThis; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery.Builder; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + +/** Created by frank on 15/01/2017. 
*/ +public class OLuceneSearchMoreLikeThisFunction extends OSQLFunctionAbstract + implements OIndexableSQLFunction { + + private static final OLogger logger = + OLogManager.instance().logger(OLuceneSearchMoreLikeThisFunction.class); + + public static final String NAME = "search_more"; + + public OLuceneSearchMoreLikeThisFunction() { + super(OLuceneSearchMoreLikeThisFunction.NAME, 1, 2); + } + + @Override + public String getName() { + return OLuceneSearchMoreLikeThisFunction.NAME; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + + // TODO: slow implementation can be made faster + if (!(iCurrentRecord instanceof ODocument)) { + return false; + } + String className = ((ODocument) iCurrentRecord).getClassName(); + OLuceneFullTextIndex index = this.searchForIndex(ctx, className); + + if (index == null) return Collections.emptySet(); + + IndexSearcher searcher = index.searcher(); + + ODocument metadata = new ODocument((Map) params[1]); + + List ridsAsString = parseRidsObj(ctx, params[0]); + + List others = + ridsAsString.stream() + .map( + rid -> { + ORecordId recordId = new ORecordId(); + + recordId.fromString(rid); + return recordId; + }) + .map(id -> id.getRecord()) + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); + + Builder queryBuilder = new Builder(); + + excludeOtherFromResults(ridsAsString, queryBuilder); + + ODatabaseSession contest = ctx.getDatabase(); + addLikeQueries(others, mlt, queryBuilder, contest); + + Query mltQuery = queryBuilder.build(); + + Set luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), + metadata))) { + luceneResultSet = rids.collect(Collectors.toSet()); + } + + return luceneResultSet.contains(iCurrentRecord); + } + + @Override + public String 
getSyntax() { + return "SEARCH_MORE( [rids], [ metdatada {} ] )"; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = this.searchForIndex(target, ctx); + + if (index == null) return Collections.emptySet(); + + IndexSearcher searcher = index.searcher(); + + OExpression expression = args[0]; + + ODocument metadata = parseMetadata(args); + + List ridsAsString = parseRids(ctx, expression); + + List others = + ridsAsString.stream() + .map( + rid -> { + ORecordId recordId = new ORecordId(); + + recordId.fromString(rid); + return recordId; + }) + .map(id -> id.getRecord()) + .collect(Collectors.toList()); + + MoreLikeThis mlt = buildMoreLikeThis(index, searcher, metadata); + + Builder queryBuilder = new Builder(); + + excludeOtherFromResults(ridsAsString, queryBuilder); + + ODatabaseSession contest = ctx.getDatabase(); + addLikeQueries(others, mlt, queryBuilder, contest); + + Query mltQuery = queryBuilder.build(); + + Set luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx), + metadata))) { + luceneResultSet = rids.collect(Collectors.toSet()); + } + + return luceneResultSet; + } + + private List parseRids(OCommandContext ctx, OExpression expression) { + + Object expResult = expression.execute((OResult) null, ctx); + return parseRidsObj(ctx, expResult); + } + + private List parseRidsObj(OCommandContext ctx, Object expResult) { + // single rind + if (expResult instanceof OIdentifiable) { + return Collections.singletonList(((OIdentifiable) expResult).getIdentity().toString()); + } + + Iterator iter; + if (expResult instanceof Iterable) { + iter = ((Iterable) expResult).iterator(); + } else if (expResult instanceof Iterator) { + iter = (Iterator) expResult; + } else { + return 
Collections.emptyList(); + } + + List rids = new ArrayList<>(); + while (iter.hasNext()) { + Object item = iter.next(); + if (item instanceof OResult) { + if (((OResult) item).isElement()) { + rids.add(((OResult) item).getIdentity().get().toString()); + } else { + Set properties = ((OResult) item).getPropertyNames(); + if (properties.size() == 1) { + Object val = ((OResult) item).getProperty(properties.iterator().next()); + if (val instanceof OIdentifiable) { + rids.add(((OIdentifiable) val).getIdentity().toString()); + } + } + } + } else if (item instanceof OIdentifiable) { + rids.add(((OIdentifiable) item).getIdentity().toString()); + } + } + return rids; + } + + private ODocument parseMetadata(OExpression[] args) { + ODocument metadata = new ODocument(); + if (args.length == 2) { + metadata.fromJSON(args[1].toString()); + } + return metadata; + } + + private MoreLikeThis buildMoreLikeThis( + OLuceneFullTextIndex index, IndexSearcher searcher, ODocument metadata) { + + try { + MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); + + mlt.setAnalyzer(index.queryAnalyzer()); + + mlt.setFieldNames( + Optional.ofNullable(metadata.>getProperty("fieldNames")) + .orElse(index.getDefinition().getFields()) + .toArray(new String[] {})); + + mlt.setMaxQueryTerms( + Optional.ofNullable(metadata.getProperty("maxQueryTerms")) + .orElse(MoreLikeThis.DEFAULT_MAX_QUERY_TERMS)); + + mlt.setMinTermFreq( + Optional.ofNullable(metadata.getProperty("minTermFreq")) + .orElse(MoreLikeThis.DEFAULT_MIN_TERM_FREQ)); + + mlt.setMaxDocFreq( + Optional.ofNullable(metadata.getProperty("maxDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); + + mlt.setMinDocFreq( + Optional.ofNullable(metadata.getProperty("minDocFreq")) + .orElse(MoreLikeThis.DEFAULT_MAX_DOC_FREQ)); + + mlt.setBoost( + Optional.ofNullable(metadata.getProperty("boost")) + .orElse(MoreLikeThis.DEFAULT_BOOST)); + + mlt.setBoostFactor( + Optional.ofNullable(metadata.getProperty("boostFactor")).orElse(1f)); + + 
mlt.setMaxWordLen( + Optional.ofNullable(metadata.getProperty("maxWordLen")) + .orElse(MoreLikeThis.DEFAULT_MAX_WORD_LENGTH)); + + mlt.setMinWordLen( + Optional.ofNullable(metadata.getProperty("minWordLen")) + .orElse(MoreLikeThis.DEFAULT_MIN_WORD_LENGTH)); + + mlt.setMaxNumTokensParsed( + Optional.ofNullable(metadata.getProperty("maxNumTokensParsed")) + .orElse(MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED)); + + mlt.setStopWords( + (Set) + Optional.ofNullable(metadata.getProperty("stopWords")) + .orElse(MoreLikeThis.DEFAULT_STOP_WORDS)); + + return mlt; + } catch (IOException e) { + throw OException.wrapException(new OIOException("Lucene IO Exception"), e); + } + } + + private void addLikeQueries( + List others, MoreLikeThis mlt, Builder queryBuilder, ODatabaseSession contest) { + others.stream() + .map(or -> contest.load(or)) + .forEach( + element -> + Arrays.stream(mlt.getFieldNames()) + .forEach( + fieldName -> { + String property = element.getProperty(fieldName); + try { + Query fieldQuery = mlt.like(fieldName, new StringReader(property)); + if (!fieldQuery.toString().isEmpty()) + queryBuilder.add(fieldQuery, Occur.SHOULD); + } catch (IOException e) { + // FIXME handle me! 
+ logger.error("Error during Lucene query generation", e); + } + })); + } + + private void excludeOtherFromResults(List ridsAsString, Builder queryBuilder) { + ridsAsString.stream() + .forEach( + rid -> + queryBuilder.add( + new TermQuery(new Term("RID", QueryParser.escape(rid))), Occur.MUST_NOT)); + } + + private OLuceneFullTextIndex searchForIndex(OFromClause target, OCommandContext ctx) { + OFromItem item = target.getItem(); + + String className = item.getIdentifier().getStringValue(); + + return searchForIndex(ctx, className); + } + + private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String className) { + OMetadataInternal dbMetadata = + (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); + + List indices = + dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() + .filter(idx -> idx instanceof OLuceneFullTextIndex) + .map(idx -> (OLuceneFullTextIndex) idx) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + throw new IllegalArgumentException("too many full-text indices on given class: " + className); + } + + return indices.size() == 0 ? null : indices.get(0); + } + + @Override + public long estimate( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + OLuceneFullTextIndex index = this.searchForIndex(target, ctx); + + if (index != null) return index.size(); + return 0; + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } + + @Override + public boolean allowsIndexedExecution( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... 
args) { + + OLuceneFullTextIndex index = this.searchForIndex(target, ctx); + + return index != null; + } + + @Override + public boolean shouldExecuteAfterSearch( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java new file mode 100644 index 0000000000..e9e6f21e04 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnClassFunction.java @@ -0,0 +1,184 @@ +package com.arcadedb.lucene.functions; + +import static com.arcadedb.lucene.functions.OLuceneFunctionsUtils.getOrCreateMemoryIndex; + +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.metadata.OMetadataInternal; +import com.arcadedb.database.record.OElement; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.executor.OResultInternal; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. 
*/ +public class OLuceneSearchOnClassFunction extends OLuceneSearchFunctionTemplate { + + public static final String NAME = "search_class"; + + public OLuceneSearchOnClassFunction() { + super(NAME, 1, 2); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public boolean canExecuteInline( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + return true; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + + OResult result; + if (iThis instanceof OResult) { + result = (OResult) iThis; + } else { + result = new OResultInternal((OIdentifiable) iThis); + } + + if (!result.getElement().isPresent()) return false; + OElement element = result.getElement().get(); + if (!element.getSchemaType().isPresent()) return false; + + String className = element.getSchemaType().get().getName(); + + OLuceneFullTextIndex index = searchForIndex(ctx, className); + + if (index == null) return false; + + String query = (String) params[0]; + + MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); + + List key = + index.getDefinition().getFields().stream() + .map(s -> element.getProperty(s)) + .collect(Collectors.toList()); + + for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { + memoryIndex.addField(field, index.indexAnalyzer()); + } + + ODocument metadata = getMetadata(params); + OLuceneKeyAndMetadata keyAndMetadata = + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 2) { + return new ODocument().fromMap((Map) params[1]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + return "SEARCH_INDEX( 
indexName, [ metdatada {} ] )"; + } + + @Override + public boolean filterResult() { + return true; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = searchForIndex(target, ctx); + + OExpression expression = args[0]; + String query = (String) expression.execute((OResult) null, ctx); + + if (index != null) { + + ODocument metadata = getMetadata(args, ctx); + + List luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata))) { + luceneResultSet = rids.collect(Collectors.toList()); + } + + return luceneResultSet; + } + return Collections.emptySet(); + } + + private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { + if (args.length == 2) { + return getMetadata(args[1], ctx); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... args) { + OFromItem item = target.getItem(); + + String className = item.getIdentifier().getStringValue(); + + return searchForIndex(ctx, className); + } + + private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String className) { + OMetadataInternal dbMetadata = + (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); + + List indices = + dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() + .filter(idx -> idx instanceof OLuceneFullTextIndex) + .map(idx -> (OLuceneFullTextIndex) idx) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + throw new IllegalArgumentException("too many full-text indices on given class: " + className); + } + + return indices.size() == 0 ? 
null : indices.get(0); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java new file mode 100644 index 0000000000..7ebe6e7b27 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnFieldsFunction.java @@ -0,0 +1,200 @@ +package com.arcadedb.lucene.functions; + +import static com.arcadedb.lucene.functions.OLuceneFunctionsUtils.getOrCreateMemoryIndex; + +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.metadata.OMetadataInternal; +import com.arcadedb.database.record.OElement; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.executor.OResultInternal; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. 
*/ +public class OLuceneSearchOnFieldsFunction extends OLuceneSearchFunctionTemplate { + + public static final String NAME = "search_fields"; + + public OLuceneSearchOnFieldsFunction() { + super(NAME, 2, 3); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + + if (iThis instanceof ORID) { + iThis = ((ORID) iThis).getRecord(); + } + if (iThis instanceof OIdentifiable) { + iThis = new OResultInternal((OIdentifiable) iThis); + } + OResult result = (OResult) iThis; + + if (!result.getElement().isPresent()) return false; + OElement element = result.getElement().get(); + if (!element.getSchemaType().isPresent()) return false; + String className = element.getSchemaType().get().getName(); + List fieldNames = (List) params[0]; + + OLuceneFullTextIndex index = searchForIndex(className, ctx, fieldNames); + + if (index == null) return false; + + String query; + if (params[1] == null) { + query = null; + } else { + query = (String) params[1].toString(); + } + + MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); + + List key = + index.getDefinition().getFields().stream() + .map(s -> element.getProperty(s)) + .collect(Collectors.toList()); + + for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { + memoryIndex.addField(field, index.indexAnalyzer()); + } + + ODocument metadata = getMetadata(params); + OLuceneKeyAndMetadata keyAndMetadata = + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 3) { + return new ODocument().fromMap((Map) params[2]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + public String getSyntax() { + return "SEARCH_INDEX( 
indexName, [ metdatada {} ] )"; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... args) { + + OLuceneFullTextIndex index = searchForIndex(target, ctx, args); + + OExpression expression = args[1]; + Object query = expression.execute((OResult) null, ctx); + if (index != null) { + + ODocument meta = getMetadata(args, ctx); + Set luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { + luceneResultSet = rids.collect(Collectors.toSet()); + } + + return luceneResultSet; + } + throw new RuntimeException(); + } + + private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { + if (args.length == 3) { + return getMetadata(args[2], ctx); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... 
args) { + List fieldNames = (List) args[0].execute((OResult) null, ctx); + OFromItem item = target.getItem(); + String className = item.getIdentifier().getStringValue(); + + return searchForIndex(className, ctx, fieldNames); + } + + private OLuceneFullTextIndex searchForIndex( + String className, OCommandContext ctx, List fieldNames) { + OMetadataInternal dbMetadata = + (OMetadataInternal) ctx.getDatabase().activateOnCurrentThread().getMetadata(); + + List indices = + dbMetadata.getImmutableSchemaSnapshot().getClass(className).getIndexes().stream() + .filter(idx -> idx instanceof OLuceneFullTextIndex) + .map(idx -> (OLuceneFullTextIndex) idx) + .filter(idx -> intersect(idx.getDefinition().getFields(), fieldNames)) + .collect(Collectors.toList()); + + if (indices.size() > 1) { + throw new IllegalArgumentException( + "too many indices matching given field name: " + String.join(",", fieldNames)); + } + + return indices.size() == 0 ? null : indices.get(0); + } + + public List intersection(List list1, List list2) { + List list = new ArrayList(); + + for (T t : list1) { + if (list2.contains(t)) { + list.add(t); + } + } + + return list; + } + + public boolean intersect(List list1, List list2) { + + for (T t : list1) { + if (list2.contains(t)) { + return true; + } + } + + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java new file mode 100644 index 0000000000..c27b3ea8ff --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/functions/OLuceneSearchOnIndexFunction.java @@ -0,0 +1,198 @@ +package com.arcadedb.lucene.functions; + +import com.arcadedb.lucene.builder.OLuceneQueryBuilder; +import com.arcadedb.lucene.collections.OLuceneCompositeKey; +import com.arcadedb.lucene.index.OLuceneFullTextIndex; +import com.arcadedb.lucene.query.OLuceneKeyAndMetadata; +import com.arcadedb.database.OCommandContext; +import 
com.arcadedb.database.ODatabaseDocumentInternal; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.id.ORID; +import com.arcadedb.database.index.OIndex; +import com.arcadedb.database.record.impl.ODocument; +import com.arcadedb.database.sql.executor.OResult; +import com.arcadedb.database.sql.executor.OResultInternal; +import com.arcadedb.database.sql.parser.OBinaryCompareOperator; +import com.arcadedb.database.sql.parser.OExpression; +import com.arcadedb.database.sql.parser.OFromClause; +import com.arcadedb.database.sql.parser.OFromItem; +import com.arcadedb.database.sql.parser.OIdentifier; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; + +/** Created by frank on 15/01/2017. */ +public class OLuceneSearchOnIndexFunction extends OLuceneSearchFunctionTemplate { + + public static final String MEMORY_INDEX = "_memoryIndex"; + + public static final String NAME = "search_index"; + + public OLuceneSearchOnIndexFunction() { + super(NAME, 2, 3); + } + + @Override + public String getName() { + return NAME; + } + + @Override + public Object execute( + Object iThis, + OIdentifiable iCurrentRecord, + Object iCurrentResult, + Object[] params, + OCommandContext ctx) { + if (iThis instanceof ORID) { + iThis = ((ORID) iThis).getRecord(); + } + if (iThis instanceof OIdentifiable) { + iThis = new OResultInternal((OIdentifiable) iThis); + } + OResult result = (OResult) iThis; + + String indexName = (String) params[0]; + + OLuceneFullTextIndex index = searchForIndex(ctx, indexName); + + if (index == null) return false; + + String query = (String) params[1]; + + MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx); + + List key = + index.getDefinition().getFields().stream() + .map(s -> result.getProperty(s)) + .collect(Collectors.toList()); 
+ + for (IndexableField field : index.buildDocument(key, iCurrentRecord).getFields()) { + memoryIndex.addField(field, index.indexAnalyzer()); + } + + ODocument metadata = getMetadata(params); + OLuceneKeyAndMetadata keyAndMetadata = + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), metadata); + + return memoryIndex.search(index.buildQuery(keyAndMetadata)) > 0.0f; + } + + private ODocument getMetadata(Object[] params) { + + if (params.length == 3) { + return new ODocument().fromMap((Map) params[2]); + } + + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + private MemoryIndex getOrCreateMemoryIndex(OCommandContext ctx) { + MemoryIndex memoryIndex = (MemoryIndex) ctx.getVariable(MEMORY_INDEX); + if (memoryIndex == null) { + memoryIndex = new MemoryIndex(); + ctx.setVariable(MEMORY_INDEX, memoryIndex); + } + + memoryIndex.reset(); + return memoryIndex; + } + + @Override + public String getSyntax() { + return "SEARCH_INDEX( indexName, [ metdatada {} ] )"; + } + + @Override + public boolean filterResult() { + return true; + } + + @Override + public Iterable searchFromTarget( + OFromClause target, + OBinaryCompareOperator operator, + Object rightValue, + OCommandContext ctx, + OExpression... 
args) { + + OLuceneFullTextIndex index = searchForIndex(target, ctx, args); + + OExpression expression = args[1]; + String query = (String) expression.execute((OResult) null, ctx); + if (index != null && query != null) { + + ODocument meta = getMetadata(args, ctx); + + List luceneResultSet; + try (Stream rids = + index + .getInternal() + .getRids( + new OLuceneKeyAndMetadata( + new OLuceneCompositeKey(Arrays.asList(query)).setContext(ctx), meta))) { + luceneResultSet = rids.collect(Collectors.toList()); + } + + return luceneResultSet; + } + return Collections.emptyList(); + } + + private ODocument getMetadata(OExpression[] args, OCommandContext ctx) { + if (args.length == 3) { + return getMetadata(args[2], ctx); + } + return OLuceneQueryBuilder.EMPTY_METADATA; + } + + @Override + protected OLuceneFullTextIndex searchForIndex( + OFromClause target, OCommandContext ctx, OExpression... args) { + + OFromItem item = target.getItem(); + OIdentifier identifier = item.getIdentifier(); + return searchForIndex(identifier.getStringValue(), ctx, args); + } + + private OLuceneFullTextIndex searchForIndex( + String className, OCommandContext ctx, OExpression... 
args) { + + String indexName = (String) args[0].execute((OResult) null, ctx); + + final ODatabaseDocumentInternal database = (ODatabaseDocumentInternal) ctx.getDatabase(); + OIndex index = + database + .getMetadata() + .getIndexManagerInternal() + .getClassIndex(database, className, indexName); + + if (index != null && index.getInternal() instanceof OLuceneFullTextIndex) { + return (OLuceneFullTextIndex) index; + } + + return null; + } + + private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String indexName) { + final ODatabaseDocumentInternal database = (ODatabaseDocumentInternal) ctx.getDatabase(); + OIndex index = database.getMetadata().getIndexManagerInternal().getIndex(database, indexName); + + if (index != null && index.getInternal() instanceof OLuceneFullTextIndex) { + return (OLuceneFullTextIndex) index; + } + + return null; + } + + @Override + public Object getResult(OCommandContext ctx) { + return super.getResult(ctx); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java new file mode 100644 index 0000000000..520cf75087 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/index/ArcadeLuceneFullTextIndex.java @@ -0,0 +1,362 @@ +package com.arcadedb.lucene.index; + +import com.arcadedb.database.DatabaseInternal; +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.index.Index; +import com.arcadedb.index.IndexCursor; +import com.arcadedb.index.IndexException; +import com.arcadedb.index.IndexInternal; +import com.arcadedb.index.RangeIndexCursor; +import com.arcadedb.index.TypeIndex; +import com.arcadedb.index.engine.IndexEngine; +import com.arcadedb.schema.IndexBuilder; // Added for build method +import com.arcadedb.schema.IndexDefinition; +import com.arcadedb.schema.Type; +import com.arcadedb.tx.TransactionContext; + +import java.io.IOException; // Added for compact 
+import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; + +public class ArcadeLuceneFullTextIndex implements IndexInternal { + + private final DatabaseInternal database; + private final String name; + private final boolean unique; + private final String analyzerClassName; + private final String filePath; + private final Type[] keyTypes; + // Other fields like IndexDefinition, IndexEngine, pageSize, nullStrategy, etc. + private IndexDefinition definition; // Will be set by setMetadata or build + + public ArcadeLuceneFullTextIndex(DatabaseInternal database, String name, boolean unique, String analyzerClassName, String filePath, Type[] keyTypes) { + this.database = database; + this.name = name; + this.unique = unique; + this.analyzerClassName = analyzerClassName; + this.filePath = filePath; // Store filePath + this.keyTypes = keyTypes; // Store keyTypes + // Further initialization for Lucene engine would go here. + // This constructor might be called by the handler, then setMetadata/build by schema loading/creation. + } + + // --- IndexInternal Methods --- + + @Override + public String getAssociatedFileName() { + return filePath; // Return stored filePath + } + + @Override + public void build(IndexBuilder builder) { + // This method is typically called when an index is being built from scratch. + // The IndexBuilder contains all necessary information. + // this.definition = builder.getIndexDefinition(); // Or create one + // Initialize/create the Lucene IndexWriter and other resources here. 
+        throw new UnsupportedOperationException("Not yet implemented: build");
+    }
+
+    /**
+     * Stores the index definition, then fails: the remaining metadata is not handled yet.
+     * NOTE(review): {@code this.definition} is assigned before the unconditional throw, so a caller
+     * that catches the exception still observes the new definition.
+     */
+    @Override
+    public void setMetadata(IndexDefinition definition, String filePath, int pageSize, byte nullStrategy) {
+        this.definition = definition;
+        // this.filePath = filePath; // Already set in constructor, ensure consistency or update
+        // this.pageSize = pageSize;
+        // this.nullStrategy = nullStrategy;
+        throw new UnsupportedOperationException("Not yet implemented: setMetadata");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public STATUS getStatus() {
+        // Return current status, e.g., from engine
+        throw new UnsupportedOperationException("Not yet implemented: getStatus");
+    }
+
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void setStatus(STATUS status) {
+        // Set current status, e.g., on engine
+        throw new UnsupportedOperationException("Not yet implemented: setStatus");
+    }
+
+    /** Not implemented; always throws, so no Lucene resource is released by this call yet. */
+    @Override
+    public void close() {
+        // Release Lucene resources (IndexWriter, IndexSearcher, Directory)
+        throw new UnsupportedOperationException("Not yet implemented: close");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void drop() {
+        // Remove Lucene index files from disk.
+        // Unregister from schema should be handled by Schema.dropIndex() calling this.
+        throw new UnsupportedOperationException("Not yet implemented: drop");
+    }
+
+    /** Always returns the placeholder {@code -1}. */
+    @Override
+    public int getFileId() {
+        // Lucene might not use file IDs in the same way ArcadeDB's native engine does.
+        // Return a sentinel or appropriate value.
+        return -1;
+    }
+
+    // NOTE(review): "T" is used without a visible type-parameter declaration and "Class" is raw;
+    // the generic part of this signature looks lost - confirm against the overridden interface method.
+    @Override
+    public T getComponent(String name, Class type) {
+        // Used for accessing underlying components, might be relevant for engine access.
+        throw new UnsupportedOperationException("Not yet implemented: getComponent");
+    }
+
+    /** Returns the {@code keyTypes} field as-is (no defensive copy). */
+    @Override
+    public Type[] getKeyTypes() {
+        return keyTypes; // Return stored keyTypes
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public byte[] getBinaryKeyTypes() {
+        // Convert Type[] to byte[] if necessary for serialization, or return null if not used.
+        throw new UnsupportedOperationException("Not yet implemented: getBinaryKeyTypes");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void setTypeIndex(TypeIndex typeIndex) {
+        // Associated with schema type's index list.
+        throw new UnsupportedOperationException("Not yet implemented: setTypeIndex");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public TypeIndex getTypeIndex() {
+        throw new UnsupportedOperationException("Not yet implemented: getTypeIndex");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void scheduleCompaction() {
+        // Lucene has its own merging/optimization, might not map directly.
+        throw new UnsupportedOperationException("Not yet implemented: scheduleCompaction");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public String getMostRecentFileName() {
+        // Relates to WAL, might not be applicable or needs specific handling for Lucene.
+        throw new UnsupportedOperationException("Not yet implemented: getMostRecentFileName");
+    }
+
+    // NOTE(review): raw Map return type - confirm the type declared by the overridden method.
+    @Override
+    public Map toJSON() {
+        // Serialize index configuration/stats to JSON.
+        throw new UnsupportedOperationException("Not yet implemented: toJSON");
+    }
+
+    /** Always returns {@code null}. */
+    @Override
+    public Index getAssociatedIndex() {
+        // For sub-indexes, typically null for a main index.
+        return null;
+    }
+
+    // --- Index Methods ---
+
+    /** Returns the {@code name} field. */
+    @Override
+    public String getName() {
+        return name;
+    }
+
+    /** Returns the hard-coded algorithm name {@code "LUCENE"}. */
+    @Override
+    public String getTypeName() {
+        // This should return the algorithm name, e.g., "LUCENE"
+        // return ArcadeLuceneLifecycleManager.LUCENE_ALGORITHM; // If constant is accessible
+        return "LUCENE"; // Or get from definition if set
+    }
+
+    /**
+     * Returns the stored definition.
+     *
+     * @throws UnsupportedOperationException if no definition has been stored yet
+     */
+    @Override
+    public IndexDefinition getDefinition() {
+        // Return the stored IndexDefinition
+        if (this.definition == null) {
+            throw new UnsupportedOperationException("IndexDefinition not set for index: " + name);
+        }
+        return this.definition;
+    }
+
+    /** Returns the {@code unique} field. */
+    @Override
+    public boolean isUnique() {
+        return this.unique;
+    }
+
+    // Delegates to the stored definition; throws UnsupportedOperationException when it is null.
+    // NOTE(review): raw List return type - the element type looks lost; confirm against the interface.
+    @Override
+    public List getPropertyNames() {
+        // Get from IndexDefinition
+        if (this.definition == null) throw new UnsupportedOperationException("Definition not set");
+        return this.definition.getPropertyNames();
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public long countEntries() {
+        // Count documents in Lucene index
+        throw new UnsupportedOperationException("Not yet implemented: countEntries");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor get(Object[] keys) {
+        // Perform Lucene search
+        throw new UnsupportedOperationException("Not yet implemented: get");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor get(Object[] keys, int limit) {
+        throw new UnsupportedOperationException("Not yet implemented: get with limit");
+    }
+
+
+    // NOTE(review): raw Stream return type - confirm the element type declared by the interface.
+    @Override
+    public Stream getRidsStream(Object[] keys) {
+        throw new UnsupportedOperationException("Not yet implemented: getRidsStream");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded) {
+        throw new UnsupportedOperationException("Not yet implemented: range");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public RangeIndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit) {
+        throw new UnsupportedOperationException("Not yet implemented: range with limit");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor iterator(boolean ascendingOrder) {
+        // Iterate all documents
+        throw new UnsupportedOperationException("Not yet implemented: iterator");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor iterator(boolean ascendingOrder, Object[] fromKey, boolean fromKeyInclusive) {
+        throw new UnsupportedOperationException("Not yet implemented: iterator with fromKey");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor descendingIterator() {
+        throw new UnsupportedOperationException("Not yet implemented: descendingIterator");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor descendingIterator(Object[] fromKey, boolean fromKeyInclusive) {
+        throw new UnsupportedOperationException("Not yet implemented: descendingIterator with fromKey");
+    }
+
+    /** Always returns {@code false}. */
+    @Override
+    public boolean supportsOrderedIterations() {
+        return false; // Lucene supports score-based ordering, key-based might not be natural.
+    }
+
+    /**
+     * Delegates to the stored definition.
+     *
+     * @throws UnsupportedOperationException if no definition has been stored yet
+     */
+    @Override
+    public boolean isAutomatic() {
+        // Get from IndexDefinition
+        if (this.definition == null) throw new UnsupportedOperationException("Definition not set");
+        return this.definition.isAutomatic();
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void setRebuilding(boolean rebuilding) {
+        // Set a flag if the index is rebuilding
+        throw new UnsupportedOperationException("Not yet implemented: setRebuilding");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexEngine getEngine() {
+        // Return the LuceneIndexEngine instance associated with this index
+        throw new UnsupportedOperationException("Not yet implemented: getEngine");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public boolean isValid() {
+        throw new UnsupportedOperationException("Not yet implemented: isValid");
+    }
+
+    // NOTE(review): raw Map return type - confirm the key/value types declared by the interface.
+    @Override
+    public Map getStats() {
+        // Return Lucene specific stats
+        throw new UnsupportedOperationException("Not yet implemented: getStats");
+    }
+
+    // NOTE(review): raw Map parameter - confirm the key/value types declared by the interface.
+    @Override
+    public void setStats(Map stats) {
+        // Not typically set from outside
+        throw new UnsupportedOperationException("Not yet implemented: setStats");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void compact() throws IOException {
+        // Trigger Lucene merge/optimize if applicable
+        throw new UnsupportedOperationException("Not yet implemented: compact");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public boolean isCompacting() {
+        // Check if Lucene merge/optimize is running
+        throw new UnsupportedOperationException("Not yet implemented: isCompacting");
+    }
+
+    // NOTE(review): raw List return type - confirm the element type declared by the interface.
+    @Override
+    public List getFileIds() {
+        // Lucene manages its own files; this might not map directly.
+        throw new UnsupportedOperationException("Not yet implemented: getFileIds");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public int getPageSize() {
+        // Lucene doesn't use pages in the same way as ArcadeDB's native engine.
+        throw new UnsupportedOperationException("Not yet implemented: getPageSize");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void setPageSize(int pageSize) {
+        throw new UnsupportedOperationException("Not yet implemented: setPageSize");
+    }
+
+    /**
+     * Returns the byte value of the null strategy held by the stored definition.
+     *
+     * @throws UnsupportedOperationException if no definition has been stored yet
+     */
+    @Override
+    public byte getNullStrategy() {
+        // Get from IndexDefinition
+        if (this.definition == null) throw new UnsupportedOperationException("Definition not set");
+        return this.definition.getNullStrategy().getValue();
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void setNullStrategy(byte nullStrategy) {
+        // Set in IndexDefinition (usually immutable after creation)
+        throw new UnsupportedOperationException("Not yet implemented: setNullStrategy");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void set(TransactionContext tx, Object[] keys, RID[] rids) throws IndexException {
+        // Add entries to Lucene index
+        throw new UnsupportedOperationException("Not yet implemented: set");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void remove(TransactionContext tx, Object[] keys, Identifiable rid) throws IndexException {
+        // Remove specific RID associated with keys
+        throw new UnsupportedOperationException("Not yet implemented: remove with rid");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public void remove(TransactionContext tx, Object[] keys) throws IndexException {
+        // Remove all RIDs associated with keys
+        throw new UnsupportedOperationException("Not yet implemented: remove");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor range(boolean ascendingOrder) {
+        throw new UnsupportedOperationException("Not yet implemented: range without keys");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public IndexCursor range(boolean ascendingOrder, Object[] beginKeys, boolean beginKeysIncluded, Object[] endKeys, boolean endKeysIncluded, int limit, int skip) {
+        throw new UnsupportedOperationException("Not yet implemented: range with limit and skip");
+    }
+
+    /** Always returns the placeholder {@code -1}. */
+    @Override
+    public int getAssociatedBucketId() {
+        // Lucene indexes are not directly associated with a single bucket in the same way.
+        return -1; // Or derive from schema/type if applicable
+    }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java
new file mode 100644
index 0000000000..a0b1cc9d48
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/index/OLuceneFullTextIndex.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package com.arcadedb.lucene.index; + +import com.arcadedb.lucene.OLuceneCrossClassIndexFactory; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.database.OIdentifiable; +import com.arcadedb.database.exception.OInvalidIndexEngineIdException; +import com.arcadedb.database.index.OIndexMetadata; +import com.arcadedb.database.storage.OStorage; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.Query; + +public class OLuceneFullTextIndex extends OLuceneIndexNotUnique { + + public OLuceneFullTextIndex(OIndexMetadata im, final OStorage storage) { + super(im, storage); + } + + public Document buildDocument(final Object key, OIdentifiable identifieable) { + + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.buildDocument(key, identifieable); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public Query buildQuery(final Object query) { + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.buildQuery(query); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public Analyzer queryAnalyzer() { + while (true) + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.queryAnalyzer(); + }); + } catch (final OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + + public boolean isCollectionIndex() { + while (true) { + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.isCollectionIndex(); + }); + } catch 
(OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + } + + public Analyzer indexAnalyzer() { + while (true) { + try { + return storage.callIndexEngine( + false, + indexId, + engine -> { + OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine; + return indexEngine.indexAnalyzer(); + }); + } catch (OInvalidIndexEngineIdException e) { + doReloadIndexEngine(); + } + } + } + + @Override + public boolean isAutomatic() { + return super.isAutomatic() + || OLuceneCrossClassIndexFactory.LUCENE_CROSS_CLASS.equals(im.getAlgorithm()); + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java new file mode 100644 index 0000000000..0c311f42d5 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/query/LuceneIndexCursor.java @@ -0,0 +1,113 @@ +package com.arcadedb.lucene.query; + +import com.arcadedb.database.Identifiable; +import com.arcadedb.database.RID; +import com.arcadedb.index.IndexCursor; + +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; + +// import org.apache.lucene.search.ScoreDoc; +// import org.apache.lucene.search.IndexSearcher; +// import org.apache.lucene.document.Document; +// import java.io.IOException; + +public class LuceneIndexCursor implements IndexCursor { + + // private ScoreDoc[] scoreDocs; + // private IndexSearcher searcher; + // private int currentIndex = 0; + // private Document currentDocument; + // private RID currentRID; + + // public LuceneIndexCursor(ScoreDoc[] scoreDocs, IndexSearcher searcher) { + // this.scoreDocs = scoreDocs; + // this.searcher = searcher; + // // Potentially pre-fetch the first one or do it in hasNext/next + // } + + @Override + public Object[] getKeys() { + // This would typically return the terms that matched for the current document, + // which might not be straightforward or always relevant for a Lucene full-text search result. 
+        // Or, if the cursor iterates over specific keys that led to this document.
+        throw new UnsupportedOperationException("Not yet implemented: getKeys");
+    }
+
+    /** Not implemented; always throws. The commented code sketches reading the RID from a stored field. */
+    @Override
+    public Identifiable getRecord() {
+        // if (currentRID == null && currentDocument != null) {
+        //     // Assuming RID is stored in a field, e.g., "RID"
+        //     String ridString = currentDocument.get("RID");
+        //     if (ridString != null) {
+        //         currentRID = new RID(null, ridString); // Database instance might be needed
+        //     }
+        // }
+        // return currentRID;
+        throw new UnsupportedOperationException("Not yet implemented: getRecord");
+    }
+
+    // Always throws. NOTE(review): raw Map return type - confirm the types declared by IndexCursor.
+    @Override
+    public Map getProperties() {
+        throw new UnsupportedOperationException("Not implemented for LuceneIndexCursor");
+    }
+
+    /** Currently always returns {@code 0}; the commented code sketches scaling the Lucene score. */
+    @Override
+    public int getScore() {
+        // if (currentIndex > 0 && currentIndex <= scoreDocs.length) {
+        //     return (int) (scoreDocs[currentIndex -1].score * 1000); // Example scaling
+        // }
+        return 0;
+    }
+
+    /** Not implemented; always throws, so this cursor cannot be iterated yet. */
+    @Override
+    public boolean hasNext() {
+        // return currentIndex < scoreDocs.length;
+        throw new UnsupportedOperationException("Not yet implemented: hasNext");
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public Identifiable next() {
+        // if (!hasNext()) {
+        //     throw new NoSuchElementException();
+        // }
+        // try {
+        //     currentDocument = searcher.doc(scoreDocs[currentIndex].doc);
+        //     currentRID = null; // Reset so getRecord re-fetches it
+        //     currentIndex++;
+        //     return getRecord(); // This might need the database instance to load the actual record
+        // } catch (IOException e) {
+        //     throw new RuntimeException("Error fetching document from Lucene index", e);
+        // }
+        throw new UnsupportedOperationException("Not yet implemented: next");
+    }
+
+    /** Currently a no-op: there is no state to release while the fields above stay commented out. */
+    @Override
+    public void close() {
+        // Release any Lucene resources if necessary, e.g., if the searcher was context-specific.
+        // scoreDocs = null;
+        // searcher = null;
+    }
+
+    /** Not implemented; always throws {@link UnsupportedOperationException}. */
+    @Override
+    public long size() {
+        // return scoreDocs != null ? scoreDocs.length : 0;
+        throw new UnsupportedOperationException("Not yet implemented: size");
+    }
+
+    /** Always throws: the limit cannot be changed on this cursor. */
+    @Override
+    public void setLimit(int limit) {
+        throw new UnsupportedOperationException("Not supported after creation.");
+    }
+
+    /** Always returns the placeholder {@code -1}. */
+    @Override
+    public int getLimit() {
+        return -1; // Or actual limit if supported
+    }
+
+    // NOTE(review): hard-coded to true although getLimit() returns -1 and setLimit() throws - confirm intent.
+    @Override
+    public boolean isPaginated() {
+        return true; // Or based on actual implementation
+    }
+}
diff --git a/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java b/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java
new file mode 100644
index 0000000000..3bbee4581e
--- /dev/null
+++ b/lucene/src/main/java/com/arcadedb/lucene/query/OLuceneQueryContext.java
@@ -0,0 +1,138 @@
+/*
+ *
+ * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
+ * *
+ * * Licensed under the Apache License, Version 2.0 (the "License");
+ * * you may not use this file except in compliance with the License.
+ * * You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ * + */ + +package com.arcadedb.lucene.query; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.lucene.tx.OLuceneTxChanges; +import com.arcadedb.database.OCommandContext; +import com.arcadedb.database.OIdentifiable; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.highlight.TextFragment; + +/** Created by Enrico Risa on 08/01/15. */ +public class OLuceneQueryContext { + private final OCommandContext context; + private final IndexSearcher searcher; + private final Query query; + private final Sort sort; + private Optional changes; + private HashMap fragments; + + public OLuceneQueryContext( + final OCommandContext context, final IndexSearcher searcher, final Query query) { + this(context, searcher, query, Collections.emptyList()); + } + + public OLuceneQueryContext( + final OCommandContext context, + final IndexSearcher searcher, + final Query query, + final List sortFields) { + this.context = context; + this.searcher = searcher; + this.query = query; + if (sortFields.isEmpty()) { + sort = null; + } else { + sort = new Sort(sortFields.toArray(new SortField[] {})); + } + changes = Optional.empty(); + fragments = new HashMap<>(); + } + + public boolean isInTx() { + return changes.isPresent(); + } + + public OLuceneQueryContext withChanges(final OLuceneTxChanges changes) { + this.changes = Optional.ofNullable(changes); + return this; + } + + public OLuceneQueryContext addHighlightFragment( + final String field, final TextFragment[] 
fieldFragment) { + fragments.put(field, fieldFragment); + return this; + } + + public OCommandContext getContext() { + return context; + } + + public Query getQuery() { + return query; + } + + public Optional getChanges() { + return changes; + } + + public Sort getSort() { + return sort; + } + + public IndexSearcher getSearcher() { + return changes.map(c -> new IndexSearcher(multiReader(c))).orElse(searcher); + } + + private MultiReader multiReader(final OLuceneTxChanges luceneTxChanges) { + final IndexReader primaryReader = searcher.getIndexReader(); + final IndexReader txReader = luceneTxChanges.searcher().getIndexReader(); + try { + // Transfer ownership to the MultiReader so the index searcher can be released transparently. + // Without this, the primary IndexReader will leak a refcount each time it is wrapped. + MultiReader multiReader = new MultiReader(new IndexReader[] {primaryReader, txReader}, false); + primaryReader.decRef(); + txReader.decRef(); + return multiReader; + } catch (final IOException e) { + throw OException.wrapException( + new OLuceneIndexException("unable to create reader on changes"), e); + } + } + + public long deletedDocs(final Query query) { + return changes.map(c -> c.deletedDocs(query)).orElse(0l); + } + + public boolean isUpdated(final Document doc, final Object key, final OIdentifiable value) { + return changes.map(c -> c.isUpdated(doc, key, value)).orElse(false); + } + + public boolean isDeleted(final Document doc, final Object key, final OIdentifiable value) { + return changes.map(c -> c.isDeleted(doc, key, value)).orElse(false); + } + + public Map getFragments() { + return fragments; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java new file mode 100644 index 0000000000..117c4d911f --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChanges.java @@ -0,0 +1,52 @@ +/* + * + * * Copyright 2010-2016 OrientDB 
LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.database.OIdentifiable; +import java.util.Collections; +import java.util.Set; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; + +/** Created by Enrico Risa on 15/09/15. */ +public interface OLuceneTxChanges { + + void put(Object key, OIdentifiable value, Document doc); + + void remove(Object key, OIdentifiable value); + + IndexSearcher searcher(); + + default long numDocs() { + return 0; + } + + default Set getDeletedDocs() { + return Collections.emptySet(); + } + + boolean isDeleted(Document document, Object key, OIdentifiable value); + + boolean isUpdated(Document document, Object key, OIdentifiable value); + + default long deletedDocs(Query query) { + return 0; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java new file mode 100644 index 0000000000..52fd4f629f --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesAbstract.java @@ -0,0 +1,74 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. 
+ * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.common.log.OLogManager; +import com.arcadedb.common.log.OLogger; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import java.io.IOException; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; + +/** Created by Enrico Risa on 28/09/15. 
*/ +public abstract class OLuceneTxChangesAbstract implements OLuceneTxChanges { + private static final OLogger logger = + OLogManager.instance().logger(OLuceneTxChangesAbstract.class); + public static final String TMP = "_tmp_rid"; + + protected final OLuceneIndexEngine engine; + protected final IndexWriter writer; + protected final IndexWriter deletedIdx; + + public OLuceneTxChangesAbstract( + final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { + this.engine = engine; + this.writer = writer; + this.deletedIdx = deletedIdx; + } + + public IndexSearcher searcher() { + // TODO optimize + try { + return new IndexSearcher(DirectoryReader.open(writer, true, true)); + } catch (IOException e) { + // logger.error("Error during searcher index instantiation on new documents", e); + throw OException.wrapException( + new OLuceneIndexException("Error during searcher index instantiation on new documents"), + e); + } + } + + @Override + public long deletedDocs(Query query) { + try { + final IndexSearcher indexSearcher = + new IndexSearcher(DirectoryReader.open(deletedIdx, true, true)); + final TopDocs search = indexSearcher.search(query, Integer.MAX_VALUE); + return search.totalHits.value; + } catch (IOException e) { + logger.error("Error during searcher index instantiation on deleted documents ", e); + } + return 0; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java new file mode 100644 index 0000000000..c3758ba6d6 --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesMultiRid.java @@ -0,0 +1,108 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. 
+ * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.database.OIdentifiable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.Query; + +/** Created by Enrico Risa on 15/09/15. 
*/ +public class OLuceneTxChangesMultiRid extends OLuceneTxChangesAbstract { + private final Map> deleted = new HashMap>(); + private final Set deletedDocs = new HashSet(); + + public OLuceneTxChangesMultiRid( + final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { + super(engine, writer, deletedIdx); + } + + public void put(final Object key, final OIdentifiable value, final Document doc) { + try { + writer.addDocument(doc); + } catch (IOException e) { + throw OException.wrapException( + new OLuceneIndexException("unable to add document to changes index"), e); + } + } + + public void remove(final Object key, final OIdentifiable value) { + try { + if (value.getIdentity().isTemporary()) { + writer.deleteDocuments(engine.deleteQuery(key, value)); + } else { + deleted.putIfAbsent(value.getIdentity().toString(), new ArrayList<>()); + deleted.get(value.getIdentity().toString()).add(key.toString()); + + final Document doc = engine.buildDocument(key, value); + deletedDocs.add(doc); + deletedIdx.addDocument(doc); + } + } catch (final IOException e) { + throw OException.wrapException( + new OLuceneIndexException( + "Error while deleting documents in transaction from lucene index"), + e); + } + } + + public long numDocs() { + return searcher().getIndexReader().numDocs() - deletedDocs.size(); + } + + public Set getDeletedDocs() { + return deletedDocs; + } + + public boolean isDeleted(final Document document, final Object key, final OIdentifiable value) { + boolean match = false; + final List strings = deleted.get(value.getIdentity().toString()); + if (strings != null) { + final MemoryIndex memoryIndex = new MemoryIndex(); + for (final String string : strings) { + final Query q = engine.deleteQuery(string, value); + memoryIndex.reset(); + for (final IndexableField field : document.getFields()) { + memoryIndex.addField(field.name(), field.stringValue(), new KeywordAnalyzer()); + } + match = match || (memoryIndex.search(q) > 0.0f); + } + 
return match; + } + return match; + } + + // TODO is this valid? + public boolean isUpdated(final Document document, final Object key, final OIdentifiable value) { + return false; + } +} diff --git a/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java new file mode 100644 index 0000000000..dcc87fe84e --- /dev/null +++ b/lucene/src/main/java/com/arcadedb/lucene/tx/OLuceneTxChangesSingleRid.java @@ -0,0 +1,92 @@ +/* + * + * * Copyright 2010-2016 OrientDB LTD (http://orientdb.com) + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.arcadedb.lucene.tx; + +import com.arcadedb.common.exception.OException; +import com.arcadedb.lucene.builder.OLuceneIndexType; +import com.arcadedb.lucene.engine.OLuceneIndexEngine; +import com.arcadedb.lucene.exception.OLuceneIndexException; +import com.arcadedb.database.OIdentifiable; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; + +/** Created by Enrico Risa on 15/09/15. 
*/ +public class OLuceneTxChangesSingleRid extends OLuceneTxChangesAbstract { + private final Set deleted = new HashSet(); + private final Set updated = new HashSet(); + private final Set deletedDocs = new HashSet(); + + public OLuceneTxChangesSingleRid( + final OLuceneIndexEngine engine, final IndexWriter writer, final IndexWriter deletedIdx) { + super(engine, writer, deletedIdx); + } + + public void put(final Object key, final OIdentifiable value, final Document doc) { + if (deleted.remove(value.getIdentity().toString())) { + doc.add(OLuceneIndexType.createField(TMP, value.getIdentity().toString(), Field.Store.YES)); + updated.add(value.getIdentity().toString()); + } + try { + writer.addDocument(doc); + } catch (IOException e) { + throw OException.wrapException( + new OLuceneIndexException("unable to add document to changes index"), e); + } + } + + public void remove(final Object key, final OIdentifiable value) { + try { + if (value == null) { + writer.deleteDocuments(engine.deleteQuery(key, value)); + } else if (value.getIdentity().isTemporary()) { + writer.deleteDocuments(engine.deleteQuery(key, value)); + } else { + deleted.add(value.getIdentity().toString()); + Document doc = engine.buildDocument(key, value); + deletedDocs.add(doc); + deletedIdx.addDocument(doc); + } + } catch (final IOException e) { + throw OException.wrapException( + new OLuceneIndexException( + "Error while deleting documents in transaction from lucene index"), + e); + } + } + + public long numDocs() { + return searcher().getIndexReader().numDocs() - deleted.size() - updated.size(); + } + + public Set getDeletedDocs() { + return deletedDocs; + } + + public boolean isDeleted(Document document, Object key, OIdentifiable value) { + return deleted.contains(value.getIdentity().toString()); + } + + public boolean isUpdated(Document document, Object key, OIdentifiable value) { + return updated.contains(value.getIdentity().toString()); + } +} diff --git 
a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory new file mode 100644 index 0000000000..2dbcff89d3 --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.index.OIndexFactory @@ -0,0 +1,21 @@ +# +# /* +# * Copyright 2014 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +com.arcadedb.lucene.OLuceneIndexFactory +com.orientechnologies.spatial.OLuceneSpatialIndexFactory +com.arcadedb.lucene.OLuceneCrossClassIndexFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory new file mode 100644 index 0000000000..72a6b3fbab --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.functions.OSQLFunctionFactory @@ -0,0 +1,21 @@ +# +# /* +# * Copyright 2015 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. 
+# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +com.arcadedb.lucene.functions.OLuceneFunctionsFactory +com.orientechnologies.spatial.functions.OSpatialFunctionsFactory +com.arcadedb.lucene.functions.OLuceneCrossClassFunctionsFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory new file mode 100644 index 0000000000..02b1024bcd --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.database.sql.operator.OQueryOperatorFactory @@ -0,0 +1,20 @@ +# +# /* +# * Copyright 2014 Orient Technologies. +# * +# * Licensed under the Apache License, Version 2.0 (the "License"); +# * you may not use this file except in compliance with the License. +# * You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ +# + +com.arcadedb.lucene.operator.OLuceneOperatorFactory +com.orientechnologies.spatial.operator.OLuceneSpatialOperatorFactory diff --git a/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler new file mode 100644 index 0000000000..db660fa15e --- /dev/null +++ b/lucene/src/main/resources/META-INF/services/com.arcadedb.index.IndexFactoryHandler @@ -0,0 +1 @@ +com.arcadedb.lucene.ArcadeLuceneIndexFactoryHandler diff --git a/lucene/src/main/resources/plugin.json b/lucene/src/main/resources/plugin.json new file mode 100644 index 0000000000..c0ef9caa33 --- /dev/null +++ b/lucene/src/main/resources/plugin.json @@ -0,0 +1,8 @@ +{ + "name": "lucene-index", + "version": "1.7", + "javaClass": "com.arcadedb.lucene.ArcadeLuceneLifecycleManager", + "parameters": {}, + "description": "This is the Lucene Index integration", + "copyrights": "Orient Technologies LTD" +} diff --git a/pom.xml b/pom.xml index 4dc3484765..a07eaa3c68 100644 --- a/pom.xml +++ b/pom.xml @@ -120,6 +120,7 @@ studio package e2e + lucene