diff --git a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
deleted file mode 100644
index a6077f8e84259..0000000000000
--- a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
+++ /dev/null
@@ -1,1136 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.lucene.search.suggest.analyzing;
-
-import com.carrotsearch.hppc.ObjectIntHashMap;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.search.suggest.InputIterator;
-import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.store.*;
-import org.apache.lucene.util.*;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.Transition;
-import org.apache.lucene.util.fst.*;
-import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.PairOutputs.Pair;
-import org.apache.lucene.util.fst.Util.Result;
-import org.apache.lucene.util.fst.Util.TopResults;
-import org.elasticsearch.common.collect.HppcMaps;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.*;
-
-/**
- * Suggester that first analyzes the surface form, adds the
- * analyzed form to a weighted FST, and then does the same
- * thing at lookup time. This means lookup is based on the
- * analyzed form while suggestions are still the surface
- * form(s).
- *
- *
- * This can result in powerful suggester functionality. For
- * example, if you use an analyzer removing stop words,
- * then the partial text "ghost chr..." could see the
- * suggestion "The Ghost of Christmas Past". Note that
- * position increments MUST NOT be preserved for this example
- * to work, so you should call the constructor with
- * preservePositionIncrements parameter set to
- * false
- *
- *
- * If SynonymFilter is used to map wifi and wireless network to
- * hotspot then the partial text "wirele..." could suggest
- * "wifi router". Token normalization like stemmers, accent
- * removal, etc., would allow suggestions to ignore such
- * variations.
- *
- *
- * When two matching suggestions have the same weight, they
- * are tie-broken by the analyzed form. If their analyzed
- * form is the same then the order is undefined.
- *
- *
- * There are some limitations:
- *
- *
- *
A lookup from a query like "net" in English won't
- * be any different than "net " (ie, user added a
- * trailing space) because analyzers don't reflect
- * when they've seen a token separator and when they
- * haven't.
- *
- *
If you're using {@code StopFilter}, and the user will
- * type "fast apple", but so far all they've typed is
- * "fast a", again because the analyzer doesn't convey whether
- * it's seen a token separator after the "a",
- * {@code StopFilter} will remove that "a" causing
- * far more matches than you'd expect.
- *
- *
Lookups with the empty string return no results
- * instead of all results.
- *
- *
- * @lucene.experimental
- */
-public class XAnalyzingSuggester extends Lookup {
-
- /**
- * FST<Weight,Surface>:
- * input is the analyzed form, with a null byte between terms
- * weights are encoded as costs: (Integer.MAX_VALUE-weight)
- * surface is the original, unanalyzed form.
- */
- private FST> fst = null;
-
- /**
- * Analyzer that will be used for analyzing suggestions at
- * index time.
- */
- private final Analyzer indexAnalyzer;
-
- /**
- * Analyzer that will be used for analyzing suggestions at
- * query time.
- */
- private final Analyzer queryAnalyzer;
-
- /**
- * True if exact match suggestions should always be returned first.
- */
- private final boolean exactFirst;
-
- /**
- * True if separator between tokens should be preserved.
- */
- private final boolean preserveSep;
-
- /** Include this flag in the options parameter to {@code
- * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to always
- * return the exact match first, regardless of score. This
- * has no performance impact but could result in
- * low-quality suggestions. */
- public static final int EXACT_FIRST = 1;
-
- /** Include this flag in the options parameter to {@code
- * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to preserve
- * token separators when matching. */
- public static final int PRESERVE_SEP = 2;
-
- /** Represents the separation between tokens, if
- * PRESERVE_SEP was specified */
- public static final int SEP_LABEL = '\u001F';
-
- /** Marks end of the analyzed input and start of dedup
- * byte. */
- public static final int END_BYTE = 0x0;
-
- /** Maximum number of dup surface forms (different surface
- * forms for the same analyzed form). */
- private final int maxSurfaceFormsPerAnalyzedForm;
-
- /** Maximum graph paths to index for a single analyzed
- * surface form. This only matters if your analyzer
- * makes lots of alternate paths (e.g. contains
- * SynonymFilter). */
- private final int maxGraphExpansions;
-
- /** Highest number of analyzed paths we saw for any single
- * input surface form. For analyzers that never create
- * graphs this will always be 1. */
- private int maxAnalyzedPathsForOneInput;
-
- private boolean hasPayloads;
-
- private final int sepLabel;
- private final int payloadSep;
- private final int endByte;
- private final int holeCharacter;
-
- public static final int PAYLOAD_SEP = '\u001F';
- public static final int HOLE_CHARACTER = '\u001E';
-
- private final Automaton queryPrefix;
-
- /** Whether position holes should appear in the automaton. */
- private boolean preservePositionIncrements;
-
- /** Number of entries the lookup was built with */
- private long count = 0;
-
- /**
- * Calls {@code #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)
- * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
- * PRESERVE_SEP, 256, -1)}
- */
- public XAnalyzingSuggester(Analyzer analyzer) {
- this(analyzer, null, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
- }
-
- /**
- * Calls {@code #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)
- * AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST |
- * PRESERVE_SEP, 256, -1)}
- */
- public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
- this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
- }
-
- /**
- * Creates a new suggester.
- *
- * @param indexAnalyzer Analyzer that will be used for
- * analyzing suggestions while building the index.
- * @param queryAnalyzer Analyzer that will be used for
- * analyzing query text during lookup
- * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
- * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
- * surface forms to keep for a single analyzed form.
- * When there are too many surface forms we discard the
- * lowest weighted ones.
- * @param maxGraphExpansions Maximum number of graph paths
- * to expand from the analyzed form. Set this to -1 for
- * no limit.
- */
- public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
- boolean preservePositionIncrements, FST> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
- int sepLabel, int payloadSep, int endByte, int holeCharacter) {
- // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
- this.indexAnalyzer = indexAnalyzer;
- this.queryAnalyzer = queryAnalyzer;
- this.fst = fst;
- this.hasPayloads = hasPayloads;
- if ((options & ~(EXACT_FIRST | PRESERVE_SEP)) != 0) {
- throw new IllegalArgumentException("options should only contain EXACT_FIRST and PRESERVE_SEP; got " + options);
- }
- this.exactFirst = (options & EXACT_FIRST) != 0;
- this.preserveSep = (options & PRESERVE_SEP) != 0;
-
- // FLORIAN EDIT: I added queryPrefix for context dependent suggestions
- this.queryPrefix = queryPrefix;
-
- // NOTE: this is just an implementation limitation; if
- // somehow this is a problem we could fix it by using
- // more than one byte to disambiguate ... but 256 seems
- // like it should be way more then enough.
- if (maxSurfaceFormsPerAnalyzedForm <= 0 || maxSurfaceFormsPerAnalyzedForm > 256) {
- throw new IllegalArgumentException("maxSurfaceFormsPerAnalyzedForm must be > 0 and < 256 (got: " + maxSurfaceFormsPerAnalyzedForm + ")");
- }
- this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
-
- if (maxGraphExpansions < 1 && maxGraphExpansions != -1) {
- throw new IllegalArgumentException("maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
- }
- this.maxGraphExpansions = maxGraphExpansions;
- this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
- this.preservePositionIncrements = preservePositionIncrements;
- this.sepLabel = sepLabel;
- this.payloadSep = payloadSep;
- this.endByte = endByte;
- this.holeCharacter = holeCharacter;
- }
-
- /** Returns byte size of the underlying FST. */
- @Override
-public long ramBytesUsed() {
- return fst == null ? 0 : fst.ramBytesUsed();
- }
-
- public int getMaxAnalyzedPathsForOneInput() {
- return maxAnalyzedPathsForOneInput;
- }
-
- // Replaces SEP with epsilon or remaps them if
- // we were asked to preserve them:
- private Automaton replaceSep(Automaton a) {
-
- Automaton result = new Automaton();
-
- // Copy all states over
- int numStates = a.getNumStates();
- for(int s=0;s visited = new HashSet<>();
- final LinkedList worklist = new LinkedList<>();
- worklist.add(0);
- visited.add(0);
- int upto = 0;
- states[upto] = 0;
- upto++;
- Transition t = new Transition();
- while (worklist.size() > 0) {
- int s = worklist.removeFirst();
- int count = a.initTransition(s, t);
- for (int i=0;i {
-
- private final boolean hasPayloads;
-
- public AnalyzingComparator(boolean hasPayloads) {
- this.hasPayloads = hasPayloads;
- }
-
- private final ByteArrayDataInput readerA = new ByteArrayDataInput();
- private final ByteArrayDataInput readerB = new ByteArrayDataInput();
- private final BytesRef scratchA = new BytesRef();
- private final BytesRef scratchB = new BytesRef();
-
- @Override
- public int compare(BytesRef a, BytesRef b) {
-
- // First by analyzed form:
- readerA.reset(a.bytes, a.offset, a.length);
- scratchA.length = readerA.readShort();
- scratchA.bytes = a.bytes;
- scratchA.offset = readerA.getPosition();
-
- readerB.reset(b.bytes, b.offset, b.length);
- scratchB.bytes = b.bytes;
- scratchB.length = readerB.readShort();
- scratchB.offset = readerB.getPosition();
-
- int cmp = scratchA.compareTo(scratchB);
- if (cmp != 0) {
- return cmp;
- }
- readerA.skipBytes(scratchA.length);
- readerB.skipBytes(scratchB.length);
- // Next by cost:
- long aCost = readerA.readInt();
- long bCost = readerB.readInt();
- if (aCost < bCost) {
- return -1;
- } else if (aCost > bCost) {
- return 1;
- }
-
- // Finally by surface form:
- if (hasPayloads) {
- scratchA.length = readerA.readShort();
- scratchA.offset = readerA.getPosition();
- scratchB.length = readerB.readShort();
- scratchB.offset = readerB.getPosition();
- } else {
- scratchA.offset = readerA.getPosition();
- scratchA.length = a.length - scratchA.offset;
- scratchB.offset = readerB.getPosition();
- scratchB.length = b.length - scratchB.offset;
- }
- return scratchA.compareTo(scratchB);
- }
- }
-
- @Override
- public void build(InputIterator iterator) throws IOException {
- String prefix = getClass().getSimpleName();
- Path directory = OfflineSorter.getDefaultTempDir();
- Path tempInput = Files.createTempFile(directory, prefix, ".input");
- Path tempSorted = Files.createTempFile(directory, prefix, ".sorted");
-
- hasPayloads = iterator.hasPayloads();
-
- OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
- OfflineSorter.ByteSequencesReader reader = null;
- BytesRefBuilder scratch = new BytesRefBuilder();
-
- TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
-
- boolean success = false;
- count = 0;
- byte buffer[] = new byte[8];
- try {
- ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
-
- for (BytesRef surfaceForm; (surfaceForm = iterator.next()) != null;) {
- LimitedFiniteStringsIterator finiteStrings =
- new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions);
- for (IntsRef string; (string = finiteStrings.next()) != null; count++) {
- Util.toBytesRef(string, scratch);
-
- // length of the analyzed text (FST input)
- if (scratch.length() > Short.MAX_VALUE-2) {
- throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length() + ")");
- }
- short analyzedLength = (short) scratch.length();
-
- // compute the required length:
- // analyzed sequence + weight (4) + surface + analyzedLength (short)
- int requiredLength = analyzedLength + 4 + surfaceForm.length + 2;
-
- BytesRef payload;
-
- if (hasPayloads) {
- if (surfaceForm.length > (Short.MAX_VALUE-2)) {
- throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
- }
- payload = iterator.payload();
- // payload + surfaceLength (short)
- requiredLength += payload.length + 2;
- } else {
- payload = null;
- }
-
- buffer = ArrayUtil.grow(buffer, requiredLength);
-
- output.reset(buffer);
-
- output.writeShort(analyzedLength);
-
- output.writeBytes(scratch.bytes(), 0, scratch.length());
-
- output.writeInt(encodeWeight(iterator.weight()));
-
- if (hasPayloads) {
- for(int i=0;i outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
- Builder> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
-
- // Build FST:
- BytesRefBuilder previousAnalyzed = null;
- BytesRefBuilder analyzed = new BytesRefBuilder();
- BytesRef surface = new BytesRef();
- IntsRefBuilder scratchInts = new IntsRefBuilder();
- ByteArrayDataInput input = new ByteArrayDataInput();
-
- // Used to remove duplicate surface forms (but we
- // still index the hightest-weight one). We clear
- // this when we see a new analyzed form, so it cannot
- // grow unbounded (at most 256 entries):
- Set seenSurfaceForms = new HashSet<>();
-
- int dedup = 0;
- while (reader.read(scratch)) {
- input.reset(scratch.bytes(), 0, scratch.length());
- short analyzedLength = input.readShort();
- analyzed.grow(analyzedLength+2);
- input.readBytes(analyzed.bytes(), 0, analyzedLength);
- analyzed.setLength(analyzedLength);
-
- long cost = input.readInt();
-
- surface.bytes = scratch.bytes();
- if (hasPayloads) {
- surface.length = input.readShort();
- surface.offset = input.getPosition();
- } else {
- surface.offset = input.getPosition();
- surface.length = scratch.length() - surface.offset;
- }
-
- if (previousAnalyzed == null) {
- previousAnalyzed = new BytesRefBuilder();
- previousAnalyzed.copyBytes(analyzed);
- seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
- } else if (analyzed.get().equals(previousAnalyzed.get())) {
- dedup++;
- if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
- // More than maxSurfaceFormsPerAnalyzedForm
- // dups: skip the rest:
- continue;
- }
- if (seenSurfaceForms.contains(surface)) {
- continue;
- }
- seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
- } else {
- dedup = 0;
- previousAnalyzed.copyBytes(analyzed);
- seenSurfaceForms.clear();
- seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
- }
-
- // TODO: I think we can avoid the extra 2 bytes when
- // there is no dup (dedup==0), but we'd have to fix
- // the exactFirst logic ... which would be sort of
- // hairy because we'd need to special case the two
- // (dup/not dup)...
-
- // NOTE: must be byte 0 so we sort before whatever
- // is next
- analyzed.append((byte) 0);
- analyzed.append((byte) dedup);
-
- Util.toIntsRef(analyzed.get(), scratchInts);
- //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
- if (!hasPayloads) {
- builder.add(scratchInts.get(), outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
- } else {
- int payloadOffset = input.getPosition() + surface.length;
- int payloadLength = scratch.length() - payloadOffset;
- BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
- System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
- br.bytes[surface.length] = (byte) payloadSep;
- System.arraycopy(scratch.bytes(), payloadOffset, br.bytes, surface.length+1, payloadLength);
- br.length = br.bytes.length;
- builder.add(scratchInts.get(), outputs.newPair(cost, br));
- }
- }
- fst = builder.finish();
-
- //PrintWriter pw = new PrintWriter("/tmp/out.dot");
- //Util.toDot(fst, pw, true, true);
- //pw.close();
-
- success = true;
- } finally {
- IOUtils.closeWhileHandlingException(reader, writer);
-
- if (success) {
- IOUtils.deleteFilesIfExist(tempInput, tempSorted);
- } else {
- IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
- }
- }
- }
-
- @Override
- public boolean store(OutputStream output) throws IOException {
- DataOutput dataOut = new OutputStreamDataOutput(output);
- try {
- if (fst == null) {
- return false;
- }
-
- fst.save(dataOut);
- dataOut.writeVInt(maxAnalyzedPathsForOneInput);
- dataOut.writeByte((byte) (hasPayloads ? 1 : 0));
- } finally {
- IOUtils.close(output);
- }
- return true;
- }
-
- @Override
- public long getCount() {
- return count;
- }
-
- @Override
- public boolean load(InputStream input) throws IOException {
- DataInput dataIn = new InputStreamDataInput(input);
- try {
- this.fst = new FST<>(dataIn, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
- maxAnalyzedPathsForOneInput = dataIn.readVInt();
- hasPayloads = dataIn.readByte() == 1;
- } finally {
- IOUtils.close(input);
- }
- return true;
- }
-
- private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
- LookupResult result;
- if (hasPayloads) {
- int sepIndex = -1;
- for(int i=0;i= output2.length) {
- return false;
- }
- for(int i=0;i lookup(final CharSequence key, Set contexts, boolean onlyMorePopular, int num) {
- assert num > 0;
-
- if (onlyMorePopular) {
- throw new IllegalArgumentException("this suggester only works with onlyMorePopular=false");
- }
- if (fst == null) {
- return Collections.emptyList();
- }
-
- //System.out.println("lookup key=" + key + " num=" + num);
- for (int i = 0; i < key.length(); i++) {
- if (key.charAt(i) == holeCharacter) {
- throw new IllegalArgumentException("lookup key cannot contain HOLE character U+001E; this character is reserved");
- }
- if (key.charAt(i) == sepLabel) {
- throw new IllegalArgumentException("lookup key cannot contain unit separator character U+001F; this character is reserved");
- }
- }
- final BytesRef utf8Key = new BytesRef(key);
- try {
-
- Automaton lookupAutomaton = toLookupAutomaton(key);
-
- final CharsRefBuilder spare = new CharsRefBuilder();
-
- //System.out.println(" now intersect exactFirst=" + exactFirst);
-
- // Intersect automaton w/ suggest wFST and get all
- // prefix starting nodes & their outputs:
- //final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst);
-
- //System.out.println(" prefixPaths: " + prefixPaths.size());
-
- BytesReader bytesReader = fst.getBytesReader();
-
- FST.Arc> scratchArc = new FST.Arc<>();
-
- final List results = new ArrayList<>();
-
- List>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst);
-
- if (exactFirst) {
-
- int count = 0;
- for (FSTUtil.Path> path : prefixPaths) {
- if (fst.findTargetArc(endByte, path.fstNode, scratchArc, bytesReader) != null) {
- // This node has END_BYTE arc leaving, meaning it's an
- // "exact" match:
- count++;
- }
- }
-
- // Searcher just to find the single exact only
- // match, if present:
- Util.TopNSearcher> searcher;
- searcher = new Util.TopNSearcher<>(fst, count * maxSurfaceFormsPerAnalyzedForm, count * maxSurfaceFormsPerAnalyzedForm, weightComparator);
-
- // NOTE: we could almost get away with only using
- // the first start node. The only catch is if
- // maxSurfaceFormsPerAnalyzedForm had kicked in and
- // pruned our exact match from one of these nodes
- // ...:
- for (FSTUtil.Path> path : prefixPaths) {
- if (fst.findTargetArc(endByte, path.fstNode, scratchArc, bytesReader) != null) {
- // This node has END_BYTE arc leaving, meaning it's an
- // "exact" match:
- searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output), false, path.input);
- }
- }
-
- Util.TopResults> completions = searcher.search();
-
- // NOTE: this is rather inefficient: we enumerate
- // every matching "exactly the same analyzed form"
- // path, and then do linear scan to see if one of
- // these exactly matches the input. It should be
- // possible (though hairy) to do something similar
- // to getByOutput, since the surface form is encoded
- // into the FST output, so we more efficiently hone
- // in on the exact surface-form match. Still, I
- // suspect very little time is spent in this linear
- // seach: it's bounded by how many prefix start
- // nodes we have and the
- // maxSurfaceFormsPerAnalyzedForm:
- for(Result> completion : completions) {
- BytesRef output2 = completion.output.output2;
- if (sameSurfaceForm(utf8Key, output2)) {
- results.add(getLookupResult(completion.output.output1, output2, spare));
- break;
- }
- }
-
- if (results.size() == num) {
- // That was quick:
- return results;
- }
- }
-
- Util.TopNSearcher> searcher;
- searcher = new Util.TopNSearcher>(fst,
- num - results.size(),
- num * maxAnalyzedPathsForOneInput,
- weightComparator) {
- private final Set seen = new HashSet<>();
-
- @Override
- protected boolean acceptResult(IntsRef input, Pair output) {
-
- // Dedup: when the input analyzes to a graph we
- // can get duplicate surface forms:
- if (seen.contains(output.output2)) {
- return false;
- }
- seen.add(output.output2);
-
- if (!exactFirst) {
- return true;
- } else {
- // In exactFirst mode, don't accept any paths
- // matching the surface form since that will
- // create duplicate results:
- if (sameSurfaceForm(utf8Key, output.output2)) {
- // We found exact match, which means we should
- // have already found it in the first search:
- assert results.size() == 1;
- return false;
- } else {
- return true;
- }
- }
- }
- };
-
- prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst);
-
- for (FSTUtil.Path> path : prefixPaths) {
- searcher.addStartPaths(path.fstNode, path.output, true, path.input);
- }
-
- TopResults> completions = searcher.search();
-
- for(Result> completion : completions) {
-
- LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare);
-
- // TODO: for fuzzy case would be nice to return
- // how many edits were required
-
- //System.out.println(" result=" + result);
- results.add(result);
-
- if (results.size() == num) {
- // In the exactFirst=true case the search may
- // produce one extra path
- break;
- }
- }
-
- return results;
- } catch (IOException bogus) {
- throw new RuntimeException(bogus);
- }
- }
-
- @Override
- public boolean store(DataOutput output) throws IOException {
- output.writeVLong(count);
- if (fst == null) {
- return false;
- }
-
- fst.save(output);
- output.writeVInt(maxAnalyzedPathsForOneInput);
- output.writeByte((byte) (hasPayloads ? 1 : 0));
- return true;
- }
-
- @Override
- public boolean load(DataInput input) throws IOException {
- count = input.readVLong();
- this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
- maxAnalyzedPathsForOneInput = input.readVInt();
- hasPayloads = input.readByte() == 1;
- return true;
- }
-
- /** Returns all completion paths to initialize the search. */
- protected List>> getFullPrefixPaths(List>> prefixPaths,
- Automaton lookupAutomaton,
- FST> fst)
- throws IOException {
- return prefixPaths;
- }
-
- final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
- try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
- return toAutomaton(ts, ts2a);
- }
- }
-
- final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException {
- // Create corresponding automaton: labels are bytes
- // from each analyzed token, with byte 0 used as
- // separator between tokens:
- Automaton automaton = ts2a.toAutomaton(ts);
-
- automaton = replaceSep(automaton);
- automaton = convertAutomaton(automaton);
-
- // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
- // assert SpecialOperations.isFinite(automaton);
-
- // Get all paths from the automaton (there can be
- // more than one path, eg if the analyzer created a
- // graph using SynFilter or WDF):
-
- return automaton;
- }
-
- // EDIT: Adrien, needed by lookup providers
- // NOTE: these XForks are unmaintainable, we need to get rid of them...
- public Set toFiniteStrings(TokenStream stream) throws IOException {
- final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
- Automaton automaton;
- try (TokenStream ts = stream) {
- automaton = toAutomaton(ts, ts2a);
- }
- LimitedFiniteStringsIterator finiteStrings =
- new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
- Set set = new HashSet<>();
- for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) {
- set.add(IntsRef.deepCopyOf(string));
- }
- return Collections.unmodifiableSet(set);
- }
-
- final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
- // TODO: is there a Reader from a CharSequence?
- // Turn tokenstream into automaton:
- Automaton automaton = null;
-
- try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
- automaton = getTokenStreamToAutomaton().toAutomaton(ts);
- }
-
- automaton = replaceSep(automaton);
-
- // TODO: we can optimize this somewhat by determinizing
- // while we convert
-
- // This automaton should not blow up during determinize:
- automaton = Operations.determinize(automaton, Integer.MAX_VALUE);
- return automaton;
- }
-
-
-
- /**
- * Returns the weight associated with an input string,
- * or null if it does not exist.
- */
- public Object get(CharSequence key) {
- throw new UnsupportedOperationException();
- }
-
- /** cost -> weight */
- public static int decodeWeight(long encoded) {
- return (int)(Integer.MAX_VALUE - encoded);
- }
-
- /** weight -> cost */
- public static int encodeWeight(long value) {
- if (value < 0 || value > Integer.MAX_VALUE) {
- throw new UnsupportedOperationException("cannot encode value: " + value);
- }
- return Integer.MAX_VALUE - (int)value;
- }
-
- static final Comparator> weightComparator = new Comparator> () {
- @Override
- public int compare(Pair left, Pair right) {
- return left.output1.compareTo(right.output1);
- }
- };
-
-
- public static class XBuilder {
- private Builder> builder;
- private int maxSurfaceFormsPerAnalyzedForm;
- private IntsRefBuilder scratchInts = new IntsRefBuilder();
- private final PairOutputs outputs;
- private boolean hasPayloads;
- private BytesRefBuilder analyzed = new BytesRefBuilder();
- private final SurfaceFormAndPayload[] surfaceFormsAndPayload;
- private int count;
- private ObjectIntHashMap seenSurfaceForms = HppcMaps.Object.Integer.ensureNoNullKeys(256, 0.75f);
- private int payloadSep;
-
- public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) {
- this.payloadSep = payloadSep;
- this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
- this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
- this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
- this.hasPayloads = hasPayloads;
- surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm];
-
- }
- public void startTerm(BytesRef analyzed) {
- this.analyzed.grow(analyzed.length+2);
- this.analyzed.copyBytes(analyzed);
- }
-
- private final static class SurfaceFormAndPayload implements Comparable {
- BytesRef payload;
- long weight;
-
- public SurfaceFormAndPayload(BytesRef payload, long cost) {
- super();
- this.payload = payload;
- this.weight = cost;
- }
-
- @Override
- public int compareTo(SurfaceFormAndPayload o) {
- int res = compare(weight, o.weight);
- if (res == 0 ){
- return payload.compareTo(o.payload);
- }
- return res;
- }
- public static int compare(long x, long y) {
- return (x < y) ? -1 : ((x == y) ? 0 : 1);
- }
- }
-
- public void addSurface(BytesRef surface, BytesRef payload, long cost) throws IOException {
- int surfaceIndex = -1;
- long encodedWeight = cost == -1 ? cost : encodeWeight(cost);
- /*
- * we need to check if we have seen this surface form, if so only use the
- * the surface form with the highest weight and drop the rest no matter if
- * the payload differs.
- */
- if (count >= maxSurfaceFormsPerAnalyzedForm) {
- // More than maxSurfaceFormsPerAnalyzedForm
- // dups: skip the rest:
- return;
- }
-
- BytesRef surfaceCopy;
- final int keySlot;
- if (count > 0 && (keySlot = seenSurfaceForms.indexOf(surface)) >= 0) {
- surfaceIndex = seenSurfaceForms.indexGet(keySlot);
- SurfaceFormAndPayload surfaceFormAndPayload = surfaceFormsAndPayload[surfaceIndex];
- if (encodedWeight >= surfaceFormAndPayload.weight) {
- return;
- }
- surfaceCopy = BytesRef.deepCopyOf(surface);
- } else {
- surfaceIndex = count++;
- surfaceCopy = BytesRef.deepCopyOf(surface);
- seenSurfaceForms.put(surfaceCopy, surfaceIndex);
- }
-
- BytesRef payloadRef;
- if (!hasPayloads) {
- payloadRef = surfaceCopy;
- } else {
- int len = surface.length + 1 + payload.length;
- final BytesRef br = new BytesRef(len);
- System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
- br.bytes[surface.length] = (byte) payloadSep;
- System.arraycopy(payload.bytes, payload.offset, br.bytes, surface.length + 1, payload.length);
- br.length = len;
- payloadRef = br;
- }
- if (surfaceFormsAndPayload[surfaceIndex] == null) {
- surfaceFormsAndPayload[surfaceIndex] = new SurfaceFormAndPayload(payloadRef, encodedWeight);
- } else {
- surfaceFormsAndPayload[surfaceIndex].payload = payloadRef;
- surfaceFormsAndPayload[surfaceIndex].weight = encodedWeight;
- }
- }
-
- public void finishTerm(long defaultWeight) throws IOException {
- ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
- int deduplicator = 0;
- analyzed.append((byte) 0);
- analyzed.setLength(analyzed.length() + 1);
- analyzed.grow(analyzed.length());
- for (int i = 0; i < count; i++) {
- analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++);
- Util.toIntsRef(analyzed.get(), scratchInts);
- SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i];
- long cost = candiate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight;
- builder.add(scratchInts.get(), outputs.newPair(cost, candiate.payload));
- }
- seenSurfaceForms.clear();
- count = 0;
- }
-
- public FST> build() throws IOException {
- return builder.finish();
- }
-
- public boolean hasPayloads() {
- return hasPayloads;
- }
-
- public int maxSurfaceFormsPerAnalyzedForm() {
- return maxSurfaceFormsPerAnalyzedForm;
- }
-
- }
-}
diff --git a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java
deleted file mode 100644
index a4338f8a65aaa..0000000000000
--- a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.lucene.search.suggest.analyzing;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.automaton.*;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PairOutputs;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES;
-
-/**
- * Implements a fuzzy {@link AnalyzingSuggester}. The similarity measurement is
- * based on the Damerau-Levenshtein (optimal string alignment) algorithm, though
- * you can explicitly choose classic Levenshtein by passing false
- * for the transpositions parameter.
- *
- * At most, this query will match terms up to
- * {@value org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
- * edits. Higher distances are not supported. Note that the
- * fuzzy distance is measured in "byte space" on the bytes
- * returned by the {@link org.apache.lucene.analysis.TokenStream}'s {@link
- * org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute}, usually UTF8. By default
- * the analyzed bytes must be at least 3 {@link
- * #DEFAULT_MIN_FUZZY_LENGTH} bytes before any edits are
- * considered. Furthermore, the first 1 {@link
- * #DEFAULT_NON_FUZZY_PREFIX} byte is not allowed to be
- * edited. We allow up to 1 (@link
- * #DEFAULT_MAX_EDITS} edit.
- * If {@link #unicodeAware} parameter in the constructor is set to true, maxEdits,
- * minFuzzyLength, transpositions and nonFuzzyPrefix are measured in Unicode code
- * points (actual letters) instead of bytes.*
- *
- *
- * NOTE: This suggester does not boost suggestions that
- * required no edits over suggestions that did require
- * edits. This is a known limitation.
- *
- *
- * Note: complex query analyzers can have a significant impact on the lookup
- * performance. It's recommended to not use analyzers that drop or inject terms
- * like synonyms to keep the complexity of the prefix intersection low for good
- * lookup performance. At index time, complex analyzers can safely be used.
- *
- *
- * @lucene.experimental
- */
-public final class XFuzzySuggester extends XAnalyzingSuggester {
- private final int maxEdits;
- private final boolean transpositions;
- private final int nonFuzzyPrefix;
- private final int minFuzzyLength;
- private final boolean unicodeAware;
-
- /**
- * Measure maxEdits, minFuzzyLength, transpositions and nonFuzzyPrefix
- * parameters in Unicode code points (actual letters)
- * instead of bytes.
- */
- public static final boolean DEFAULT_UNICODE_AWARE = false;
-
- /**
- * The default minimum length of the key passed to {@link
- * #lookup} before any edits are allowed.
- */
- public static final int DEFAULT_MIN_FUZZY_LENGTH = 3;
-
- /**
- * The default prefix length where edits are not allowed.
- */
- public static final int DEFAULT_NON_FUZZY_PREFIX = 1;
-
- /**
- * The default maximum number of edits for fuzzy
- * suggestions.
- */
- public static final int DEFAULT_MAX_EDITS = 1;
-
- /**
- * The default transposition value passed to {@link org.apache.lucene.util.automaton.LevenshteinAutomata}
- */
- public static final boolean DEFAULT_TRANSPOSITIONS = true;
-
- /**
- * Creates a {@link FuzzySuggester} instance initialized with default values.
- *
- * @param analyzer the analyzer used for this suggester
- */
- public XFuzzySuggester(Analyzer analyzer) {
- this(analyzer, analyzer);
- }
-
- /**
- * Creates a {@link FuzzySuggester} instance with an index & a query analyzer initialized with default values.
- *
- * @param indexAnalyzer
- * Analyzer that will be used for analyzing suggestions while building the index.
- * @param queryAnalyzer
- * Analyzer that will be used for analyzing query text during lookup
- */
- public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
- this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
- DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
-
- }
-
- /**
- * Creates a {@link FuzzySuggester} instance.
- *
- * @param indexAnalyzer Analyzer that will be used for
- * analyzing suggestions while building the index.
- * @param queryAnalyzer Analyzer that will be used for
- * analyzing query text during lookup
- * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
- * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
- * surface forms to keep for a single analyzed form.
- * When there are too many surface forms we discard the
- * lowest weighted ones.
- * @param maxGraphExpansions Maximum number of graph paths
- * to expand from the analyzed form. Set this to -1 for
- * no limit.
- * @param maxEdits must be >= 0 and <= {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} .
- * @param transpositions true if transpositions should be treated as a primitive
- * edit operation. If this is false, comparisons will implement the classic
- * Levenshtein algorithm.
- * @param nonFuzzyPrefix length of common (non-fuzzy) prefix (see default {@link #DEFAULT_NON_FUZZY_PREFIX}
- * @param minFuzzyLength minimum length of lookup key before any edits are allowed (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
- * @param sepLabel separation label
- * @param payloadSep payload separator byte
- * @param endByte end byte marker byte
- */
- public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
- int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware,
- FST> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
- int sepLabel, int payloadSep, int endByte, int holeCharacter) {
- super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
- if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
- throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
- }
- if (nonFuzzyPrefix < 0) {
- throw new IllegalArgumentException("nonFuzzyPrefix must not be >= 0 (got " + nonFuzzyPrefix + ")");
- }
- if (minFuzzyLength < 0) {
- throw new IllegalArgumentException("minFuzzyLength must not be >= 0 (got " + minFuzzyLength + ")");
- }
-
- this.maxEdits = maxEdits;
- this.transpositions = transpositions;
- this.nonFuzzyPrefix = nonFuzzyPrefix;
- this.minFuzzyLength = minFuzzyLength;
- this.unicodeAware = unicodeAware;
- }
-
- @Override
- protected List>> getFullPrefixPaths(List>> prefixPaths,
- Automaton lookupAutomaton,
- FST> fst)
- throws IOException {
-
- // TODO: right now there's no penalty for fuzzy/edits,
- // ie a completion whose prefix matched exactly what the
- // user typed gets no boost over completions that
- // required an edit, which get no boost over completions
- // requiring two edits. I suspect a multiplicative
- // factor is appropriate (eg, say a fuzzy match must be at
- // least 2X better weight than the non-fuzzy match to
- // "compete") ... in which case I think the wFST needs
- // to be log weights or something ...
-
- Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
- /*
- Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
- w.write(levA.toDot());
- w.close();
- System.out.println("Wrote LevA to out.dot");
- */
- return FSTUtil.intersectPrefixPaths(levA, fst);
- }
-
- @Override
- protected Automaton convertAutomaton(Automaton a) {
- if (unicodeAware) {
- // FLORIAN EDIT: get converted Automaton from superclass
- Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a));
- // This automaton should not blow up during determinize:
- utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE);
- return utf8automaton;
- } else {
- return super.convertAutomaton(a);
- }
- }
-
- @Override
- public TokenStreamToAutomaton getTokenStreamToAutomaton() {
- final TokenStreamToAutomaton tsta = super.getTokenStreamToAutomaton();
- tsta.setUnicodeArcs(unicodeAware);
- return tsta;
- }
-
- Automaton toLevenshteinAutomata(Automaton automaton) {
- List subs = new ArrayList<>();
- FiniteStringsIterator finiteStrings = new FiniteStringsIterator(automaton);
- for (IntsRef string; (string = finiteStrings.next()) != null;) {
- if (string.length <= nonFuzzyPrefix || string.length < minFuzzyLength) {
- subs.add(Automata.makeString(string.ints, string.offset, string.length));
- } else {
- int ints[] = new int[string.length-nonFuzzyPrefix];
- System.arraycopy(string.ints, string.offset+nonFuzzyPrefix, ints, 0, ints.length);
- // TODO: maybe add alphaMin to LevenshteinAutomata,
- // and pass 1 instead of 0? We probably don't want
- // to allow the trailing dedup bytes to be
- // edited... but then 0 byte is "in general" allowed
- // on input (but not in UTF8).
- LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
- subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
- }
- }
-
- if (subs.isEmpty()) {
- // automaton is empty, there is no accepted paths through it
- return Automata.makeEmpty(); // matches nothing
- } else if (subs.size() == 1) {
- // no synonyms or anything: just a single path through the tokenstream
- return subs.get(0);
- } else {
- // multiple paths: this is really scary! is it slow?
- // maybe we should not do this and throw UOE?
- Automaton a = Operations.union(subs);
- // TODO: we could call toLevenshteinAutomata() before det?
- // this only happens if you have multiple paths anyway (e.g. synonyms)
- return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
- }
- }
-}
diff --git a/core/src/main/java/org/elasticsearch/action/suggest/TransportSuggestAction.java b/core/src/main/java/org/elasticsearch/action/suggest/TransportSuggestAction.java
index 539ae2153ab02..6bc62cf83bb0f 100644
--- a/core/src/main/java/org/elasticsearch/action/suggest/TransportSuggestAction.java
+++ b/core/src/main/java/org/elasticsearch/action/suggest/TransportSuggestAction.java
@@ -142,7 +142,8 @@ protected ShardSuggestResponse shardOperation(ShardSuggestRequest request) {
if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
throw new IllegalArgumentException("suggest content missing");
}
- final SuggestionSearchContext context = suggestPhase.parseElement().parseInternal(parser, indexService.mapperService(), request.shardId().getIndex(), request.shardId().id(), request);
+ final SuggestionSearchContext context = suggestPhase.parseElement().parseInternal(parser, indexService.mapperService(),
+ indexService.fieldData(), request.shardId().getIndex(), request.shardId().id(), request);
final Suggest result = suggestPhase.execute(context, searcher.searcher());
return new ShardSuggestResponse(request.shardId(), result);
}
diff --git a/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java b/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java
index 95def1161c401..e5ba66300f39e 100644
--- a/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java
+++ b/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java
@@ -214,8 +214,16 @@ public void declareIntArray(BiConsumer> consumer, ParseFiel
private final List parseArray(XContentParser parser, IOSupplier supplier) throws IOException {
List list = new ArrayList<>();
- while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
- list.add(supplier.get());
+ if (parser.currentToken().isValue()) {
+ list.add(supplier.get()); // single value
+ } else {
+ while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
+ if (parser.currentToken().isValue()) {
+ list.add(supplier.get());
+ } else {
+ throw new IllegalStateException("expected value but got [" + parser.currentToken() + "]");
+ }
+ }
}
return list;
}
@@ -224,6 +232,19 @@ public void declareObject(BiConsumer consumer, BiFunction consumer.accept(v, objectParser.apply(p, c)), field, ValueType.OBJECT);
}
+ public void declareObjectOrDefault(BiConsumer consumer, BiFunction objectParser, Supplier defaultValue, ParseField field) {
+ declareField((p, v, c) -> {
+ if (p.currentToken() == XContentParser.Token.VALUE_BOOLEAN) {
+ if (p.booleanValue()) {
+ consumer.accept(v, defaultValue.get());
+ }
+ } else {
+ consumer.accept(v, objectParser.apply(p, c));
+ }
+ }, field, ValueType.OBJECT_OR_BOOLEAN);
+ }
+
+
public void declareFloat(BiConsumer consumer, ParseField field) {
declareField((p, v, c) -> consumer.accept(v, p.floatValue()), field, ValueType.FLOAT);
}
@@ -240,6 +261,10 @@ public void declareInt(BiConsumer consumer, ParseField field) {
declareField((p, v, c) -> consumer.accept(v, p.intValue()), field, ValueType.INT);
}
+ public void declareValue(BiConsumer consumer, ParseField field) {
+ declareField((p, v, c) -> consumer.accept(v, p), field, ValueType.VALUE);
+ }
+
public void declareString(BiConsumer consumer, ParseField field) {
declareField((p, v, c) -> consumer.accept(v, p.text()), field, ValueType.STRING);
}
@@ -296,13 +321,15 @@ public enum ValueType {
DOUBLE(EnumSet.of(XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING)),
LONG(EnumSet.of(XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING)),
INT(EnumSet.of(XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING)),
- BOOLEAN(EnumSet.of(XContentParser.Token.VALUE_BOOLEAN)), STRING_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY)),
- FLOAT_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY)),
- DOUBLE_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY)),
- LONG_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY)),
- INT_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY)),
- BOOLEAN_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY)),
- OBJECT(EnumSet.of(XContentParser.Token.START_OBJECT));
+ BOOLEAN(EnumSet.of(XContentParser.Token.VALUE_BOOLEAN)), STRING_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY, XContentParser.Token.VALUE_STRING)),
+ FLOAT_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY, XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING)),
+ DOUBLE_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY, XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING)),
+ LONG_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY, XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING)),
+ INT_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY, XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING)),
+ BOOLEAN_ARRAY(EnumSet.of(XContentParser.Token.START_ARRAY, XContentParser.Token.VALUE_BOOLEAN)),
+ OBJECT(EnumSet.of(XContentParser.Token.START_OBJECT)),
+ OBJECT_OR_BOOLEAN(EnumSet.of(XContentParser.Token.START_OBJECT, XContentParser.Token.VALUE_BOOLEAN)),
+ VALUE(EnumSet.of(XContentParser.Token.VALUE_BOOLEAN, XContentParser.Token.VALUE_NULL, XContentParser.Token.VALUE_EMBEDDED_OBJECT, XContentParser.Token.VALUE_NUMBER, XContentParser.Token.VALUE_STRING));
private final EnumSet tokens;
diff --git a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
index b504c4c21c52d..2c23f9474752c 100644
--- a/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
+++ b/core/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java
@@ -58,10 +58,7 @@ public PostingsFormat getPostingsFormatForField(String field) {
if (indexName == null) {
logger.warn("no index mapper found for field: [{}] returning default postings format", field);
} else if (indexName instanceof CompletionFieldMapper.CompletionFieldType) {
- // CompletionFieldMapper needs a special postings format
- final CompletionFieldMapper.CompletionFieldType fieldType = (CompletionFieldMapper.CompletionFieldType) indexName;
- final PostingsFormat defaultFormat = super.getPostingsFormatForField(field);
- return fieldType.postingsFormat(defaultFormat);
+ return CompletionFieldMapper.CompletionFieldType.postingsFormat();
}
return super.getPostingsFormatForField(field);
}
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
index 3a3a854915111..7f793ab6161ab 100644
--- a/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
+++ b/core/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
@@ -18,144 +18,91 @@
*/
package org.elasticsearch.index.mapper.core;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Field;
-import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.suggest.document.Completion50PostingsFormat;
+import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
+import org.apache.lucene.search.suggest.document.CompletionQuery;
+import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
+import org.apache.lucene.search.suggest.document.PrefixCompletionQuery;
+import org.apache.lucene.search.suggest.document.RegexCompletionQuery;
+import org.apache.lucene.search.suggest.document.SuggestField;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.NumberType;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.analysis.NamedAnalyzer;
-import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.Mapper;
-import org.elasticsearch.index.mapper.MapperException;
-import org.elasticsearch.index.mapper.MapperParsingException;
-import org.elasticsearch.index.mapper.MergeMappingException;
-import org.elasticsearch.index.mapper.MergeResult;
-import org.elasticsearch.index.mapper.ParseContext;
-import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat;
-import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
-import org.elasticsearch.search.suggest.context.ContextBuilder;
-import org.elasticsearch.search.suggest.context.ContextMapping;
-import org.elasticsearch.search.suggest.context.ContextMapping.ContextConfig;
+import org.elasticsearch.index.mapper.*;
+import org.elasticsearch.index.mapper.object.ArrayValueMapperParser;
+import org.elasticsearch.search.suggest.completion.CompletionSuggester;
+import org.elasticsearch.search.suggest.completion.context.ContextMapping;
+import org.elasticsearch.search.suggest.completion.context.ContextMappings;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.*;
import static org.elasticsearch.index.mapper.MapperBuilders.completionField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField;
/**
+ * Mapper for completion field. The field values are indexed as a weighted FST for
+ * fast auto-completion/search-as-you-type functionality.
*
+ * Type properties:
+ *
+ *
"analyzer": "simple", (default)
+ *
"search_analyzer": "simple", (default)
+ *
"preserve_separators" : true, (default)
+ *
"preserve_position_increments" : true (default)
+ *
"min_input_length": 50 (default)
+ *
"contexts" : CONTEXTS
+ *
+ * see {@link ContextMappings#load(Object, Version)} for CONTEXTS
+ * see {@link #parse(ParseContext)} for acceptable inputs for indexing
+ *
+ * This field type constructs completion queries that are run
+ * against the weighted FST index by the {@link CompletionSuggester}.
+ * This field can also be extended to add search criteria to suggestions
+ * for query-time filtering and boosting (see {@link ContextMappings}
*/
-public class CompletionFieldMapper extends FieldMapper {
+public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapperParser {
public static final String CONTENT_TYPE = "completion";
public static class Defaults {
- public static final CompletionFieldType FIELD_TYPE = new CompletionFieldType();
-
+ public static final MappedFieldType FIELD_TYPE = new CompletionFieldType();
static {
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
-
public static final boolean DEFAULT_PRESERVE_SEPARATORS = true;
public static final boolean DEFAULT_POSITION_INCREMENTS = true;
- public static final boolean DEFAULT_HAS_PAYLOADS = false;
public static final int DEFAULT_MAX_INPUT_LENGTH = 50;
}
public static class Fields {
// Mapping field names
- public static final String ANALYZER = "analyzer";
+ public static final ParseField ANALYZER = new ParseField("analyzer");
public static final ParseField SEARCH_ANALYZER = new ParseField("search_analyzer");
public static final ParseField PRESERVE_SEPARATORS = new ParseField("preserve_separators");
public static final ParseField PRESERVE_POSITION_INCREMENTS = new ParseField("preserve_position_increments");
- public static final String PAYLOADS = "payloads";
- public static final String TYPE = "type";
+ public static final ParseField TYPE = new ParseField("type");
+ public static final ParseField CONTEXTS = new ParseField("contexts");
public static final ParseField MAX_INPUT_LENGTH = new ParseField("max_input_length", "max_input_len");
// Content field names
public static final String CONTENT_FIELD_NAME_INPUT = "input";
- public static final String CONTENT_FIELD_NAME_OUTPUT = "output";
- public static final String CONTENT_FIELD_NAME_PAYLOAD = "payload";
public static final String CONTENT_FIELD_NAME_WEIGHT = "weight";
- public static final String CONTEXT = "context";
+ public static final String CONTENT_FIELD_NAME_CONTEXTS = "contexts";
}
public static final Set ALLOWED_CONTENT_FIELD_NAMES = Sets.newHashSet(Fields.CONTENT_FIELD_NAME_INPUT,
- Fields.CONTENT_FIELD_NAME_OUTPUT, Fields.CONTENT_FIELD_NAME_PAYLOAD, Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTEXT);
-
- public static class Builder extends FieldMapper.Builder {
-
- private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
- private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS;
- private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
- private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH;
- private SortedMap contextMapping = ContextMapping.EMPTY_MAPPING;
-
- public Builder(String name) {
- super(name, Defaults.FIELD_TYPE);
- builder = this;
- }
-
- public Builder payloads(boolean payloads) {
- this.payloads = payloads;
- return this;
- }
-
- public Builder preserveSeparators(boolean preserveSeparators) {
- this.preserveSeparators = preserveSeparators;
- return this;
- }
-
- public Builder preservePositionIncrements(boolean preservePositionIncrements) {
- this.preservePositionIncrements = preservePositionIncrements;
- return this;
- }
-
- public Builder maxInputLength(int maxInputLength) {
- if (maxInputLength <= 0) {
- throw new IllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]");
- }
- this.maxInputLength = maxInputLength;
- return this;
- }
-
- public Builder contextMapping(SortedMap contextMapping) {
- this.contextMapping = contextMapping;
- return this;
- }
-
- @Override
- public CompletionFieldMapper build(Mapper.BuilderContext context) {
- setupFieldType(context);
- CompletionFieldType completionFieldType = (CompletionFieldType)fieldType;
- completionFieldType.setProvider(new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, payloads));
- completionFieldType.setContextMapping(contextMapping);
- return new CompletionFieldMapper(name, fieldType, maxInputLength, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
- }
-
- }
+ Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTENT_FIELD_NAME_CONTEXTS);
public static class TypeParser implements Mapper.TypeParser {
@@ -171,17 +118,12 @@ public static class TypeParser implements Mapper.TypeParser {
if (fieldName.equals("type")) {
continue;
}
- if (Fields.ANALYZER.equals(fieldName) || // index_analyzer is for backcompat, remove for v3.0
- fieldName.equals("index_analyzer") && parserContext.indexVersionCreated().before(Version.V_2_0_0_beta1)) {
-
+ if (parserContext.parseFieldMatcher().match(fieldName, Fields.ANALYZER)) {
indexAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString());
iterator.remove();
} else if (parserContext.parseFieldMatcher().match(fieldName, Fields.SEARCH_ANALYZER)) {
searchAnalyzer = getNamedAnalyzer(parserContext, fieldNode.toString());
iterator.remove();
- } else if (fieldName.equals(Fields.PAYLOADS)) {
- builder.payloads(Boolean.parseBoolean(fieldNode.toString()));
- iterator.remove();
} else if (parserContext.parseFieldMatcher().match(fieldName, Fields.PRESERVE_SEPARATORS)) {
builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString()));
iterator.remove();
@@ -191,14 +133,14 @@ public static class TypeParser implements Mapper.TypeParser {
} else if (parserContext.parseFieldMatcher().match(fieldName, Fields.MAX_INPUT_LENGTH)) {
builder.maxInputLength(Integer.parseInt(fieldNode.toString()));
iterator.remove();
- } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) {
+ } else if (parserContext.parseFieldMatcher().match(fieldName, Fields.CONTEXTS)) {
+ builder.contextMappings(ContextMappings.load(fieldNode, parserContext.indexVersionCreated()));
iterator.remove();
- } else if (fieldName.equals(Fields.CONTEXT)) {
- builder.contextMapping(ContextBuilder.loadMappings(fieldNode, parserContext.indexVersionCreated()));
+ } else if (parseMultiField(builder, name, parserContext, fieldName, fieldNode)) {
iterator.remove();
}
}
-
+
if (indexAnalyzer == null) {
if (searchAnalyzer != null) {
throw new MapperParsingException("analyzer on completion field [" + name + "] must be set when search_analyzer is set");
@@ -207,9 +149,9 @@ public static class TypeParser implements Mapper.TypeParser {
} else if (searchAnalyzer == null) {
searchAnalyzer = indexAnalyzer;
}
+
builder.indexAnalyzer(indexAnalyzer);
builder.searchAnalyzer(searchAnalyzer);
-
return builder;
}
@@ -223,40 +165,138 @@ private NamedAnalyzer getNamedAnalyzer(ParserContext parserContext, String name)
}
public static final class CompletionFieldType extends MappedFieldType {
- private PostingsFormat postingsFormat;
- private AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
- private SortedMap contextMapping = ContextMapping.EMPTY_MAPPING;
+
+ private static PostingsFormat postingsFormat;
+
+ private boolean preserveSep = Defaults.DEFAULT_PRESERVE_SEPARATORS;
+ private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
+ private ContextMappings contextMappings = null;
public CompletionFieldType() {
setFieldDataType(null);
}
- protected CompletionFieldType(CompletionFieldType ref) {
+ private CompletionFieldType(CompletionFieldType ref) {
super(ref);
- this.postingsFormat = ref.postingsFormat;
- this.analyzingSuggestLookupProvider = ref.analyzingSuggestLookupProvider;
- this.contextMapping = ref.contextMapping;
+ this.contextMappings = ref.contextMappings;
+ this.preserveSep = ref.preserveSep;
+ this.preservePositionIncrements = ref.preservePositionIncrements;
+ }
+
+ public void setPreserveSep(boolean preserveSep) {
+ checkIfFrozen();
+ this.preserveSep = preserveSep;
+ }
+
+ public void setPreservePositionIncrements(boolean preservePositionIncrements) {
+ checkIfFrozen();
+ this.preservePositionIncrements = preservePositionIncrements;
+ }
+
+ public void setContextMappings(ContextMappings contextMappings) {
+ checkIfFrozen();
+ this.contextMappings = contextMappings;
+ }
+
+ @Override
+ public NamedAnalyzer indexAnalyzer() {
+ final NamedAnalyzer indexAnalyzer = super.indexAnalyzer();
+ if (indexAnalyzer != null && !(indexAnalyzer.analyzer() instanceof CompletionAnalyzer)) {
+ return new NamedAnalyzer(indexAnalyzer.name(),
+ new CompletionAnalyzer(indexAnalyzer, preserveSep, preservePositionIncrements));
+
+ }
+ return indexAnalyzer;
+ }
+
+ @Override
+ public NamedAnalyzer searchAnalyzer() {
+ final NamedAnalyzer searchAnalyzer = super.searchAnalyzer();
+ if (searchAnalyzer != null && !(searchAnalyzer.analyzer() instanceof CompletionAnalyzer)) {
+ return new NamedAnalyzer(searchAnalyzer.name(),
+ new CompletionAnalyzer(searchAnalyzer, preserveSep, preservePositionIncrements));
+ }
+ return searchAnalyzer;
+ }
+
+ /**
+ * @return true if there are one or more context mappings defined
+ * for this field type
+ */
+ public boolean hasContextMappings() {
+ return contextMappings != null;
+ }
+
+ /**
+ * @return associated context mappings for this field type
+ */
+ public ContextMappings getContextMappings() {
+ return contextMappings;
+ }
+
+ public boolean preserveSep() {
+ return preserveSep;
+ }
+
+ public boolean preservePositionIncrements() {
+ return preservePositionIncrements;
+ }
+
+ /**
+ * @return postings format to use for this field-type
+ */
+ public static synchronized PostingsFormat postingsFormat() {
+ if (postingsFormat == null) {
+ postingsFormat = new Completion50PostingsFormat();
+ }
+ return postingsFormat;
+ }
+
+ /**
+ * Completion prefix query
+ */
+ public CompletionQuery prefixQuery(Object value) {
+ return new PrefixCompletionQuery(searchAnalyzer().analyzer(), createTerm(value));
+ }
+
+ /**
+ * Completion prefix regular expression query
+ */
+ public CompletionQuery regexpQuery(Object value, int flags, int maxDeterminizedStates) {
+ return new RegexCompletionQuery(createTerm(value), flags, maxDeterminizedStates);
+ }
+
+ /**
+ * Completion prefix fuzzy query
+ */
+ public CompletionQuery fuzzyQuery(String value, Fuzziness fuzziness, int nonFuzzyPrefixLength,
+ int minFuzzyPrefixLength, int maxExpansions, boolean transpositions,
+ boolean unicodeAware) {
+ return new FuzzyCompletionQuery(searchAnalyzer().analyzer(), createTerm(value), null,
+ fuzziness.asDistance(), transpositions, nonFuzzyPrefixLength, minFuzzyPrefixLength,
+ unicodeAware, maxExpansions);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
- if (!(o instanceof CompletionFieldType)) return false;
+ if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
- CompletionFieldType fieldType = (CompletionFieldType) o;
- return analyzingSuggestLookupProvider.getPreserveSep() == fieldType.analyzingSuggestLookupProvider.getPreserveSep() &&
- analyzingSuggestLookupProvider.getPreservePositionsIncrements() == fieldType.analyzingSuggestLookupProvider.getPreservePositionsIncrements() &&
- analyzingSuggestLookupProvider.hasPayloads() == fieldType.analyzingSuggestLookupProvider.hasPayloads() &&
- Objects.equals(getContextMapping(), fieldType.getContextMapping());
+
+ CompletionFieldType that = (CompletionFieldType) o;
+
+ if (preserveSep != that.preserveSep) return false;
+ if (preservePositionIncrements != that.preservePositionIncrements) return false;
+ return !(contextMappings != null ? !contextMappings.equals(that.contextMappings) : that.contextMappings != null);
+
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(),
- analyzingSuggestLookupProvider.getPreserveSep(),
- analyzingSuggestLookupProvider.getPreservePositionsIncrements(),
- analyzingSuggestLookupProvider.hasPayloads(),
- getContextMapping());
+ preserveSep,
+ preservePositionIncrements,
+ contextMappings);
}
@Override
@@ -273,69 +313,99 @@ public String typeName() {
public void checkCompatibility(MappedFieldType fieldType, List conflicts, boolean strict) {
super.checkCompatibility(fieldType, conflicts, strict);
CompletionFieldType other = (CompletionFieldType)fieldType;
- if (analyzingSuggestLookupProvider.hasPayloads() != other.analyzingSuggestLookupProvider.hasPayloads()) {
- conflicts.add("mapper [" + names().fullName() + "] has different [payload] values");
- }
- if (analyzingSuggestLookupProvider.getPreservePositionsIncrements() != other.analyzingSuggestLookupProvider.getPreservePositionsIncrements()) {
+
+ if (preservePositionIncrements != other.preservePositionIncrements) {
conflicts.add("mapper [" + names().fullName() + "] has different [preserve_position_increments] values");
}
- if (analyzingSuggestLookupProvider.getPreserveSep() != other.analyzingSuggestLookupProvider.getPreserveSep()) {
+ if (preserveSep != other.preserveSep) {
conflicts.add("mapper [" + names().fullName() + "] has different [preserve_separators] values");
}
- if(!ContextMapping.mappingsAreEqual(getContextMapping(), other.getContextMapping())) {
- conflicts.add("mapper [" + names().fullName() + "] has different [context_mapping] values");
+ if (hasContextMappings() != other.hasContextMappings()) {
+ conflicts.add("mapper [" + names().fullName() + "] has different [context_mappings] values");
+ } else if (hasContextMappings() && contextMappings.equals(other.contextMappings) == false) {
+ conflicts.add("mapper [" + names().fullName() + "] has different [context_mappings] values");
}
}
- public void setProvider(AnalyzingCompletionLookupProvider provider) {
- checkIfFrozen();
- this.analyzingSuggestLookupProvider = provider;
+ @Override
+ public String value(Object value) {
+ if (value == null) {
+ return null;
+ }
+ return value.toString();
}
- public synchronized PostingsFormat postingsFormat(PostingsFormat in) {
- if (in instanceof Completion090PostingsFormat) {
- throw new IllegalStateException("Double wrapping of " + Completion090PostingsFormat.class);
- }
- if (postingsFormat == null) {
- postingsFormat = new Completion090PostingsFormat(in, analyzingSuggestLookupProvider);
- }
- return postingsFormat;
+ @Override
+ public boolean isSortable() {
+ return false;
}
- public void setContextMapping(SortedMap contextMapping) {
- checkIfFrozen();
- this.contextMapping = contextMapping;
+ }
+
+ /**
+ * Builder for {@link CompletionFieldMapper}
+ */
+ public static class Builder extends FieldMapper.Builder {
+
+ private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH;
+ private ContextMappings contextMappings = null;
+ private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
+ private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
+
+ /**
+ * @param name of the completion field to build
+ */
+ public Builder(String name) {
+ super(name, new CompletionFieldType());
+ builder = this;
}
- /** Get the context mapping associated with this completion field */
- public SortedMap getContextMapping() {
- return contextMapping;
+ /**
+ * @param maxInputLength maximum expected prefix length
+ * NOTE: prefixes longer than this will
+ * be truncated
+ */
+ public Builder maxInputLength(int maxInputLength) {
+ if (maxInputLength <= 0) {
+ throw new IllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]");
+ }
+ this.maxInputLength = maxInputLength;
+ return this;
}
- /** @return true if a context mapping has been defined */
- public boolean requiresContext() {
- return contextMapping.isEmpty() == false;
+ /**
+ * Add context mapping to this field
+ * @param contextMappings see {@link ContextMappings#load(Object, Version)}
+ */
+ public Builder contextMappings(ContextMappings contextMappings) {
+ this.contextMappings = contextMappings;
+ return this;
}
- @Override
- public String value(Object value) {
- if (value == null) {
- return null;
- }
- return value.toString();
+ public Builder preserveSeparators(boolean preserveSeparators) {
+ this.preserveSeparators = preserveSeparators;
+ return this;
+ }
+
+ public Builder preservePositionIncrements(boolean preservePositionIncrements) {
+ this.preservePositionIncrements = preservePositionIncrements;
+ return this;
}
@Override
- public boolean isSortable() {
- return false;
+ public CompletionFieldMapper build(BuilderContext context) {
+ setupFieldType(context);
+ CompletionFieldType completionFieldType = (CompletionFieldType) this.fieldType;
+ completionFieldType.setContextMappings(contextMappings);
+ completionFieldType.setPreservePositionIncrements(preservePositionIncrements);
+ completionFieldType.setPreserveSep(preserveSeparators);
+ return new CompletionFieldMapper(name, this.fieldType, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, maxInputLength);
}
}
- private static final BytesRef EMPTY = new BytesRef();
-
private int maxInputLength;
- public CompletionFieldMapper(String simpleName, MappedFieldType fieldType, int maxInputLength, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
+ public CompletionFieldMapper(String simpleName, MappedFieldType fieldType, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, int maxInputLength) {
super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, multiFields, copyTo);
this.maxInputLength = maxInputLength;
}
@@ -345,216 +415,188 @@ public CompletionFieldType fieldType() {
return (CompletionFieldType) super.fieldType();
}
+ /**
+ * Parses and indexes inputs
+ *
+ * Parsing:
+ * Acceptable format:
+ * "STRING" - interpreted as field value (input)
+ * "ARRAY" - each element can be one of {@link #parse(ParseContext, Token, XContentParser, Map)}
+ * "OBJECT" - see {@link #parse(ParseContext, Token, XContentParser, Map)}
+ *
+ * Indexing:
+ * if context mappings are defined, delegates to {@link ContextMappings#addField(ParseContext.Document, String, String, int, Map)}
+ * else adds inputs as a {@link org.apache.lucene.search.suggest.document.SuggestField}
+ */
@Override
public Mapper parse(ParseContext context) throws IOException {
+ // parse
XContentParser parser = context.parser();
- XContentParser.Token token = parser.currentToken();
- if (token == XContentParser.Token.VALUE_NULL) {
+ Token token = parser.currentToken();
+ Map inputMap = new HashMap<>(1);
+ if (token == Token.VALUE_NULL) {
throw new MapperParsingException("completion field [" + fieldType().names().fullName() + "] does not support null values");
+ } else if (token == Token.START_ARRAY) {
+ while ((token = parser.nextToken()) != Token.END_ARRAY) {
+ parse(context, token, parser, inputMap);
+ }
+ } else {
+ parse(context, token, parser, inputMap);
+ }
+
+ // index
+ for (Map.Entry completionInput : inputMap.entrySet()) {
+ String input = completionInput.getKey();
+ // truncate input
+ if (input.length() > maxInputLength) {
+ int len = Math.min(maxInputLength, input.length());
+ if (Character.isHighSurrogate(input.charAt(len - 1))) {
+ assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len));
+ len += 1;
+ }
+ input = input.substring(0, len);
+ }
+ CompletionInputMetaData metaData = completionInput.getValue();
+ if (fieldType().hasContextMappings()) {
+ fieldType().getContextMappings().addField(context.doc(), fieldType().names().indexName(),
+ input, metaData.weight, metaData.contexts);
+ } else {
+ context.doc().add(new SuggestField(fieldType().names().indexName(), input, metaData.weight));
+ }
}
+ multiFields.parse(this, context);
+ return null;
+ }
- String surfaceForm = null;
- BytesRef payload = null;
- long weight = -1;
- List inputs = new ArrayList<>(4);
-
- SortedMap contextConfig = null;
-
- if (token == XContentParser.Token.VALUE_STRING) {
- inputs.add(parser.text());
- multiFields.parse(this, context);
- } else {
- String currentFieldName = null;
- while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
- if (token == XContentParser.Token.FIELD_NAME) {
+ /**
+ * Acceptable inputs:
+ * "STRING" - interpreted as the field value (input)
+ * "OBJECT" - { "input": STRING|ARRAY, "weight": STRING|INT, "contexts": ARRAY|OBJECT }
+ */
+ private void parse(ParseContext parseContext, Token token, XContentParser parser, Map inputMap) throws IOException {
+ String currentFieldName = null;
+ if (token == Token.VALUE_STRING) {
+ inputMap.put(parser.text(), new CompletionInputMetaData(Collections.>emptyMap(), 1));
+ } else if (token == Token.START_OBJECT) {
+ Set inputs = new HashSet<>();
+ int weight = 1;
+ Map> contextsMap = new HashMap<>();
+ while ((token = parser.nextToken()) != Token.END_OBJECT) {
+ if (token == Token.FIELD_NAME) {
currentFieldName = parser.currentName();
if (!ALLOWED_CONTENT_FIELD_NAMES.contains(currentFieldName)) {
- throw new IllegalArgumentException("Unknown field name[" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES);
+ throw new IllegalArgumentException("unknown field name [" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES);
}
- } else if (Fields.CONTEXT.equals(currentFieldName)) {
- SortedMap configs = new TreeMap<>();
-
- if (token == Token.START_OBJECT) {
- while ((token = parser.nextToken()) != Token.END_OBJECT) {
- String name = parser.text();
- ContextMapping mapping = fieldType().getContextMapping().get(name);
- if (mapping == null) {
- throw new ElasticsearchParseException("context [{}] is not defined", name);
- } else {
- token = parser.nextToken();
- configs.put(name, mapping.parseContext(context, parser));
+ } else if (currentFieldName != null) {
+ if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
+ if (token == Token.VALUE_STRING) {
+ inputs.add(parser.text());
+ } else if (token == Token.START_ARRAY) {
+ while ((token = parser.nextToken()) != Token.END_ARRAY) {
+ if (token == Token.VALUE_STRING) {
+ inputs.add(parser.text());
+ } else {
+ throw new IllegalArgumentException("input array must have string values, but was [" + token.name() + "]");
+ }
}
+ } else {
+ throw new IllegalArgumentException("input must be a string or array, but was [" + token.name() + "]");
}
- contextConfig = new TreeMap<>();
- for (ContextMapping mapping : fieldType().getContextMapping().values()) {
- ContextConfig config = configs.get(mapping.name());
- contextConfig.put(mapping.name(), config==null ? mapping.defaultConfig() : config);
+ } else if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
+ final Number weightValue;
+ if (token == Token.VALUE_STRING) {
+ try {
+ weightValue = Long.parseLong(parser.text());
+ } catch (NumberFormatException e) {
+ throw new IllegalArgumentException("weight must be an integer, but was [" + parser.text() + "]");
+ }
+ } else if (token == Token.VALUE_NUMBER) {
+ NumberType numberType = parser.numberType();
+ if (NumberType.LONG != numberType && NumberType.INT != numberType) {
+ throw new IllegalArgumentException("weight must be an integer, but was [" + parser.numberValue() + "]");
+ }
+ weightValue = parser.numberValue();
+ } else {
+ throw new IllegalArgumentException("weight must be a number or string, but was [" + token.name() + "]");
}
- } else {
- throw new ElasticsearchParseException("context must be an object");
- }
- } else if (Fields.CONTENT_FIELD_NAME_PAYLOAD.equals(currentFieldName)) {
- if (!isStoringPayloads()) {
- throw new MapperException("Payloads disabled in mapping");
- }
- if (token == XContentParser.Token.START_OBJECT) {
- XContentBuilder payloadBuilder = XContentFactory.contentBuilder(parser.contentType()).copyCurrentStructure(parser);
- payload = payloadBuilder.bytes().toBytesRef();
- payloadBuilder.close();
- } else if (token.isValue()) {
- payload = parser.utf8BytesOrNull();
- } else {
- throw new MapperException("payload doesn't support type " + token);
- }
- } else if (token == XContentParser.Token.VALUE_STRING) {
- if (Fields.CONTENT_FIELD_NAME_OUTPUT.equals(currentFieldName)) {
- surfaceForm = parser.text();
- }
- if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
- inputs.add(parser.text());
- }
- if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
- Number weightValue;
- try {
- weightValue = Long.parseLong(parser.text());
- } catch (NumberFormatException e) {
- throw new IllegalArgumentException("Weight must be a string representing a numeric value, but was [" + parser.text() + "]");
+ if (weightValue.longValue() < 0 || weightValue.longValue() > Integer.MAX_VALUE) { // always parse a long to make sure we don't get overflow
+ throw new IllegalArgumentException("weight must be in the interval [0..2147483647], but was [" + weightValue.longValue() + "]");
}
- weight = weightValue.longValue(); // always parse a long to make sure we don't get overflow
- checkWeight(weight);
- }
- } else if (token == XContentParser.Token.VALUE_NUMBER) {
- if (Fields.CONTENT_FIELD_NAME_WEIGHT.equals(currentFieldName)) {
- NumberType numberType = parser.numberType();
- if (NumberType.LONG != numberType && NumberType.INT != numberType) {
- throw new IllegalArgumentException("Weight must be an integer, but was [" + parser.numberValue() + "]");
+ weight = weightValue.intValue();
+ } else if (Fields.CONTENT_FIELD_NAME_CONTEXTS.equals(currentFieldName)) {
+ if (fieldType().hasContextMappings() == false) {
+ throw new IllegalArgumentException("contexts field is not supported for field: [" + fieldType().names().fullName() + "]");
}
- weight = parser.longValue(); // always parse a long to make sure we don't get overflow
- checkWeight(weight);
- }
- } else if (token == XContentParser.Token.START_ARRAY) {
- if (Fields.CONTENT_FIELD_NAME_INPUT.equals(currentFieldName)) {
- while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
- inputs.add(parser.text());
+ ContextMappings contextMappings = fieldType().getContextMappings();
+ XContentParser.Token currentToken = parser.currentToken();
+ if (currentToken == XContentParser.Token.START_OBJECT) {
+ ContextMapping contextMapping = null;
+ String fieldName = null;
+ while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (currentToken == XContentParser.Token.FIELD_NAME) {
+ fieldName = parser.currentName();
+ contextMapping = contextMappings.get(fieldName);
+ } else if (currentToken == XContentParser.Token.VALUE_STRING
+ || currentToken == XContentParser.Token.START_ARRAY
+ || currentToken == XContentParser.Token.START_OBJECT) {
+ assert fieldName != null;
+ assert !contextsMap.containsKey(fieldName);
+ contextsMap.put(fieldName, contextMapping.parseContext(parseContext, parser));
+ } else {
+ throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]");
+ }
+ }
+ } else {
+ throw new IllegalArgumentException("contexts must be an object or an array , but was [" + currentToken + "]");
}
}
}
}
- }
-
- if(contextConfig == null) {
- contextConfig = new TreeMap<>();
- for (ContextMapping mapping : fieldType().getContextMapping().values()) {
- contextConfig.put(mapping.name(), mapping.defaultConfig());
- }
- }
-
- final ContextMapping.Context ctx = new ContextMapping.Context(contextConfig, context.doc());
-
- payload = payload == null ? EMPTY : payload;
- if (surfaceForm == null) { // no surface form use the input
for (String input : inputs) {
- if (input.length() == 0) {
- continue;
+ if (inputMap.containsKey(input) == false || inputMap.get(input).weight < weight) {
+ inputMap.put(input, new CompletionInputMetaData(contextsMap, weight));
}
- BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef(
- input), weight, payload);
- context.doc().add(getCompletionField(ctx, input, suggestPayload));
}
} else {
- BytesRef suggestPayload = fieldType().analyzingSuggestLookupProvider.buildPayload(new BytesRef(
- surfaceForm), weight, payload);
- for (String input : inputs) {
- if (input.length() == 0) {
- continue;
- }
- context.doc().add(getCompletionField(ctx, input, suggestPayload));
- }
+ throw new ElasticsearchParseException("failed to parse expected text or object got" + token.name());
}
- return null;
}
- private void checkWeight(long weight) {
- if (weight < 0 || weight > Integer.MAX_VALUE) {
- throw new IllegalArgumentException("Weight must be in the interval [0..2147483647], but was [" + weight + "]");
- }
- }
+ static class CompletionInputMetaData {
+ public final Map> contexts;
+ public final int weight;
- public Field getCompletionField(ContextMapping.Context ctx, String input, BytesRef payload) {
- final String originalInput = input;
- if (input.length() > maxInputLength) {
- final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length()));
- input = input.substring(0, len);
- }
- for (int i = 0; i < input.length(); i++) {
- if (isReservedChar(input.charAt(i))) {
- throw new IllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x"
- + Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT)
- + "] at position " + i + " is a reserved character");
- }
+ CompletionInputMetaData(Map> contexts, int weight) {
+ this.contexts = contexts;
+ this.weight = weight;
}
- return new SuggestField(fieldType().names().indexName(), ctx, input, fieldType(), payload, fieldType().analyzingSuggestLookupProvider);
}
- public static int correctSubStringLen(String input, int len) {
- if (Character.isHighSurrogate(input.charAt(len - 1))) {
- assert input.length() >= len + 1 && Character.isLowSurrogate(input.charAt(len));
- return len + 1;
- }
- return len;
- }
-
- public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
- return fieldType().analyzingSuggestLookupProvider.buildPayload(surfaceForm, weight, payload);
- }
-
- private static final class SuggestField extends Field {
- private final BytesRef payload;
- private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
- private final ContextMapping.Context ctx;
-
- public SuggestField(String name, ContextMapping.Context ctx, String value, MappedFieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
- super(name, value, type);
- this.payload = payload;
- this.toFiniteStrings = toFiniteStrings;
- this.ctx = ctx;
- }
-
- @Override
- public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException {
- TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer, previous));
- return new CompletionTokenStream(ts, payload, toFiniteStrings);
- }
- }
-
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(simpleName())
- .field(Fields.TYPE, CONTENT_TYPE);
-
- builder.field(Fields.ANALYZER, fieldType().indexAnalyzer().name());
+ .field(Fields.TYPE.getPreferredName(), CONTENT_TYPE);
+ builder.field(Fields.ANALYZER.getPreferredName(), fieldType().indexAnalyzer().name());
if (fieldType().indexAnalyzer().name().equals(fieldType().searchAnalyzer().name()) == false) {
builder.field(Fields.SEARCH_ANALYZER.getPreferredName(), fieldType().searchAnalyzer().name());
}
- builder.field(Fields.PAYLOADS, fieldType().analyzingSuggestLookupProvider.hasPayloads());
- builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), fieldType().analyzingSuggestLookupProvider.getPreserveSep());
- builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), fieldType().analyzingSuggestLookupProvider.getPreservePositionsIncrements());
+ builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), fieldType().preserveSep());
+ builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), fieldType().preservePositionIncrements());
builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength);
- multiFields.toXContent(builder, params);
- if(fieldType().requiresContext()) {
- builder.startObject(Fields.CONTEXT);
- for (ContextMapping mapping : fieldType().getContextMapping().values()) {
- builder.value(mapping);
- }
- builder.endObject();
+ if (fieldType().hasContextMappings()) {
+ builder.startArray(Fields.CONTEXTS.getPreferredName());
+ fieldType().getContextMappings().toXContent(builder, params);
+ builder.endArray();
}
+ multiFields.toXContent(builder, params);
return builder.endObject();
}
@Override
protected void parseCreateField(ParseContext context, List fields) throws IOException {
+ // no-op
}
@Override
@@ -562,10 +604,6 @@ protected String contentType() {
return CONTENT_TYPE;
}
- public boolean isStoringPayloads() {
- return fieldType().analyzingSuggestLookupProvider.hasPayloads();
- }
-
@Override
public void merge(Mapper mergeWith, MergeResult mergeResult) throws MergeMappingException {
super.merge(mergeWith, mergeResult);
@@ -574,22 +612,4 @@ public void merge(Mapper mergeWith, MergeResult mergeResult) throws MergeMapping
this.maxInputLength = fieldMergeWith.maxInputLength;
}
}
-
- // this should be package private but our tests don't allow it.
- public static boolean isReservedChar(char character) {
- /* we use 0x001F as a SEP_LABEL in the suggester but we can use the UTF-16 representation since they
- * are equivalent. We also don't need to convert the input character to UTF-8 here to check for
- * the 0x00 end label since all multi-byte UTF-8 chars start with 0x10 binary so if the UTF-16 CP is == 0x00
- * it's the single byte UTF-8 CP */
- assert XAnalyzingSuggester.PAYLOAD_SEP == XAnalyzingSuggester.SEP_LABEL; // ensure they are the same!
- switch(character) {
- case XAnalyzingSuggester.END_BYTE:
- case XAnalyzingSuggester.SEP_LABEL:
- case XAnalyzingSuggester.HOLE_CHARACTER:
- case ContextMapping.SEPARATOR:
- return true;
- default:
- return false;
- }
- }
}
diff --git a/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java b/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java
index 45f58c47da60e..1ebf44e23f26d 100644
--- a/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java
+++ b/core/src/main/java/org/elasticsearch/index/query/RegexpFlag.java
@@ -108,7 +108,7 @@ public int value() {
* @param flags A string representing a list of regular expression flags
* @return The combined OR'ed value for all the flags
*/
- static int resolveValue(String flags) {
+ public static int resolveValue(String flags) {
if (flags == null || flags.isEmpty()) {
return RegExp.ALL;
}
diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
index 6e7893df9838b..aad1497c9dcdc 100644
--- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
+++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
@@ -19,7 +19,6 @@
package org.elasticsearch.index.shard;
-import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
@@ -106,8 +105,8 @@
import org.elasticsearch.indices.recovery.RecoveryFailedException;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.percolator.PercolatorService;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat;
import org.elasticsearch.search.suggest.completion.CompletionStats;
+import org.elasticsearch.search.suggest.completion.CompletionFieldStats;
import org.elasticsearch.threadpool.ThreadPool;
import java.io.IOException;
@@ -618,15 +617,8 @@ public SuggestStats suggestStats() {
public CompletionStats completionStats(String... fields) {
CompletionStats completionStats = new CompletionStats();
- final Engine.Searcher currentSearcher = acquireSearcher("completion_stats");
- try {
- PostingsFormat postingsFormat = PostingsFormat.forName(Completion090PostingsFormat.CODEC_NAME);
- if (postingsFormat instanceof Completion090PostingsFormat) {
- Completion090PostingsFormat completionPostingsFormat = (Completion090PostingsFormat) postingsFormat;
- completionStats.add(completionPostingsFormat.completionStats(currentSearcher.reader(), fields));
- }
- } finally {
- currentSearcher.close();
+ try (final Engine.Searcher currentSearcher = acquireSearcher("completion_stats")) {
+ completionStats.add(CompletionFieldStats.completionStats(currentSearcher.reader(), fields));
}
return completionStats;
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java b/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java
index 95ce7ce70a060..db60d58953a1f 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/Suggest.java
@@ -126,7 +126,7 @@ public void readFrom(StreamInput in) throws IOException {
suggestion = new PhraseSuggestion();
break;
default:
- suggestion = new Suggestion>();
+ suggestion = new Suggestion();
break;
}
suggestion.readFrom(in);
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java
index ea45c1033e9e4..5621e03e7defe 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java
@@ -20,9 +20,6 @@
import org.elasticsearch.action.support.ToXContentToBytes;
import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.search.suggest.context.CategoryContextMapping;
-import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery;
-import org.elasticsearch.search.suggest.context.GeolocationContextMapping;
import java.io.IOException;
import java.util.ArrayList;
@@ -101,90 +98,18 @@ public static abstract class SuggestionBuilder extends ToXContentToBytes {
private String name;
private String suggester;
private String text;
+ private String prefix;
+ private String regex;
private String field;
private String analyzer;
private Integer size;
private Integer shardSize;
-
- private List contextQueries = new ArrayList<>();
public SuggestionBuilder(String name, String suggester) {
this.name = name;
this.suggester = suggester;
}
- @SuppressWarnings("unchecked")
- private T addContextQuery(ContextQuery ctx) {
- this.contextQueries.add(ctx);
- return (T) this;
- }
-
- /**
- * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}.
- * @param lat Latitude of the location
- * @param lon Longitude of the Location
- * @return this
- */
- public T addGeoLocation(String name, double lat, double lon, int ... precisions) {
- return addContextQuery(GeolocationContextMapping.query(name, lat, lon, precisions));
- }
-
- /**
- * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}.
- * @param lat Latitude of the location
- * @param lon Longitude of the Location
- * @param precisions precisions as string var-args
- * @return this
- */
- public T addGeoLocationWithPrecision(String name, double lat, double lon, String ... precisions) {
- return addContextQuery(GeolocationContextMapping.query(name, lat, lon, precisions));
- }
-
- /**
- * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}.
- * @param geohash Geohash of the location
- * @return this
- */
- public T addGeoLocation(String name, String geohash) {
- return addContextQuery(GeolocationContextMapping.query(name, geohash));
- }
-
- /**
- * Setup a Category for suggestions. See {@link CategoryContextMapping}.
- * @param categories name of the category
- * @return this
- */
- public T addCategory(String name, CharSequence...categories) {
- return addContextQuery(CategoryContextMapping.query(name, categories));
- }
-
- /**
- * Setup a Category for suggestions. See {@link CategoryContextMapping}.
- * @param categories name of the category
- * @return this
- */
- public T addCategory(String name, Iterable extends CharSequence> categories) {
- return addContextQuery(CategoryContextMapping.query(name, categories));
- }
-
- /**
- * Setup a Context Field for suggestions. See {@link CategoryContextMapping}.
- * @param fieldvalues name of the category
- * @return this
- */
- public T addContextField(String name, CharSequence...fieldvalues) {
- return addContextQuery(CategoryContextMapping.query(name, fieldvalues));
- }
-
- /**
- * Setup a Context Field for suggestions. See {@link CategoryContextMapping}.
- * @param fieldvalues name of the category
- * @return this
- */
- public T addContextField(String name, Iterable extends CharSequence> fieldvalues) {
- return addContextQuery(CategoryContextMapping.query(name, fieldvalues));
- }
-
/**
* Same as in {@link SuggestBuilder#setText(String)}, but in the suggestion scope.
*/
@@ -194,12 +119,26 @@ public T text(String text) {
return (T) this;
}
+ protected void setPrefix(String prefix) {
+ this.prefix = prefix;
+ }
+
+ protected void setRegex(String regex) {
+ this.regex = regex;
+ }
+
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
if (text != null) {
builder.field("text", text);
}
+ if (prefix != null) {
+ builder.field("prefix", prefix);
+ }
+ if (regex != null) {
+ builder.field("regex", regex);
+ }
builder.startObject(suggester);
if (analyzer != null) {
builder.field("analyzer", analyzer);
@@ -214,13 +153,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field("shard_size", shardSize);
}
- if (!contextQueries.isEmpty()) {
- builder.startObject("context");
- for (ContextQuery query : contextQueries) {
- query.toXContent(builder, params);
- }
- builder.endObject();
- }
builder = innerToXContent(builder, params);
builder.endObject();
builder.endObject();
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java
index 16957986e2789..66b917394ffce 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestBuilders.java
@@ -20,7 +20,6 @@
package org.elasticsearch.search.suggest;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
-import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder;
@@ -61,15 +60,4 @@ public static PhraseSuggestionBuilder phraseSuggestion(String name) {
public static CompletionSuggestionBuilder completionSuggestion(String name) {
return new CompletionSuggestionBuilder(name);
}
-
- /**
- * Creates a fuzzy completion suggestion lookup query with the provided name
- *
- * @param name The suggestion name
- * @return a {@link org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder}
- * instance
- */
- public static CompletionSuggestionFuzzyBuilder fuzzyCompletionSuggestion(String name) {
- return new CompletionSuggestionFuzzyBuilder(name);
- }
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java
index ddb2235513e96..a8050d1acaf95 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestContextParser.java
@@ -20,11 +20,12 @@
import org.elasticsearch.common.HasContextAndHeaders;
import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.MapperService;
import java.io.IOException;
public interface SuggestContextParser {
- SuggestionSearchContext.SuggestionContext parse(XContentParser parser, MapperService mapperService, HasContextAndHeaders headersContext) throws IOException;
+ SuggestionSearchContext.SuggestionContext parse(XContentParser parser, MapperService mapperService, IndexFieldDataService indexFieldDataService, HasContextAndHeaders headersContext) throws IOException;
}
\ No newline at end of file
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
index 23bdaab99a496..650eb76b1c4a1 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
@@ -22,6 +22,7 @@
import org.elasticsearch.common.HasContextAndHeaders;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.internal.SearchContext;
@@ -44,13 +45,13 @@ public SuggestParseElement(Suggesters suggesters) {
@Override
public void parse(XContentParser parser, SearchContext context) throws Exception {
- SuggestionSearchContext suggestionSearchContext = parseInternal(parser, context.mapperService(),
+ SuggestionSearchContext suggestionSearchContext = parseInternal(parser, context.mapperService(), context.fieldData(),
context.shardTarget().index(), context.shardTarget().shardId(), context);
context.suggest(suggestionSearchContext);
}
- public SuggestionSearchContext parseInternal(XContentParser parser, MapperService mapperService,
- String index, int shardId, HasContextAndHeaders headersContext) throws IOException {
+ public SuggestionSearchContext parseInternal(XContentParser parser, MapperService mapperService, IndexFieldDataService fieldDataService,
+ String index, int shardId, HasContextAndHeaders headersContext) throws IOException {
SuggestionSearchContext suggestionSearchContext = new SuggestionSearchContext();
BytesRef globalText = null;
@@ -70,6 +71,8 @@ public SuggestionSearchContext parseInternal(XContentParser parser, MapperServic
} else if (token == XContentParser.Token.START_OBJECT) {
String suggestionName = fieldName;
BytesRef suggestText = null;
+ BytesRef prefix = null;
+ BytesRef regex = null;
SuggestionContext suggestionContext = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
@@ -78,6 +81,10 @@ public SuggestionSearchContext parseInternal(XContentParser parser, MapperServic
} else if (token.isValue()) {
if ("text".equals(fieldName)) {
suggestText = parser.utf8Bytes();
+ } else if ("prefix".equals(fieldName)) {
+ prefix = parser.utf8Bytes();
+ } else if ("regex".equals(fieldName)) {
+ regex = parser.utf8Bytes();
} else {
throw new IllegalArgumentException("[suggest] does not support [" + fieldName + "]");
}
@@ -89,14 +96,22 @@ public SuggestionSearchContext parseInternal(XContentParser parser, MapperServic
throw new IllegalArgumentException("Suggester[" + fieldName + "] not supported");
}
final SuggestContextParser contextParser = suggesters.get(fieldName).getContextParser();
- suggestionContext = contextParser.parse(parser, mapperService, headersContext);
+ suggestionContext = contextParser.parse(parser, mapperService, fieldDataService, headersContext);
}
}
if (suggestionContext != null) {
- suggestionContext.setText(suggestText);
+ if (suggestText != null && prefix == null) {
+ suggestionContext.setPrefix(suggestText);
+ suggestionContext.setText(suggestText);
+ } else if (suggestText == null && prefix != null) {
+ suggestionContext.setPrefix(prefix);
+ suggestionContext.setText(prefix);
+ } else if (regex != null) {
+ suggestionContext.setRegex(regex);
+ suggestionContext.setText(regex);
+ }
suggestionContexts.put(suggestionName, suggestionContext);
}
-
}
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java
index 2cb36f5391453..1d3339e0578ba 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java
@@ -40,6 +40,8 @@ public Map suggestions() {
public static class SuggestionContext {
private BytesRef text;
+ private BytesRef prefix;
+ private BytesRef regex;
private final Suggester suggester;
private String field;
private Analyzer analyzer;
@@ -55,7 +57,23 @@ public BytesRef getText() {
public void setText(BytesRef text) {
this.text = text;
}
-
+
+ public BytesRef getPrefix() {
+ return prefix;
+ }
+
+ public void setPrefix(BytesRef prefix) {
+ this.prefix = prefix;
+ }
+
+ public BytesRef getRegex() {
+ return regex;
+ }
+
+ public void setRegex(BytesRef regex) {
+ this.regex = regex;
+ }
+
public SuggestionContext(Suggester suggester) {
this.suggester = suggester;
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java
deleted file mode 100644
index c5b1b5931e94c..0000000000000
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.search.suggest.completion;
-
-import com.carrotsearch.hppc.ObjectLongHashMap;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
-import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.LimitedFiniteStringsIterator;
-import org.apache.lucene.util.fst.ByteSequenceOutputs;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PairOutputs;
-import org.apache.lucene.util.fst.PairOutputs.Pair;
-import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.elasticsearch.common.regex.Regex;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
-import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
-import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider {
-
- // for serialization
- public static final int SERIALIZE_PRESERVE_SEPARATORS = 1;
- public static final int SERIALIZE_HAS_PAYLOADS = 2;
- public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4;
-
- private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
- private static final int MAX_GRAPH_EXPANSIONS = -1;
-
- public static final String CODEC_NAME = "analyzing";
- public static final int CODEC_VERSION_START = 1;
- public static final int CODEC_VERSION_SERIALIZED_LABELS = 2;
- public static final int CODEC_VERSION_CHECKSUMS = 3;
- public static final int CODEC_VERSION_LATEST = CODEC_VERSION_CHECKSUMS;
-
- private final boolean preserveSep;
- private final boolean preservePositionIncrements;
- private final int maxSurfaceFormsPerAnalyzedForm;
- private final int maxGraphExpansions;
- private final boolean hasPayloads;
- private final XAnalyzingSuggester prototype;
-
- public AnalyzingCompletionLookupProvider(boolean preserveSep, boolean exactFirst, boolean preservePositionIncrements, boolean hasPayloads) {
- this.preserveSep = preserveSep;
- this.preservePositionIncrements = preservePositionIncrements;
- this.hasPayloads = hasPayloads;
- this.maxSurfaceFormsPerAnalyzedForm = MAX_SURFACE_FORMS_PER_ANALYZED_FORM;
- this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS;
- int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;
- // needs to fixed in the suggester first before it can be supported
- //options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
- prototype = new XAnalyzingSuggester(null, null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
- }
-
- @Override
- public String getName() {
- return "analyzing";
- }
-
- public boolean getPreserveSep() {
- return preserveSep;
- }
-
- public boolean getPreservePositionsIncrements() {
- return preservePositionIncrements;
- }
-
- public boolean hasPayloads() {
- return hasPayloads;
- }
-
- @Override
- public FieldsConsumer consumer(final IndexOutput output) throws IOException {
- CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION_LATEST);
- return new FieldsConsumer() {
- private Map fieldOffsets = new HashMap<>();
-
- @Override
- public void close() throws IOException {
- try {
- /*
- * write the offsets per field such that we know where
- * we need to load the FSTs from
- */
- long pointer = output.getFilePointer();
- output.writeVInt(fieldOffsets.size());
- for (Map.Entry entry : fieldOffsets.entrySet()) {
- output.writeString(entry.getKey());
- output.writeVLong(entry.getValue());
- }
- output.writeLong(pointer);
- CodecUtil.writeFooter(output);
- } finally {
- IOUtils.close(output);
- }
- }
-
- @Override
- public void write(Fields fields) throws IOException {
- for(String field : fields) {
- Terms terms = fields.terms(field);
- if (terms == null) {
- continue;
- }
- TermsEnum termsEnum = terms.iterator();
- PostingsEnum docsEnum = null;
- final SuggestPayload spare = new SuggestPayload();
- int maxAnalyzedPathsForOneInput = 0;
- final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads, XAnalyzingSuggester.PAYLOAD_SEP);
- int docCount = 0;
- while (true) {
- BytesRef term = termsEnum.next();
- if (term == null) {
- break;
- }
- docsEnum = termsEnum.postings(docsEnum, PostingsEnum.PAYLOADS);
- builder.startTerm(term);
- int docFreq = 0;
- while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
- for (int i = 0; i < docsEnum.freq(); i++) {
- final int position = docsEnum.nextPosition();
- AnalyzingCompletionLookupProvider.this.parsePayload(docsEnum.getPayload(), spare);
- builder.addSurface(spare.surfaceForm.get(), spare.payload.get(), spare.weight);
- // multi fields have the same surface form so we sum up here
- maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position + 1);
- }
- docFreq++;
- docCount = Math.max(docCount, docsEnum.docID()+1);
- }
- builder.finishTerm(docFreq);
- }
- /*
- * Here we are done processing the field and we can
- * buid the FST and write it to disk.
- */
- FST> build = builder.build();
- assert build != null || docCount == 0: "the FST is null but docCount is != 0 actual value: [" + docCount + "]";
- /*
- * it's possible that the FST is null if we have 2 segments that get merged
- * and all docs that have a value in this field are deleted. This will cause
- * a consumer to be created but it doesn't consume any values causing the FSTBuilder
- * to return null.
- */
- if (build != null) {
- fieldOffsets.put(field, output.getFilePointer());
- build.save(output);
- /* write some more meta-info */
- output.writeVInt(maxAnalyzedPathsForOneInput);
- output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
- output.writeInt(maxGraphExpansions); // can be negative
- int options = 0;
- options |= preserveSep ? SERIALIZE_PRESERVE_SEPARATORS : 0;
- options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
- options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
- output.writeVInt(options);
- output.writeVInt(XAnalyzingSuggester.SEP_LABEL);
- output.writeVInt(XAnalyzingSuggester.END_BYTE);
- output.writeVInt(XAnalyzingSuggester.PAYLOAD_SEP);
- output.writeVInt(XAnalyzingSuggester.HOLE_CHARACTER);
- }
- }
- }
- };
- }
-
-
- @Override
- public LookupFactory load(IndexInput input) throws IOException {
- long sizeInBytes = 0;
- int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
- if (version >= CODEC_VERSION_CHECKSUMS) {
- CodecUtil.checksumEntireFile(input);
- }
- final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS? 8 + CodecUtil.footerLength() : 8);
- final Map lookupMap = new HashMap<>();
- input.seek(metaPointerPosition);
- long metaPointer = input.readLong();
- input.seek(metaPointer);
- int numFields = input.readVInt();
-
- Map meta = new TreeMap<>();
- for (int i = 0; i < numFields; i++) {
- String name = input.readString();
- long offset = input.readVLong();
- meta.put(offset, name);
- }
-
- for (Map.Entry entry : meta.entrySet()) {
- input.seek(entry.getKey());
- FST> fst = new FST<>(input, new PairOutputs<>(
- PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
- int maxAnalyzedPathsForOneInput = input.readVInt();
- int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
- int maxGraphExpansions = input.readInt();
- int options = input.readVInt();
- boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPARATORS) != 0;
- boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
- boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
-
- // first version did not include these three fields, so fall back to old default (before the analyzingsuggester
- // was updated in Lucene, so we cannot use the suggester defaults)
- int sepLabel, payloadSep, endByte, holeCharacter;
- switch (version) {
- case CODEC_VERSION_START:
- sepLabel = 0xFF;
- payloadSep = '\u001f';
- endByte = 0x0;
- holeCharacter = '\u001E';
- break;
- default:
- sepLabel = input.readVInt();
- endByte = input.readVInt();
- payloadSep = input.readVInt();
- holeCharacter = input.readVInt();
- }
-
- AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
- hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, holeCharacter);
- sizeInBytes += fst.ramBytesUsed();
- lookupMap.put(entry.getValue(), holder);
- }
- final long ramBytesUsed = sizeInBytes;
- return new LookupFactory() {
- @Override
- public Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) {
- AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName());
- if (analyzingSuggestHolder == null) {
- return null;
- }
- int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0;
-
- final XAnalyzingSuggester suggester;
- final Automaton queryPrefix = fieldType.requiresContext() ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null;
-
- if (suggestionContext.isFuzzy()) {
- suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags,
- analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
- suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
- suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(),
- analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
- analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
- analyzingSuggestHolder.holeCharacter);
- } else {
- suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags,
- analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
- analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
- analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
- analyzingSuggestHolder.holeCharacter);
- }
- return suggester;
- }
-
- @Override
- public CompletionStats stats(String... fields) {
- long sizeInBytes = 0;
- ObjectLongHashMap completionFields = null;
- if (fields != null && fields.length > 0) {
- completionFields = new ObjectLongHashMap<>(fields.length);
- }
-
- for (Map.Entry entry : lookupMap.entrySet()) {
- sizeInBytes += entry.getValue().fst.ramBytesUsed();
- if (fields == null || fields.length == 0) {
- continue;
- }
- if (Regex.simpleMatch(fields, entry.getKey())) {
- long fstSize = entry.getValue().fst.ramBytesUsed();
- completionFields.addTo(entry.getKey(), fstSize);
- }
- }
-
- return new CompletionStats(sizeInBytes, completionFields);
- }
-
- @Override
- AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType) {
- return lookupMap.get(fieldType.names().indexName());
- }
-
- @Override
- public long ramBytesUsed() {
- return ramBytesUsed;
- }
-
- @Override
- public Collection getChildResources() {
- return Accountables.namedAccountables("field", lookupMap);
- }
- };
- }
-
- static class AnalyzingSuggestHolder implements Accountable {
- final boolean preserveSep;
- final boolean preservePositionIncrements;
- final int maxSurfaceFormsPerAnalyzedForm;
- final int maxGraphExpansions;
- final boolean hasPayloads;
- final int maxAnalyzedPathsForOneInput;
- final FST> fst;
- final int sepLabel;
- final int payloadSep;
- final int endByte;
- final int holeCharacter;
-
- public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
- boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST> fst) {
- this(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
- }
-
- public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST> fst, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
- this.preserveSep = preserveSep;
- this.preservePositionIncrements = preservePositionIncrements;
- this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
- this.maxGraphExpansions = maxGraphExpansions;
- this.hasPayloads = hasPayloads;
- this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
- this.fst = fst;
- this.sepLabel = sepLabel;
- this.payloadSep = payloadSep;
- this.endByte = endByte;
- this.holeCharacter = holeCharacter;
- }
-
- public boolean getPreserveSeparator() {
- return preserveSep;
- }
-
- public boolean getPreservePositionIncrements() {
- return preservePositionIncrements;
- }
-
- public boolean hasPayloads() {
- return hasPayloads;
- }
-
- @Override
- public long ramBytesUsed() {
- if (fst != null) {
- return fst.ramBytesUsed();
- } else {
- return 0;
- }
- }
-
- @Override
- public Collection getChildResources() {
- if (fst != null) {
- return Collections.singleton(Accountables.namedAccountable("fst", fst));
- } else {
- return Collections.emptyList();
- }
- }
- }
-
- @Override
- public Set toFiniteStrings(TokenStream stream) throws IOException {
- return prototype.toFiniteStrings(stream);
- }
-
-
-}
\ No newline at end of file
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java
deleted file mode 100644
index 447b3fd71986d..0000000000000
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.elasticsearch.search.suggest.completion;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FilterLeafReader.FilterTerms;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.store.IOContext.Context;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.InputStreamDataInput;
-import org.apache.lucene.store.OutputStreamDataOutput;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.elasticsearch.common.logging.ESLogger;
-import org.elasticsearch.common.logging.Loggers;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
-import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ToFiniteStrings;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import static java.util.Collections.singletonMap;
-
-/**
- * This {@link PostingsFormat} is basically a T-Sink for a default postings
- * format that is used to store postings on disk fitting the lucene APIs and
- * builds a suggest FST as an auxiliary data structure next to the actual
- * postings format. It uses the delegate postings format for simplicity to
- * handle all the merge operations. The auxiliary suggest FST data structure is
- * only loaded if a FieldsProducer is requested for reading, for merging it uses
- * the low memory delegate postings format.
- */
-public class Completion090PostingsFormat extends PostingsFormat {
-
- public static final String CODEC_NAME = "completion090";
- public static final int SUGGEST_CODEC_VERSION = 1;
- public static final int SUGGEST_VERSION_CURRENT = SUGGEST_CODEC_VERSION;
- public static final String EXTENSION = "cmp";
-
- private static final ESLogger logger = Loggers.getLogger(Completion090PostingsFormat.class);
- private static final CompletionLookupProvider LOOKUP_PROVIDER = new AnalyzingCompletionLookupProvider(true, false, true, false);
- private static final Map PROVIDERS = singletonMap(LOOKUP_PROVIDER.getName(), LOOKUP_PROVIDER);
- private PostingsFormat delegatePostingsFormat;
- private CompletionLookupProvider writeProvider;
-
- public Completion090PostingsFormat(PostingsFormat delegatePostingsFormat, CompletionLookupProvider provider) {
- super(CODEC_NAME);
- this.delegatePostingsFormat = delegatePostingsFormat;
- this.writeProvider = provider;
- assert delegatePostingsFormat != null && writeProvider != null;
- }
-
- /*
- * Used only by core Lucene at read-time via Service Provider instantiation
- * do not use at Write-time in application code.
- */
- public Completion090PostingsFormat() {
- super(CODEC_NAME);
- }
-
- @Override
- public CompletionFieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- if (delegatePostingsFormat == null) {
- throw new UnsupportedOperationException("Error - " + getClass().getName()
- + " has been constructed without a choice of PostingsFormat");
- }
- assert writeProvider != null;
- return new CompletionFieldsConsumer(state);
- }
-
- @Override
- public CompletionFieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- return new CompletionFieldsProducer(state);
- }
-
- private class CompletionFieldsConsumer extends FieldsConsumer {
-
- private FieldsConsumer delegatesFieldsConsumer;
- private FieldsConsumer suggestFieldsConsumer;
-
- public CompletionFieldsConsumer(SegmentWriteState state) throws IOException {
- this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
- String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
- IndexOutput output = null;
- boolean success = false;
- try {
- output = state.directory.createOutput(suggestFSTFile, state.context);
- CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_VERSION_CURRENT);
- /*
- * we write the delegate postings format name so we can load it
- * without getting an instance in the ctor
- */
- output.writeString(delegatePostingsFormat.getName());
- output.writeString(writeProvider.getName());
- this.suggestFieldsConsumer = writeProvider.consumer(output);
- success = true;
- } finally {
- if (!success) {
- IOUtils.closeWhileHandlingException(output);
- }
- }
- }
-
- @Override
- public void write(Fields fields) throws IOException {
- delegatesFieldsConsumer.write(fields);
- suggestFieldsConsumer.write(fields);
- }
-
- @Override
- public void close() throws IOException {
- IOUtils.close(delegatesFieldsConsumer, suggestFieldsConsumer);
- }
- }
-
- private static class CompletionFieldsProducer extends FieldsProducer {
- // TODO make this class lazyload all the things in order to take advantage of the new merge instance API
- // today we just load everything up-front
- private final FieldsProducer delegateProducer;
- private final LookupFactory lookupFactory;
- private final int version;
-
- public CompletionFieldsProducer(SegmentReadState state) throws IOException {
- String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
- IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
- version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
- FieldsProducer delegateProducer = null;
- boolean success = false;
- try {
- PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
- String providerName = input.readString();
- CompletionLookupProvider completionLookupProvider = PROVIDERS.get(providerName);
- if (completionLookupProvider == null) {
- throw new IllegalStateException("no provider with name [" + providerName + "] registered");
- }
- // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unecessary heap usage?
- delegateProducer = delegatePostingsFormat.fieldsProducer(state);
- /*
- * If we are merging we don't load the FSTs at all such that we
- * don't consume so much memory during merge
- */
- if (state.context.context != Context.MERGE) {
- // TODO: maybe we can do this in a fully lazy fashion based on some configuration
- // eventually we should have some kind of curciut breaker that prevents us from going OOM here
- // with some configuration
- this.lookupFactory = completionLookupProvider.load(input);
- } else {
- this.lookupFactory = null;
- }
- this.delegateProducer = delegateProducer;
- success = true;
- } finally {
- if (!success) {
- IOUtils.closeWhileHandlingException(delegateProducer, input);
- } else {
- IOUtils.close(input);
- }
- }
- }
-
- @Override
- public void close() throws IOException {
- IOUtils.close(delegateProducer);
- }
-
- @Override
- public Iterator iterator() {
- return delegateProducer.iterator();
- }
-
- @Override
- public Terms terms(String field) throws IOException {
- final Terms terms = delegateProducer.terms(field);
- if (terms == null || lookupFactory == null) {
- return terms;
- }
- return new CompletionTerms(terms, lookupFactory);
- }
-
- @Override
- public int size() {
- return delegateProducer.size();
- }
-
- @Override
- public long ramBytesUsed() {
- return (lookupFactory == null ? 0 : lookupFactory.ramBytesUsed()) + delegateProducer.ramBytesUsed();
- }
-
- @Override
- public Collection getChildResources() {
- List resources = new ArrayList<>();
- if (lookupFactory != null) {
- resources.add(Accountables.namedAccountable("lookup", lookupFactory));
- }
- resources.add(Accountables.namedAccountable("delegate", delegateProducer));
- return Collections.unmodifiableList(resources);
- }
-
- @Override
- public void checkIntegrity() throws IOException {
- delegateProducer.checkIntegrity();
- }
-
- @Override
- public FieldsProducer getMergeInstance() throws IOException {
- return delegateProducer.getMergeInstance();
- }
- }
-
- public static final class CompletionTerms extends FilterTerms {
- private final LookupFactory lookup;
-
- public CompletionTerms(Terms delegate, LookupFactory lookup) {
- super(delegate);
- this.lookup = lookup;
- }
-
- public Lookup getLookup(CompletionFieldMapper.CompletionFieldType mapper, CompletionSuggestionContext suggestionContext) {
- return lookup.getLookup(mapper, suggestionContext);
- }
-
- public CompletionStats stats(String ... fields) {
- return lookup.stats(fields);
- }
- }
-
- public static abstract class CompletionLookupProvider implements PayloadProcessor, ToFiniteStrings {
-
- public static final char UNIT_SEPARATOR = '\u001f';
-
- public abstract FieldsConsumer consumer(IndexOutput output) throws IOException;
-
- public abstract String getName();
-
- public abstract LookupFactory load(IndexInput input) throws IOException;
-
- @Override
- public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
- if (weight < -1 || weight > Integer.MAX_VALUE) {
- throw new IllegalArgumentException("weight must be >= -1 && <= Integer.MAX_VALUE");
- }
- for (int i = 0; i < surfaceForm.length; i++) {
- if (surfaceForm.bytes[i] == UNIT_SEPARATOR) {
- throw new IllegalArgumentException(
- "surface form cannot contain unit separator character U+001F; this character is reserved");
- }
- }
- ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
- OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream);
- output.writeVLong(weight + 1);
- output.writeVInt(surfaceForm.length);
- output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
- output.writeVInt(payload.length);
- output.writeBytes(payload.bytes, 0, payload.length);
-
- output.close();
- return new BytesRef(byteArrayOutputStream.toByteArray());
- }
-
- @Override
- public void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException {
- ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
- InputStreamDataInput input = new InputStreamDataInput(byteArrayInputStream);
- ref.weight = input.readVLong() - 1;
- int len = input.readVInt();
- ref.surfaceForm.grow(len);
- ref.surfaceForm.setLength(len);
- input.readBytes(ref.surfaceForm.bytes(), 0, ref.surfaceForm.length());
- len = input.readVInt();
- ref.payload.grow(len);
- ref.payload.setLength(len);
- input.readBytes(ref.payload.bytes(), 0, ref.payload.length());
- input.close();
- }
- }
-
- public CompletionStats completionStats(IndexReader indexReader, String ... fields) {
- CompletionStats completionStats = new CompletionStats();
- for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
- LeafReader atomicReader = atomicReaderContext.reader();
- try {
- for (String fieldName : atomicReader.fields()) {
- Terms terms = atomicReader.fields().terms(fieldName);
- if (terms instanceof CompletionTerms) {
- CompletionTerms completionTerms = (CompletionTerms) terms;
- completionStats.add(completionTerms.stats(fields));
- }
- }
- } catch (IOException e) {
- logger.error("Could not get completion stats: {}", e, e.getMessage());
- }
- }
-
- return completionStats;
- }
-
- public static abstract class LookupFactory implements Accountable {
- public abstract Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext);
- public abstract CompletionStats stats(String ... fields);
- abstract AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder getAnalyzingSuggestHolder(MappedFieldType fieldType);
- }
-}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionFieldStats.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionFieldStats.java
new file mode 100644
index 0000000000000..e61c221a95937
--- /dev/null
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionFieldStats.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest.completion;
+
+import com.carrotsearch.hppc.ObjectLongHashMap;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.suggest.document.CompletionTerms;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.regex.Regex;
+
+import java.io.IOException;
+
+public class CompletionFieldStats {
+
+ public static CompletionStats completionStats(IndexReader indexReader, String ... fields) {
+ long sizeInBytes = 0;
+ ObjectLongHashMap<String> completionFields = null;
+ if (fields != null && fields.length > 0) {
+ completionFields = new ObjectLongHashMap<>(fields.length);
+ }
+ for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
+ LeafReader atomicReader = atomicReaderContext.reader();
+ try {
+ for (String fieldName : atomicReader.fields()) {
+ Terms terms = atomicReader.fields().terms(fieldName);
+ if (terms instanceof CompletionTerms) {
+ // TODO: currently we load up the suggester for reporting its size
+ long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed();
+ if (fields != null && fields.length > 0 && Regex.simpleMatch(fields, fieldName)) {
+ completionFields.addTo(fieldName, fstSize);
+ }
+ sizeInBytes += fstSize;
+ }
+ }
+ } catch (IOException ioe) {
+ throw new ElasticsearchException(ioe);
+ }
+ }
+ return new CompletionStats(sizeInBytes, completionFields);
+ }
+}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java
index 4f3222f4ff3f7..928a1342ec16a 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java
@@ -18,105 +18,158 @@
*/
package org.elasticsearch.search.suggest.completion;
+import org.apache.lucene.analysis.Analyzer;
+import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.HasContextAndHeaders;
-import org.elasticsearch.common.ParseFieldMatcher;
+import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
+import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
+import org.elasticsearch.index.query.RegexpFlag;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.SuggestionSearchContext;
-import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery;
+import org.elasticsearch.search.suggest.completion.context.ContextMapping;
+import org.elasticsearch.search.suggest.completion.context.ContextMappings;
import java.io.IOException;
-import java.util.List;
-
-import static org.elasticsearch.search.suggest.SuggestUtils.parseSuggestContext;
+import java.util.*;
/**
+ * Parses query options for {@link CompletionSuggester}
+ *
+ * Acceptable input:
+ * {
+ * "field" : STRING
+ * "size" : INT
+ * "fuzzy" : BOOLEAN | FUZZY_OBJECT
+ * "contexts" : QUERY_CONTEXTS
+ * "regex" : REGEX_OBJECT
+ * }
+ *
+ * FUZZY_OBJECT : {
+ * "edit_distance" : STRING | INT
+ * "transpositions" : BOOLEAN
+ * "min_length" : INT
+ * "prefix_length" : INT
+ * "unicode_aware" : BOOLEAN
+ * "max_determinized_states" : INT
+ * }
*
+ * REGEX_OBJECT: {
+ * "flags" : REGEX_FLAGS
+ * "max_determinized_states" : INT
+ * }
+ *
+ * see {@link RegexpFlag} for REGEX_FLAGS
*/
public class CompletionSuggestParser implements SuggestContextParser {
- private CompletionSuggester completionSuggester;
+ private static ObjectParser<CompletionSuggestionContext, ContextAndSuggest> TLP_PARSER = new ObjectParser<>("completion", null);
+ private static ObjectParser<CompletionSuggestionBuilder.RegexOptionsBuilder, ContextAndSuggest> REGEXP_PARSER = new ObjectParser<>("regexp", CompletionSuggestionBuilder.RegexOptionsBuilder::new);
+ private static ObjectParser<CompletionSuggestionBuilder.FuzzyOptionsBuilder, ContextAndSuggest> FUZZY_PARSER = new ObjectParser<>("fuzzy", CompletionSuggestionBuilder.FuzzyOptionsBuilder::new);
+ static {
+ FUZZY_PARSER.declareInt(CompletionSuggestionBuilder.FuzzyOptionsBuilder::setFuzzyMinLength, new ParseField("min_length"));
+ FUZZY_PARSER.declareInt(CompletionSuggestionBuilder.FuzzyOptionsBuilder::setMaxDeterminizedStates, new ParseField("max_determinized_states"));
+ FUZZY_PARSER.declareBoolean(CompletionSuggestionBuilder.FuzzyOptionsBuilder::setUnicodeAware, new ParseField("unicode_aware"));
+ FUZZY_PARSER.declareInt(CompletionSuggestionBuilder.FuzzyOptionsBuilder::setFuzzyPrefixLength, new ParseField("prefix_length"));
+ FUZZY_PARSER.declareBoolean(CompletionSuggestionBuilder.FuzzyOptionsBuilder::setTranspositions, new ParseField("transpositions"));
+ FUZZY_PARSER.declareValue((a, b) -> {
+ try {
+ a.setFuzziness(Fuzziness.parse(b).asDistance());
+ } catch (IOException e) {
+ throw new ElasticsearchException(e);
+ }
+ }, new ParseField("fuzziness"));
+ REGEXP_PARSER.declareInt(CompletionSuggestionBuilder.RegexOptionsBuilder::setMaxDeterminizedStates, new ParseField("max_determinized_states"));
+ REGEXP_PARSER.declareStringOrNull(CompletionSuggestionBuilder.RegexOptionsBuilder::setFlags, new ParseField("flags"));
+
+ TLP_PARSER.declareStringArray(CompletionSuggestionContext::setPayloadFields, new ParseField("payload"));
+ TLP_PARSER.declareObjectOrDefault(CompletionSuggestionContext::setFuzzyOptionsBuilder, FUZZY_PARSER, CompletionSuggestionBuilder.FuzzyOptionsBuilder::new, new ParseField("fuzzy"));
+ TLP_PARSER.declareObject(CompletionSuggestionContext::setRegexOptionsBuilder, REGEXP_PARSER, new ParseField("regexp"));
+ TLP_PARSER.declareString(SuggestionSearchContext.SuggestionContext::setField, new ParseField("field"));
+ TLP_PARSER.declareField((p, v, c) -> {
+ String analyzerName = p.text();
+ Analyzer analyzer = c.mapperService.analysisService().analyzer(analyzerName);
+ if (analyzer == null) {
+ throw new IllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists");
+ }
+ v.setAnalyzer(analyzer);
+ }, new ParseField("analyzer"), ObjectParser.ValueType.STRING);
+ TLP_PARSER.declareString(SuggestionSearchContext.SuggestionContext::setField, new ParseField("analyzer")); // FIXME(review): duplicate "analyzer" key mapped onto setField — clobbers the suggestion field with the analyzer name; looks like a leftover that should be removed
+ TLP_PARSER.declareInt(SuggestionSearchContext.SuggestionContext::setSize, new ParseField("size"));
+ TLP_PARSER.declareInt(SuggestionSearchContext.SuggestionContext::setShardSize, new ParseField("shard_size")); // was "size": duplicated the setSize registration key above
+ TLP_PARSER.declareField((p, v, c) -> {
+ // Copy the current structure; we will parse it once the mapping is provided
+ XContentBuilder builder = XContentFactory.contentBuilder(p.contentType());
+ builder.copyCurrentStructure(p);
+ BytesReference bytes = builder.bytes();
+ c.contextParser = XContentFactory.xContent(bytes).createParser(bytes);
+ p.skipChildren();
+ }, new ParseField("contexts", "context"), ObjectParser.ValueType.OBJECT); // context is deprecated
+ }
+
+ private static class ContextAndSuggest {
+ XContentParser contextParser;
+ final MapperService mapperService;
+
+ ContextAndSuggest(MapperService mapperService) {
+ this.mapperService = mapperService;
+ }
+ }
+
+ private final CompletionSuggester completionSuggester;
public CompletionSuggestParser(CompletionSuggester completionSuggester) {
this.completionSuggester = completionSuggester;
}
@Override
- public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, MapperService mapperService, HasContextAndHeaders headersContext) throws IOException {
- XContentParser.Token token;
- ParseFieldMatcher parseFieldMatcher = mapperService.getIndexSettings().getParseFieldMatcher();
- String fieldName = null;
- CompletionSuggestionContext suggestion = new CompletionSuggestionContext(completionSuggester);
-
- XContentParser contextParser = null;
-
- while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
- if (token == XContentParser.Token.FIELD_NAME) {
- fieldName = parser.currentName();
- } else if (token.isValue()) {
- if (!parseSuggestContext(parser, mapperService, fieldName, suggestion, parseFieldMatcher)) {
- if (token == XContentParser.Token.VALUE_BOOLEAN && "fuzzy".equals(fieldName)) {
- suggestion.setFuzzy(parser.booleanValue());
- }
- }
- } else if (token == XContentParser.Token.START_OBJECT) {
- if("fuzzy".equals(fieldName)) {
- suggestion.setFuzzy(true);
- String fuzzyConfigName = null;
- while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
- if (token == XContentParser.Token.FIELD_NAME) {
- fuzzyConfigName = parser.currentName();
- } else if (token.isValue()) {
- if (parseFieldMatcher.match(fuzzyConfigName, Fuzziness.FIELD)) {
- suggestion.setFuzzyEditDistance(Fuzziness.parse(parser).asDistance());
- } else if ("transpositions".equals(fuzzyConfigName)) {
- suggestion.setFuzzyTranspositions(parser.booleanValue());
- } else if ("min_length".equals(fuzzyConfigName) || "minLength".equals(fuzzyConfigName)) {
- suggestion.setFuzzyMinLength(parser.intValue());
- } else if ("prefix_length".equals(fuzzyConfigName) || "prefixLength".equals(fuzzyConfigName)) {
- suggestion.setFuzzyPrefixLength(parser.intValue());
- } else if ("unicode_aware".equals(fuzzyConfigName) || "unicodeAware".equals(fuzzyConfigName)) {
- suggestion.setFuzzyUnicodeAware(parser.booleanValue());
- }
- }
+ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, MapperService mapperService, IndexFieldDataService fieldDataService,
+ HasContextAndHeaders headersContext) throws IOException {
+ final CompletionSuggestionContext suggestion = new CompletionSuggestionContext(completionSuggester, mapperService, fieldDataService);
+ final ContextAndSuggest contextAndSuggest = new ContextAndSuggest(mapperService);
+ TLP_PARSER.parse(parser, suggestion, contextAndSuggest);
+ final XContentParser contextParser = contextAndSuggest.contextParser;
+ MappedFieldType mappedFieldType = mapperService.smartNameFieldType(suggestion.getField());
+ if (mappedFieldType == null) {
+ throw new ElasticsearchException("Field [" + suggestion.getField() + "] is not a completion suggest field");
+ } else if (mappedFieldType instanceof CompletionFieldMapper.CompletionFieldType) {
+ CompletionFieldMapper.CompletionFieldType type = (CompletionFieldMapper.CompletionFieldType) mappedFieldType;
+ if (type.hasContextMappings() == false && contextParser != null) {
+ throw new IllegalArgumentException("suggester [" + type.names().fullName() + "] doesn't expect any context");
+ }
+ Map> queryContexts = Collections.emptyMap();
+ if (type.hasContextMappings() && contextParser != null) {
+ ContextMappings contextMappings = type.getContextMappings();
+ contextParser.nextToken();
+ queryContexts = new HashMap<>(contextMappings.size());
+ assert contextParser.currentToken() == XContentParser.Token.START_OBJECT;
+ XContentParser.Token currentToken;
+ String currentFieldName;
+ while ((currentToken = contextParser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (currentToken == XContentParser.Token.FIELD_NAME) {
+ currentFieldName = contextParser.currentName();
+ final ContextMapping mapping = contextMappings.get(currentFieldName);
+ queryContexts.put(currentFieldName, mapping.parseQueryContext(contextParser));
}
- } else if("context".equals(fieldName)) {
- // Copy the current structure. We will parse, once the mapping is provided
- XContentBuilder builder = XContentFactory.contentBuilder(parser.contentType());
- builder.copyCurrentStructure(parser);
- BytesReference bytes = builder.bytes();
- contextParser = parser.contentType().xContent().createParser(bytes);
- } else {
- throw new IllegalArgumentException("suggester [completion] doesn't support field [" + fieldName + "]");
}
- } else {
- throw new IllegalArgumentException("suggester[completion] doesn't support field [" + fieldName + "]");
+ contextParser.close();
}
+ suggestion.setFieldType(type);
+ suggestion.setQueryContexts(queryContexts);
+ return suggestion;
+ } else {
+ throw new IllegalArgumentException("Field [" + suggestion.getField() + "] is not a completion suggest field");
}
+ }
- suggestion.fieldType((CompletionFieldMapper.CompletionFieldType) mapperService.smartNameFieldType(suggestion.getField()));
- CompletionFieldMapper.CompletionFieldType fieldType = suggestion.fieldType();
- if (fieldType != null) {
- if (fieldType.requiresContext()) {
- if (contextParser == null) {
- throw new IllegalArgumentException("suggester [completion] requires context to be setup");
- } else {
- contextParser.nextToken();
- List contextQueries = ContextQuery.parseQueries(fieldType.getContextMapping(), contextParser);
- suggestion.setContextQuery(contextQueries);
- }
- } else if (contextParser != null) {
- throw new IllegalArgumentException("suggester [completion] doesn't expect any context");
- }
- }
- return suggestion;
- }
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
index b3e5e2dc2a59e..106672ae7aee7 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
@@ -18,97 +18,242 @@
*/
package org.elasticsearch.search.suggest.completion;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.util.CharsRefBuilder;
-import org.apache.lucene.util.CollectionUtil;
-import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.common.bytes.BytesArray;
+import org.apache.lucene.search.suggest.document.CompletionQuery;
+import org.apache.lucene.search.suggest.document.TopSuggestDocs;
+import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
+import org.apache.lucene.util.*;
+import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.text.StringText;
+import org.elasticsearch.index.fielddata.AtomicFieldData;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.Suggester;
-import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
public class CompletionSuggester extends Suggester {
- private static final ScoreComparator scoreComparator = new ScoreComparator();
-
+ public SuggestContextParser getContextParser() {
+ return new CompletionSuggestParser(this);
+ }
@Override
protected Suggest.Suggestion extends Suggest.Suggestion.Entry extends Suggest.Suggestion.Entry.Option>> innerExecute(String name,
- CompletionSuggestionContext suggestionContext, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
- if (suggestionContext.fieldType() == null) {
- throw new ElasticsearchException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
+ final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
+ final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
+ if (fieldType == null) {
+ throw new IllegalArgumentException("field [" + suggestionContext.getField() + "] is not a completion field");
}
- final IndexReader indexReader = searcher.getIndexReader();
CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
spare.copyUTF8Bytes(suggestionContext.getText());
-
CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
completionSuggestion.addTerm(completionSuggestEntry);
+ TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
+ suggest(searcher, suggestionContext.toQuery(), collector);
+ int numResult = 0;
+ List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
+ for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
+ TopDocumentsCollector.SuggestDoc suggestDoc = (TopDocumentsCollector.SuggestDoc) suggestScoreDoc;
+ // collect contexts
+ Map> contexts = Collections.emptyMap();
+ if (fieldType.hasContextMappings() && suggestDoc.getContexts().isEmpty() == false) {
+ contexts = fieldType.getContextMappings().getNamedContexts(suggestDoc.getContexts());
+ }
+ // collect payloads
+ final Map<String, List<Object>> payload = new HashMap<>(0);
+ Set<String> payloadFields = suggestionContext.getPayloadFields();
+ if (payloadFields.isEmpty() == false) {
+ final int readerIndex = ReaderUtil.subIndex(suggestDoc.doc, leaves);
+ final LeafReaderContext subReaderContext = leaves.get(readerIndex);
+ final int subDocId = suggestDoc.doc - subReaderContext.docBase;
+ for (String field : payloadFields) {
+ MappedFieldType payloadFieldType = suggestionContext.getMapperService().smartNameFieldType(field);
+ if (payloadFieldType != null) {
+ final AtomicFieldData data = suggestionContext.getIndexFieldDataService().getForField(payloadFieldType).load(subReaderContext);
+ final ScriptDocValues scriptValues = data.getScriptValues();
+ scriptValues.setNextDocId(subDocId);
+ payload.put(field, new ArrayList<>(scriptValues.getValues()));
+ } else {
+ throw new IllegalArgumentException("payload field [" + field + "] does not exist");
+ }
+ }
+ }
+ if (numResult++ < suggestionContext.getSize()) {
+ CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(
+ new StringText(suggestDoc.key.toString()), suggestDoc.score, contexts, payload);
+ completionSuggestEntry.addOption(option);
+ } else {
+ break;
+ }
+ }
+ return completionSuggestion;
+ }
- String fieldName = suggestionContext.getField();
- Map results = new HashMap<>(indexReader.leaves().size() * suggestionContext.getSize());
- for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
- LeafReader atomicReader = atomicReaderContext.reader();
- Terms terms = atomicReader.fields().terms(fieldName);
- if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
- final Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
- final Lookup lookup = lookupTerms.getLookup(suggestionContext.fieldType(), suggestionContext);
- if (lookup == null) {
- // we don't have a lookup for this segment.. this might be possible if a merge dropped all
- // docs from the segment that had a value in this segment.
- continue;
+ private static void suggest(IndexSearcher searcher, CompletionQuery query, TopSuggestDocsCollector collector) throws IOException {
+ query = (CompletionQuery) query.rewrite(searcher.getIndexReader());
+ Weight weight = query.createWeight(searcher, collector.needsScores());
+ for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
+ BulkScorer scorer = weight.bulkScorer(context);
+ if (scorer != null) {
+ try {
+ scorer.score(collector.getLeafCollector(context), context.reader().getLiveDocs());
+ } catch (CollectionTerminatedException e) {
+ // collection was terminated prematurely
+ // continue with the following leaf
+ }
+ }
+ }
+ }
+
+ // TODO: this should be refactored and moved to lucene
+ // see https://issues.apache.org/jira/browse/LUCENE-6880
+ private final static class TopDocumentsCollector extends TopSuggestDocsCollector {
+
+ /**
+ * Holds a list of suggest meta data for a doc
+ */
+ private final static class SuggestDoc extends TopSuggestDocs.SuggestScoreDoc {
+
+ private List<TopSuggestDocs.SuggestScoreDoc> suggestScoreDocs;
+
+ public SuggestDoc(int doc, CharSequence key, CharSequence context, float score) {
+ super(doc, key, context, score);
+ }
+
+ void add(CharSequence key, CharSequence context, float score) {
+ if (suggestScoreDocs == null) {
+ suggestScoreDocs = new ArrayList<>(1);
+ }
+ suggestScoreDocs.add(new TopSuggestDocs.SuggestScoreDoc(doc, key, context, score));
+ }
+
+ public List<CharSequence> getKeys() {
+ if (suggestScoreDocs == null) {
+ return Collections.singletonList(key);
+ } else {
+ List<CharSequence> keys = new ArrayList<>(suggestScoreDocs.size() + 1);
+ keys.add(key);
+ for (TopSuggestDocs.SuggestScoreDoc scoreDoc : suggestScoreDocs) {
+ keys.add(scoreDoc.key);
+ }
+ return keys;
+ }
+ }
+
+ public List<CharSequence> getContexts() {
+ if (suggestScoreDocs == null) {
+ if (context != null) {
+ return Collections.singletonList(context);
+ } else {
+ return Collections.emptyList();
+ }
+ } else {
+ List<CharSequence> contexts = new ArrayList<>(suggestScoreDocs.size() + 1);
+ contexts.add(context);
+ for (TopSuggestDocs.SuggestScoreDoc scoreDoc : suggestScoreDocs) {
+ contexts.add(scoreDoc.context);
+ }
+ return contexts;
}
- List lookupResults = lookup.lookup(spare.get(), false, suggestionContext.getSize());
- for (Lookup.LookupResult res : lookupResults) {
-
- final String key = res.key.toString();
- final float score = res.value;
- final Option value = results.get(key);
- if (value == null) {
- final Option option = new CompletionSuggestion.Entry.Option(new StringText(key), score, res.payload == null ? null
- : new BytesArray(res.payload));
- results.put(key, option);
- } else if (value.getScore() < score) {
- value.setScore(score);
- value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
+ }
+ }
+
+ private final static class SuggestDocPriorityQueue extends PriorityQueue<SuggestDoc> {
+
+ public SuggestDocPriorityQueue(int maxSize) {
+ super(maxSize);
+ }
+
+ @Override
+ protected boolean lessThan(SuggestDoc a, SuggestDoc b) {
+ if (a.score == b.score) {
+ int cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.key, b.key);
+ if (cmp == 0) {
+ // prefer smaller doc id, in case of a tie
+ return a.doc > b.doc;
+ } else {
+ return cmp > 0;
}
}
+ return a.score < b.score;
+ }
+
+ public SuggestDoc[] getResults() {
+ int size = size();
+ SuggestDoc[] res = new SuggestDoc[size];
+ for (int i = size - 1; i >= 0; i--) {
+ res[i] = pop();
+ }
+ return res;
}
}
- final List options = new ArrayList<>(results.values());
- CollectionUtil.introSort(options, scoreComparator);
- int optionCount = Math.min(suggestionContext.getSize(), options.size());
- for (int i = 0 ; i < optionCount ; i++) {
- completionSuggestEntry.addOption(options.get(i));
+ private final int num;
+ private final SuggestDocPriorityQueue pq;
+ private final Map<Integer, SuggestDoc> scoreDocMap;
+
+ public TopDocumentsCollector(int num) {
+ super(1); // TODO hack, we don't use the underlying pq, so we allocate a size of 1
+ this.num = num;
+ this.scoreDocMap = new LinkedHashMap<>(num);
+ this.pq = new SuggestDocPriorityQueue(num);
}
- return completionSuggestion;
- }
+ @Override
+ public int getCountToCollect() {
+ // This is only needed because we initialize
+ // the base class with 1 instead of the actual num
+ return num;
+ }
- @Override
- public SuggestContextParser getContextParser() {
- return new CompletionSuggestParser(this);
- }
- public static class ScoreComparator implements Comparator {
@Override
- public int compare(Option o1, Option o2) {
- return Float.compare(o2.getScore(), o1.getScore());
+ protected void doSetNextReader(LeafReaderContext context) throws IOException {
+ super.doSetNextReader(context);
+ updateResults();
+ }
+
+ private void updateResults() {
+ for (SuggestDoc suggestDoc : scoreDocMap.values()) {
+ if (pq.insertWithOverflow(suggestDoc) == suggestDoc) {
+ break;
+ }
+ }
+ scoreDocMap.clear();
+ }
+
+ @Override
+ public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
+ if (scoreDocMap.containsKey(docID)) {
+ SuggestDoc suggestDoc = scoreDocMap.get(docID);
+ suggestDoc.add(key, context, score);
+ } else if (scoreDocMap.size() <= num) {
+ scoreDocMap.put(docID, new SuggestDoc(docBase + docID, key, context, score));
+ } else {
+ throw new CollectionTerminatedException();
+ }
+ }
+
+ @Override
+ public TopSuggestDocs get() throws IOException {
+ updateResults(); // to empty the last set of collected suggest docs
+ TopSuggestDocs.SuggestScoreDoc[] suggestScoreDocs = pq.getResults();
+ if (suggestScoreDocs.length > 0) {
+ return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
+ } else {
+ return TopSuggestDocs.EMPTY;
+ }
}
}
}
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java
index 83515ff74f3f3..66c21c5816231 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestion.java
@@ -18,23 +18,37 @@
*/
package org.elasticsearch.search.suggest.completion;
-import org.elasticsearch.common.bytes.BytesReference;
+import org.apache.lucene.search.suggest.Lookup;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.search.suggest.Suggest;
import java.io.IOException;
-import java.util.Map;
+import java.util.*;
/**
+ * Suggestion response for {@link CompletionSuggester} results
+ *
+ * Response format for each entry:
+ * {
+ * "text" : STRING
+ * "score" : FLOAT
+ * "contexts" : CONTEXTS
+ * }
+ *
+ * CONTEXTS : {
+ * "CONTEXT_NAME" : ARRAY,
+ * ..
+ * }
*
*/
-public class CompletionSuggestion extends Suggest.Suggestion {
+public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestion.Entry> {
- public static final int TYPE = 2;
+ public static final int TYPE = 4;
public CompletionSuggestion() {
}
@@ -43,6 +57,62 @@ public CompletionSuggestion(String name, int size) {
super(name, size);
}
+ private static final class OptionPriorityQueue extends org.apache.lucene.util.PriorityQueue {
+
+ private final Comparator comparator;
+
+ OptionPriorityQueue(int maxSize, Comparator comparator) {
+ super(maxSize);
+ this.comparator = comparator;
+ }
+
+ @Override
+ protected boolean lessThan(Entry.Option a, Entry.Option b) {
+ int cmp = comparator.compare(a, b);
+ if (cmp != 0) {
+ return cmp > 0;
+ }
+ return Lookup.CHARSEQUENCE_COMPARATOR.compare(a.getText().string(), b.getText().string()) > 0;
+ }
+
+ Entry.Option[] get() {
+ int size = size();
+ Entry.Option[] results = new Entry.Option[size];
+ for (int i = size - 1; i >= 0; i--) {
+ results[i] = pop();
+ }
+ return results;
+ }
+ }
+
+ @Override
+ public Suggest.Suggestion reduce(List> toReduce) {
+ if (toReduce.size() == 1) {
+ return toReduce.get(0);
+ } else {
+ // combine suggestion entries from participating shards on the coordinating node
+ // the global top size entries are collected from the shard results
+ // using a priority queue
+ Comparator optionComparator = sortComparator();
+ OptionPriorityQueue priorityQueue = new OptionPriorityQueue(size, sortComparator());
+ for (Suggest.Suggestion entries : toReduce) {
+ assert entries.getEntries().size() == 1 : "CompletionSuggestion must have only one entry";
+ for (Entry.Option option : entries.getEntries().get(0)) {
+ if (option == priorityQueue.insertWithOverflow(option)) {
+ // if the current option has overflown from pq,
+ // we can assume all of the successive options
+ // from this shard result will be overflown as well
+ break;
+ }
+ }
+ }
+ Entry options = this.entries.get(0);
+ options.getOptions().clear();
+ Collections.addAll(options.getOptions(), priorityQueue.get());
+ return this;
+ }
+ }
+
@Override
public int getType() {
return TYPE;
@@ -53,7 +123,7 @@ protected Entry newEntry() {
return new Entry();
}
- public static class Entry extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry {
+ public final static class Entry extends Suggest.Suggestion.Entry {
public Entry(Text text, int offset, int length) {
super(text, offset, length);
@@ -68,41 +138,33 @@ protected Option newOption() {
return new Option();
}
- public static class Option extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option {
- private BytesReference payload;
+ public static class Option extends Suggest.Suggestion.Entry.Option {
+ private Map> contexts;
+ private Map> payload;
- public Option(Text text, float score, BytesReference payload) {
+ public Option(Text text, float score, Map> contexts, Map> payload) {
super(text, score);
this.payload = payload;
+ this.contexts = contexts;
}
-
protected Option() {
super();
}
- public void setPayload(BytesReference payload) {
- this.payload = payload;
+ @Override
+ protected void mergeInto(Suggest.Suggestion.Entry.Option otherOption) {
+ // Completion suggestions are reduced by
+ // org.elasticsearch.search.suggest.completion.CompletionSuggestion.reduce()
+ throw new UnsupportedOperationException();
}
- public BytesReference getPayload() {
+ public Map> getPayload() {
return payload;
}
- public String getPayloadAsString() {
- return payload.toUtf8();
- }
-
- public long getPayloadAsLong() {
- return Long.parseLong(payload.toUtf8());
- }
-
- public double getPayloadAsDouble() {
- return Double.parseDouble(payload.toUtf8());
- }
-
- public Map getPayloadAsMap() {
- return XContentHelper.convertToMap(payload, false).v2();
+ public Map> getContexts() {
+ return contexts;
}
@Override
@@ -113,8 +175,27 @@ public void setScore(float score) {
@Override
protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
super.innerToXContent(builder, params);
- if (payload != null && payload.length() > 0) {
- builder.rawField("payload", payload);
+ if (payload.size() > 0) {
+ builder.startObject("payload");
+ for (Map.Entry> entry : payload.entrySet()) {
+ builder.startArray(entry.getKey());
+ for (Object payload : entry.getValue()) {
+ builder.value(payload);
+ }
+ builder.endArray();
+ }
+ builder.endObject();
+ }
+ if (contexts.size() > 0) {
+ builder.startObject("contexts");
+ for (Map.Entry> entry : contexts.entrySet()) {
+ builder.startArray(entry.getKey());
+ for (CharSequence context : entry.getValue()) {
+ builder.value(context.toString());
+ }
+ builder.endArray();
+ }
+ builder.endObject();
}
return builder;
}
@@ -122,14 +203,78 @@ protected XContentBuilder innerToXContent(XContentBuilder builder, Params params
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
- payload = in.readBytesReference();
+ int payloadSize = in.readInt();
+ this.payload = new LinkedHashMap<>(payloadSize);
+ for (int i = 0; i < payloadSize; i++) {
+ String payloadName = in.readString();
+ int nValues = in.readVInt();
+ List