elastic · areek · Aug 1, 2015 · Mar 31, 2015 · Jul 13, 2015 · Jul 14, 2015
diff --git a/core/src/main/java/org/apache/lucene/search/suggest/xdocument/CompletionTokenStream.java b/core/src/main/java/org/apache/lucene/search/suggest/xdocument/CompletionTokenStream.java
@@ -27,7 +27,7 @@
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.Transition;
-import org.apache.lucene.util.fst.XUtil;
+import org.apache.lucene.util.fst.Util;
 
 import java.io.IOException;
 import java.util.BitSet;
@@ -107,7 +107,7 @@ public boolean incrementToken() throws IOException {
        * produced. Multi Fields have the same surface form and therefore sum up
        */
       posInc = 0;
-      XUtil.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
+      Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
       if (charTermAttribute != null) {
         charTermAttribute.setLength(0);
         charTermAttribute.append(bytesAtt.toUTF16());

diff --git a/core/src/main/java/org/apache/lucene/search/suggest/xdocument/CompletionWeight.java b/core/src/main/java/org/apache/lucene/search/suggest/xdocument/CompletionWeight.java
@@ -27,15 +27,14 @@
 import org.apache.lucene.util.automaton.Automaton;
 
 import java.io.IOException;
-import java.util.List;
 import java.util.Set;
 
 /**
  * Expert: the Weight for CompletionQuery, used to
  * score and explain these queries.
  *
  * Subclasses can override {@link #setNextMatch(IntsRef)},
- * {@link #boost()} and {@link #contexts()}
+ * {@link #boost()} and {@link #context()}
  * to calculate the boost and extract the context of
  * a matched path prefix.
  *
@@ -103,7 +102,7 @@ public BulkScorer bulkScorer(final LeafReaderContext context, Bits acceptDocs) t
    * Set for every partial path in the index that matched the query
    * automaton.
    *
-   * Subclasses should override {@link #boost()} and {@link #contexts()}
+   * Subclasses should override {@link #boost()} and {@link #context()}
    * to return an appropriate value with respect to the current pathPrefix.
    *
    * @param pathPrefix the prefix of a matched path
@@ -125,7 +124,7 @@ protected float boost() {
    *
    * @return suggestion context
    */
-  protected List<CharSequence> contexts() {
+  protected CharSequence context() {
     return null;
   }
 

diff --git a/core/src/main/java/org/apache/lucene/search/suggest/xdocument/ContextQuery.java b/core/src/main/java/org/apache/lucene/search/suggest/xdocument/ContextQuery.java
@@ -26,8 +26,7 @@
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
-import org.apache.lucene.util.fst.XUtil;
+import org.apache.lucene.util.fst.Util;
 
 import java.io.IOException;
 import java.util.*;
@@ -56,25 +55,26 @@
  *    or {@link FuzzyCompletionQuery} query.
  *   </li>
  *   <li>
- *     To suggest across all contexts with the same boost,
- *     use '*' as the context in {@link #addContext(CharSequence)})}.
- *     This can be combined with specific contexts with different boosts.
+ *     To suggest across all contexts, use {@link #addAllContexts()}.
+ *     When no context is added, the default behaviour is to suggest across
+ *     all contexts.
  *   </li>
  *   <li>
  *     To apply the same boost to multiple contexts sharing the same prefix,
  *     Use {@link #addContext(CharSequence, float, boolean)} with the common
  *     context prefix, boost and set <code>exact</code> to false.
  *   <li>
- *     Using this query against a {@link SuggestField} (not context enabled),
+ *     Using this query against a {@link org.apache.lucene.search.suggest.document.SuggestField} (not context enabled),
  *     would yield results ignoring any context filtering/boosting
  *   </li>
  * </ul>
  *
  * @lucene.experimental
  */
 public class ContextQuery extends CompletionQuery {
-  protected Map<CharSequence, ContextMetaData> contexts;
-  protected boolean matchAllContexts = false;
+  private IntsRefBuilder scratch = new IntsRefBuilder();
+  private Map<IntsRef, ContextMetaData> contexts;
+  private boolean matchAllContexts = false;
   /** Inner completion query */
   protected CompletionQuery innerQuery;
 
@@ -85,14 +85,13 @@ public class ContextQuery extends CompletionQuery {
    * Use {@link #addContext(CharSequence, float, boolean)}
    * to add context(s) with boost
    */
-  public ContextQuery(CompletionQuery innerQuery) {
-    super(innerQuery.getTerm(), innerQuery.getFilter());
-    /*
+  public ContextQuery(CompletionQuery query) {
+    super(query.getTerm(), query.getFilter());
     if (query instanceof ContextQuery) {
       throw new IllegalArgumentException("'query' parameter must not be of type "
               + this.getClass().getSimpleName());
-    }*/
-    this.innerQuery = innerQuery;
+    }
+    this.innerQuery = query;
     contexts = new HashMap<>();
   }
 
@@ -121,51 +120,31 @@ public void addContext(CharSequence context, float boost, boolean exact) {
     for (int i = 0; i < context.length(); i++) {
       if (ContextSuggestField.CONTEXT_SEPARATOR == context.charAt(i)) {
         throw new IllegalArgumentException("Illegal value [" + context + "] UTF-16 codepoint [0x"
-            + Integer.toHexString((int) context.charAt(i))+ "] at position " + i + " is a reserved character");
+                + Integer.toHexString((int) context.charAt(i))+ "] at position " + i + " is a reserved character");
       }
     }
-    contexts.put(context, new ContextMetaData(boost, exact));
+    contexts.put(IntsRef.deepCopyOf(Util.toIntsRef(new BytesRef(context), scratch)), new ContextMetaData(boost, exact));
   }
 
+  /**
+   * Suggest across all contexts, each with a boost of 1f
+   */
   public void addAllContexts() {
     matchAllContexts = true;
   }
 
-  protected Automaton contextAutomaton() {
-    final Automaton matchAllAutomaton = new RegExp(".*").toAutomaton();
-    final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
-    if (matchAllContexts || contexts.size() == 0) {
-      return Operations.concatenate(matchAllAutomaton, sep);
-    } else {
-      Automaton contextsAutomaton = null;
-      for (Map.Entry<CharSequence, ContextMetaData> entry : contexts.entrySet()) {
-        final ContextMetaData contextMetaData = entry.getValue();
-        Automaton contextAutomaton = Automata.makeString(entry.getKey().toString());
-        if (contextMetaData.exact == false) {
-          contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
-        }
-        contextAutomaton = Operations.concatenate(contextAutomaton, sep);
-        if (contextsAutomaton == null) {
-          contextsAutomaton = contextAutomaton;
-        } else {
-          contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
-        }
-      }
-      return contextsAutomaton;
-    }
-  }
-
   @Override
   public String toString(String field) {
     StringBuilder buffer = new StringBuilder();
-    for (CharSequence context : contexts.keySet()) {
+    BytesRefBuilder scratch = new BytesRefBuilder();
+    for (IntsRef context : contexts.keySet()) {
       if (buffer.length() != 0) {
         buffer.append(",");
       } else {
         buffer.append("contexts");
         buffer.append(":[");
       }
-      buffer.append(context);
+      buffer.append(Util.toBytesRef(context, scratch).utf8ToString());
       ContextMetaData metaData = contexts.get(context);
       if (metaData.exact == false) {
         buffer.append("*");
@@ -185,27 +164,20 @@ public String toString(String field) {
   @Override
   public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
     final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, needsScores));
-    Automaton contextsAutomaton;
-    if (innerQuery instanceof ContextQuery) {
-      contextsAutomaton = Operations.concatenate(contextAutomaton(), innerWeight.getAutomaton());
-    } else {
-      // if separators are preserved the fst contains a SEP_LABEL
-      // behind each gap. To have a matching automaton, we need to
-      // include the SEP_LABEL in the query as well
-      Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
-      Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
-      contextsAutomaton = Operations.concatenate(contextAutomaton(), prefixAutomaton);
-    }
+    // if separators are preserved the fst contains a SEP_LABEL
+    // behind each gap. To have a matching automaton, we need to
+    // include the SEP_LABEL in the query as well
+    Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
+    Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
+    Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
     contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
 
     final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
     final TreeSet<Integer> contextLengths = new TreeSet<>();
-    IntsRefBuilder scratch = new IntsRefBuilder();
-    for (Map.Entry<CharSequence, ContextMetaData> entry : contexts.entrySet()) {
-      BytesRef ref = new BytesRef(entry.getKey());
+    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
       ContextMetaData contextMetaData = entry.getValue();
-      contextMap.put(IntsRef.deepCopyOf(XUtil.toIntsRef(ref, scratch)), contextMetaData.boost);
-      contextLengths.add(scratch.length());
+      contextMap.put(entry.getKey(), contextMetaData.boost);
+      contextLengths.add(entry.getKey().length);
     }
     int[] contextLengthArray = new int[contextLengths.size()];
     final Iterator<Integer> iterator = contextLengths.descendingIterator();
@@ -215,8 +187,47 @@ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws I
     return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
   }
 
+  /**
+   * Builds the automaton accepting every allowed context followed by the context separator.
+   * With matchAllContexts set, or when no context was added, any context value is accepted.
+   */
+  private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
+    // makeAnyString() accepts every string, the empty one included
+    final Automaton matchAllAutomaton = Automata.makeAnyString();
+    final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
+    if (matchAllContexts || contexts.isEmpty()) {
+      return Operations.concatenate(matchAllAutomaton, sep);
+    }
+    Automaton contextsAutomaton = null;
+    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
+      final IntsRef ref = entry.getKey();
+      Automaton contextAutomaton = Automata.makeString(ref.ints, ref.offset, ref.length);
+      if (entry.getValue().exact == false) {
+        // prefix context: allow any continuation before the separator
+        contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
+      }
+      contextAutomaton = Operations.concatenate(contextAutomaton, sep);
+      contextsAutomaton = contextsAutomaton == null ? contextAutomaton : Operations.union(contextsAutomaton, contextAutomaton);
+    }
+    return contextsAutomaton;
+  }
+
+  /**
+   * Holder for context value meta data
+   */
   private static class ContextMetaData {
+
+    /**
+     * Boost associated with a
+     * context value
+     */
     private final float boost;
+
+    /**
+     * flag to indicate whether the context
+     * value should be treated as an exact
+     * value or a context prefix
+     */
     private final boolean exact;
 
     private ContextMetaData(float boost, boolean exact) {
@@ -274,8 +285,7 @@ private void setInnerWeight(IntsRef ref, int offset) {
         if (ref.ints[ref.offset + i] == ContextSuggestField.CONTEXT_SEPARATOR) {
           if (i > 0) {
             refBuilder.copyInts(ref.ints, ref.offset, i);
-            currentContext = XUtil.toBytesRef(refBuilder.get(), scratch).utf8ToString();
-            refBuilder.clear();
+            currentContext = Util.toBytesRef(refBuilder.get(), scratch).utf8ToString();
           } else {
             currentContext = null;
           }
@@ -294,13 +304,8 @@ private void setInnerWeight(IntsRef ref, int offset) {
     }
 
     @Override
-    protected List<CharSequence> contexts() {
-      final List<CharSequence> contexts = new ArrayList<>();
-      contexts.add(currentContext);
-      if (innerWeight instanceof ContextCompletionWeight) {
-        contexts.addAll(innerWeight.contexts());
-      }
-      return contexts;
+    protected CharSequence context() {
+      return currentContext;
     }
 
     @Override

diff --git a/core/src/main/java/org/apache/lucene/search/suggest/xdocument/ContextSuggestField.java b/core/src/main/java/org/apache/lucene/search/suggest/xdocument/ContextSuggestField.java
@@ -23,6 +23,7 @@
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
@@ -70,31 +71,31 @@ public ContextSuggestField(String name, String value, int weight, CharSequence..
     validate(value);
     this.contexts = new HashSet<>((contexts != null) ? contexts.length : 0);
     if (contexts != null) {
-      for (CharSequence context : contexts) {
-        validate(context);
-        this.contexts.add(context);
-      }
+      Collections.addAll(this.contexts, contexts);
     }
   }
 
   /**
-   * Sub-classes can inject contexts at
-   * index-time by overriding
+   * Expert: Sub-classes can inject contexts at
+   * index-time by overriding this method
    */
-  protected Set<CharSequence> contexts() {
+  protected Iterable<CharSequence> contexts() {
     return contexts;
   }
 
   @Override
   protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
-    CompletionTokenStream completionTokenStream;
+    for (CharSequence context : contexts()) {
+      validate(context);
+    }
     PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts());
+    CompletionTokenStream completionTokenStream;
     if (stream instanceof CompletionTokenStream) {
       completionTokenStream = (CompletionTokenStream) stream;
       completionTokenStream = new CompletionTokenStream(prefixTokenFilter,
-          completionTokenStream.preserveSep,
-          completionTokenStream.preservePositionIncrements,
-          completionTokenStream.maxGraphExpansions);
+              completionTokenStream.preserveSep,
+              completionTokenStream.preservePositionIncrements,
+              completionTokenStream.maxGraphExpansions);
     } else {
       completionTokenStream = new CompletionTokenStream(prefixTokenFilter);
     }
@@ -161,11 +162,11 @@ public void reset() throws IOException {
     }
   }
 
-  protected void validate(final CharSequence value) {
+  private void validate(final CharSequence value) {
     for (int i = 0; i < value.length(); i++) {
       if (CONTEXT_SEPARATOR == value.charAt(i)) {
         throw new IllegalArgumentException("Illegal value [" + value + "] UTF-16 codepoint [0x"
-            + Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character");
+                + Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character");
       }
     }
   }

diff --git a/core/src/main/java/org/apache/lucene/search/suggest/xdocument/NRTSuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/xdocument/NRTSuggester.java
@@ -123,7 +123,7 @@ public Collection<Accountable> getChildResources() {
    * the matched partial paths. Upon reaching a completed path, {@link CompletionScorer#accept(int)}
    * and {@link CompletionScorer#score(float, float)} is used on the document id, index weight
    * and query boost to filter and score the entry, before being collected via
-   * {@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence[], float)}
+   * {@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence, float)}
    */
   public void lookup(final CompletionScorer scorer, final TopSuggestDocsCollector collector) throws IOException {
     final double liveDocsRatio = calculateLiveDocRatio(scorer.reader.numDocs(), scorer.reader.maxDoc());
@@ -148,7 +148,7 @@ protected boolean acceptResult(XUtil.FSTPath<Pair<Long, BytesRef>> path) {
         }
         try {
           float score = scorer.score(decode(path.cost.output1), path.boost);
-          collector.collect(docID, spare.toCharsRef(), path.contexts, score);
+          collector.collect(docID, spare.toCharsRef(), path.context, score);
           return true;
         } catch (IOException e) {
           throw new RuntimeException(e);
@@ -158,14 +158,7 @@ protected boolean acceptResult(XUtil.FSTPath<Pair<Long, BytesRef>> path) {
 
     for (FSTUtil.Path<Pair<Long, BytesRef>> path : prefixPaths) {
       scorer.weight.setNextMatch(path.input.get());
-      List<CharSequence> contexts = scorer.weight.contexts();
-      final CharSequence[] contextArray;
-      if (contexts != null) {
-        contextArray = contexts.toArray(new CharSequence[contexts.size()]);
-      } else {
-        contextArray = null;
-      }
-      searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(), contextArray);
+      searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(), scorer.weight.context());
     }
     // hits are also returned by search()
     // we do not use it, instead collect at acceptResult