Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.XUtil;
import org.apache.lucene.util.fst.Util;

import java.io.IOException;
import java.util.BitSet;
Expand Down Expand Up @@ -107,7 +107,7 @@ public boolean incrementToken() throws IOException {
* produced. Multi Fields have the same surface form and therefore sum up
*/
posInc = 0;
XUtil.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
if (charTermAttribute != null) {
charTermAttribute.setLength(0);
charTermAttribute.append(bytesAtt.toUTF16());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@
import org.apache.lucene.util.automaton.Automaton;

import java.io.IOException;
import java.util.List;
import java.util.Set;

/**
* Expert: the Weight for CompletionQuery, used to
* score and explain these queries.
*
* Subclasses can override {@link #setNextMatch(IntsRef)},
* {@link #boost()} and {@link #contexts()}
* {@link #boost()} and {@link #context()}
* to calculate the boost and extract the context of
* a matched path prefix.
*
Expand Down Expand Up @@ -103,7 +102,7 @@ public BulkScorer bulkScorer(final LeafReaderContext context, Bits acceptDocs) t
* Set for every partial path in the index that matched the query
* automaton.
*
* Subclasses should override {@link #boost()} and {@link #contexts()}
* Subclasses should override {@link #boost()} and {@link #context()}
* to return an appropriate value with respect to the current pathPrefix.
*
* @param pathPrefix the prefix of a matched path
Expand All @@ -125,7 +124,7 @@ protected float boost() {
*
* @return suggestion context
*/
protected List<CharSequence> contexts() {
protected CharSequence context() {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.fst.XUtil;
import org.apache.lucene.util.fst.Util;

import java.io.IOException;
import java.util.*;
Expand Down Expand Up @@ -56,25 +55,26 @@
* or {@link FuzzyCompletionQuery} query.
* </li>
* <li>
* To suggest across all contexts with the same boost,
* use '*' as the context in {@link #addContext(CharSequence)})}.
* This can be combined with specific contexts with different boosts.
* To suggest across all contexts, use {@link #addAllContexts()}.
* When no context is added, the default behaviour is to suggest across
* all contexts.
* </li>
* <li>
* To apply the same boost to multiple contexts sharing the same prefix,
* use {@link #addContext(CharSequence, float, boolean)} with the common
* context prefix and boost, and set <code>exact</code> to false.
* </li>
* <li>
* Using this query against a {@link SuggestField} (not context enabled),
* Using this query against a {@link org.apache.lucene.search.suggest.document.SuggestField} (not context enabled),
* would yield results ignoring any context filtering/boosting
* </li>
* </ul>
*
* @lucene.experimental
*/
public class ContextQuery extends CompletionQuery {
protected Map<CharSequence, ContextMetaData> contexts;
protected boolean matchAllContexts = false;
private IntsRefBuilder scratch = new IntsRefBuilder();
private Map<IntsRef, ContextMetaData> contexts;
private boolean matchAllContexts = false;
/** Inner completion query */
protected CompletionQuery innerQuery;

Expand All @@ -85,14 +85,13 @@ public class ContextQuery extends CompletionQuery {
* Use {@link #addContext(CharSequence, float, boolean)}
* to add context(s) with boost
*/
public ContextQuery(CompletionQuery innerQuery) {
super(innerQuery.getTerm(), innerQuery.getFilter());
/*
public ContextQuery(CompletionQuery query) {
super(query.getTerm(), query.getFilter());
if (query instanceof ContextQuery) {
throw new IllegalArgumentException("'query' parameter must not be of type "
+ this.getClass().getSimpleName());
}*/
this.innerQuery = innerQuery;
}
this.innerQuery = query;
contexts = new HashMap<>();
}

Expand Down Expand Up @@ -121,51 +120,31 @@ public void addContext(CharSequence context, float boost, boolean exact) {
for (int i = 0; i < context.length(); i++) {
if (ContextSuggestField.CONTEXT_SEPARATOR == context.charAt(i)) {
throw new IllegalArgumentException("Illegal value [" + context + "] UTF-16 codepoint [0x"
+ Integer.toHexString((int) context.charAt(i))+ "] at position " + i + " is a reserved character");
+ Integer.toHexString((int) context.charAt(i))+ "] at position " + i + " is a reserved character");
}
}
contexts.put(context, new ContextMetaData(boost, exact));
contexts.put(IntsRef.deepCopyOf(Util.toIntsRef(new BytesRef(context), scratch)), new ContextMetaData(boost, exact));
}

/**
 * Marks this query as matching every context, so no per-context
 * restriction is applied when the context automaton is built.
 * All contexts are added with a boost of 1f.
 */
public void addAllContexts() {
  this.matchAllContexts = true;
}

/**
 * Builds the automaton accepting the context part of a suggestion path,
 * always terminated by {@link ContextSuggestField#CONTEXT_SEPARATOR}.
 *
 * When match-all is requested, or no context has been registered, the
 * result accepts any string followed by the separator. Otherwise it is
 * the union, over all registered contexts, of the context string
 * (followed by any string when the context is not exact) and the separator.
 *
 * @return the automaton for the context part of the query
 */
protected Automaton contextAutomaton() {
  final Automaton anyContext = new RegExp(".*").toAutomaton();
  final Automaton separator = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);

  // Nothing to restrict on: accept any context followed by the separator.
  if (matchAllContexts || contexts.isEmpty()) {
    return Operations.concatenate(anyContext, separator);
  }

  Automaton combined = null;
  for (Map.Entry<CharSequence, ContextMetaData> registered : contexts.entrySet()) {
    Automaton single = Automata.makeString(registered.getKey().toString());
    if (!registered.getValue().exact) {
      // Prefix context: allow arbitrary trailing characters before the separator.
      single = Operations.concatenate(single, anyContext);
    }
    single = Operations.concatenate(single, separator);
    combined = (combined == null) ? single : Operations.union(combined, single);
  }
  return combined;
}

@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
for (CharSequence context : contexts.keySet()) {
BytesRefBuilder scratch = new BytesRefBuilder();
for (IntsRef context : contexts.keySet()) {
if (buffer.length() != 0) {
buffer.append(",");
} else {
buffer.append("contexts");
buffer.append(":[");
}
buffer.append(context);
buffer.append(Util.toBytesRef(context, scratch).utf8ToString());
ContextMetaData metaData = contexts.get(context);
if (metaData.exact == false) {
buffer.append("*");
Expand All @@ -185,27 +164,20 @@ public String toString(String field) {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, needsScores));
Automaton contextsAutomaton;
if (innerQuery instanceof ContextQuery) {
contextsAutomaton = Operations.concatenate(contextAutomaton(), innerWeight.getAutomaton());
} else {
// if separators are preserved the fst contains a SEP_LABEL
// behind each gap. To have a matching automaton, we need to
// include the SEP_LABEL in the query as well
Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
contextsAutomaton = Operations.concatenate(contextAutomaton(), prefixAutomaton);
}
// if separators are preserved the fst contains a SEP_LABEL
// behind each gap. To have a matching automaton, we need to
// include the SEP_LABEL in the query as well
Automaton optionalSepLabel = Operations.optional(Automata.makeChar(CompletionAnalyzer.SEP_LABEL));
Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerWeight.getAutomaton());
Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
final TreeSet<Integer> contextLengths = new TreeSet<>();
IntsRefBuilder scratch = new IntsRefBuilder();
for (Map.Entry<CharSequence, ContextMetaData> entry : contexts.entrySet()) {
BytesRef ref = new BytesRef(entry.getKey());
for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
ContextMetaData contextMetaData = entry.getValue();
contextMap.put(IntsRef.deepCopyOf(XUtil.toIntsRef(ref, scratch)), contextMetaData.boost);
contextLengths.add(scratch.length());
contextMap.put(entry.getKey(), contextMetaData.boost);
contextLengths.add(entry.getKey().length);
}
int[] contextLengthArray = new int[contextLengths.size()];
final Iterator<Integer> iterator = contextLengths.descendingIterator();
Expand All @@ -215,8 +187,47 @@ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws I
return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}

/**
 * Builds the automaton accepting the context part of a suggestion path,
 * always terminated by {@link ContextSuggestField#CONTEXT_SEPARATOR}.
 *
 * @param contexts         registered contexts keyed by their {@link IntsRef}
 *                         form, each mapped to its meta data
 * @param matchAllContexts when true, the registered contexts are ignored
 *                         and any context is accepted
 * @return an automaton accepting any string plus separator when
 *         {@code matchAllContexts} is set or {@code contexts} is empty;
 *         otherwise the union of every registered context (followed by any
 *         string when it is not exact) plus separator
 */
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton anyContext = Operations.repeat(Automata.makeAnyString());
  final Automaton separator = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);

  // Nothing to restrict on: accept any context followed by the separator.
  if (matchAllContexts || contexts.isEmpty()) {
    return Operations.concatenate(anyContext, separator);
  }

  Automaton combined = null;
  for (Map.Entry<IntsRef, ContextMetaData> registered : contexts.entrySet()) {
    final IntsRef key = registered.getKey();
    Automaton single = Automata.makeString(key.ints, key.offset, key.length);
    if (!registered.getValue().exact) {
      // Prefix context: allow arbitrary trailing input before the separator.
      single = Operations.concatenate(single, anyContext);
    }
    single = Operations.concatenate(single, separator);
    combined = (combined == null) ? single : Operations.union(combined, single);
  }
  return combined;
}

/**
* Holder for context value meta data
*/
private static class ContextMetaData {

/**
* Boost associated with a
* context value
*/
private final float boost;

/**
* flag to indicate whether the context
* value should be treated as an exact
* value or a context prefix
*/
private final boolean exact;

private ContextMetaData(float boost, boolean exact) {
Expand Down Expand Up @@ -274,8 +285,7 @@ private void setInnerWeight(IntsRef ref, int offset) {
if (ref.ints[ref.offset + i] == ContextSuggestField.CONTEXT_SEPARATOR) {
if (i > 0) {
refBuilder.copyInts(ref.ints, ref.offset, i);
currentContext = XUtil.toBytesRef(refBuilder.get(), scratch).utf8ToString();
refBuilder.clear();
currentContext = Util.toBytesRef(refBuilder.get(), scratch).utf8ToString();
} else {
currentContext = null;
}
Expand All @@ -294,13 +304,8 @@ private void setInnerWeight(IntsRef ref, int offset) {
}

@Override
protected List<CharSequence> contexts() {
final List<CharSequence> contexts = new ArrayList<>();
contexts.add(currentContext);
if (innerWeight instanceof ContextCompletionWeight) {
contexts.addAll(innerWeight.contexts());
}
return contexts;
protected CharSequence context() {
return currentContext;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
Expand Down Expand Up @@ -70,31 +71,31 @@ public ContextSuggestField(String name, String value, int weight, CharSequence..
validate(value);
this.contexts = new HashSet<>((contexts != null) ? contexts.length : 0);
if (contexts != null) {
for (CharSequence context : contexts) {
validate(context);
this.contexts.add(context);
}
Collections.addAll(this.contexts, contexts);
}
}

/**
* Sub-classes can inject contexts at
* index-time by overriding
* Expert: Sub-classes can inject contexts at
* index-time
*/
protected Set<CharSequence> contexts() {
protected Iterable<CharSequence> contexts() {
return contexts;
}

@Override
protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
CompletionTokenStream completionTokenStream;
for (CharSequence context : contexts()) {
validate(context);
}
PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts());
CompletionTokenStream completionTokenStream;
if (stream instanceof CompletionTokenStream) {
completionTokenStream = (CompletionTokenStream) stream;
completionTokenStream = new CompletionTokenStream(prefixTokenFilter,
completionTokenStream.preserveSep,
completionTokenStream.preservePositionIncrements,
completionTokenStream.maxGraphExpansions);
completionTokenStream.preserveSep,
completionTokenStream.preservePositionIncrements,
completionTokenStream.maxGraphExpansions);
} else {
completionTokenStream = new CompletionTokenStream(prefixTokenFilter);
}
Expand Down Expand Up @@ -161,11 +162,11 @@ public void reset() throws IOException {
}
}

protected void validate(final CharSequence value) {
private void validate(final CharSequence value) {
for (int i = 0; i < value.length(); i++) {
if (CONTEXT_SEPARATOR == value.charAt(i)) {
throw new IllegalArgumentException("Illegal value [" + value + "] UTF-16 codepoint [0x"
+ Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character");
+ Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character");
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public Collection<Accountable> getChildResources() {
* the matched partial paths. Upon reaching a completed path, {@link CompletionScorer#accept(int)}
* and {@link CompletionScorer#score(float, float)} is used on the document id, index weight
* and query boost to filter and score the entry, before being collected via
* {@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence[], float)}
* {@link TopSuggestDocsCollector#collect(int, CharSequence, CharSequence, float)}
*/
public void lookup(final CompletionScorer scorer, final TopSuggestDocsCollector collector) throws IOException {
final double liveDocsRatio = calculateLiveDocRatio(scorer.reader.numDocs(), scorer.reader.maxDoc());
Expand All @@ -148,7 +148,7 @@ protected boolean acceptResult(XUtil.FSTPath<Pair<Long, BytesRef>> path) {
}
try {
float score = scorer.score(decode(path.cost.output1), path.boost);
collector.collect(docID, spare.toCharsRef(), path.contexts, score);
collector.collect(docID, spare.toCharsRef(), path.context, score);
return true;
} catch (IOException e) {
throw new RuntimeException(e);
Expand All @@ -158,14 +158,7 @@ protected boolean acceptResult(XUtil.FSTPath<Pair<Long, BytesRef>> path) {

for (FSTUtil.Path<Pair<Long, BytesRef>> path : prefixPaths) {
scorer.weight.setNextMatch(path.input.get());
List<CharSequence> contexts = scorer.weight.contexts();
final CharSequence[] contextArray;
if (contexts != null) {
contextArray = contexts.toArray(new CharSequence[contexts.size()]);
} else {
contextArray = null;
}
searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(), contextArray);
searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(), scorer.weight.context());
}
// hits are also returned by search()
// we do not use it, instead collect at acceptResult
Expand Down
Loading