Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,23 +113,17 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader
// TODO: Maybe it could also make sense to assume independent distributions of documents and eg. have:
// df = df1 + df2 - (df1 * df2 / maxDoc)?
max = Math.max(df, max);
if (minSumTTF != -1 && ctx.totalTermFreq() != -1) {
if (ctx.totalTermFreq() > 0) {
// we need to find out the minimum sumTTF to adjust the statistics
// otherwise the statistics don't match
minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field()));
} else {
minSumTTF = -1;
}

}
if (minSumTTF != -1 && maxDoc > minSumTTF) {
maxDoc = (int)minSumTTF;
}

if (max == 0) {
return; // we are done that term doesn't exist at all
}
long sumTTF = minSumTTF == -1 ? -1 : 0;
long sumTTF = 0;
final int[] tieBreak = new int[contexts.length];
for (int i = 0; i < tieBreak.length; ++i) {
tieBreak[i] = i;
Expand Down Expand Up @@ -165,29 +159,20 @@ protected int compare(int i, int j) {
}
contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf));
prev = current;
if (sumTTF >= 0 && ctx.totalTermFreq() >= 0) {
sumTTF += ctx.totalTermFreq();
} else {
sumTTF = -1; // omit once TF is omitted anywhere!
}
sumTTF += ctx.totalTermFreq();
}
sumTTF = Math.min(sumTTF, minSumTTF);
for (int i = 0; i < contexts.length; i++) {
int df = contexts[i].docFreq();
if (df == 0) {
continue;
}
// the blended sumTTF can't be greater than the sumTTTF on the field
final long fixedTTF = sumTTF == -1 ? -1 : sumTTF;
contexts[i] = adjustTTF(reader.getContext(), contexts[i], fixedTTF);
contexts[i] = adjustTTF(reader.getContext(), contexts[i], sumTTF);
}
}

private TermStates adjustTTF(IndexReaderContext readerContext, TermStates termContext, long sumTTF) throws IOException {
assert termContext.wasBuiltFor(readerContext);
if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
return termContext;
}
TermStates newTermContext = new TermStates(readerContext);
List<LeafReaderContext> leaves = readerContext.leaves();
final int len;
Expand All @@ -213,12 +198,7 @@ private TermStates adjustTTF(IndexReaderContext readerContext, TermStates termCo
private static TermStates adjustDF(IndexReaderContext readerContext, TermStates ctx, int newDocFreq) throws IOException {
assert ctx.wasBuiltFor(readerContext);
// Use a value of ttf that is consistent with the doc freq (ie. gte)
long newTTF;
if (ctx.totalTermFreq() < 0) {
newTTF = -1;
} else {
newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
}
long newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
List<LeafReaderContext> leaves = readerContext.leaves();
final int len;
if (leaves == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
Expand All @@ -52,6 +54,8 @@

import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.instanceOf;

public class BlendedTermQueryTests extends ESTestCase {
public void testDismaxQuery() throws IOException {
Expand Down Expand Up @@ -114,6 +118,61 @@ public void testDismaxQuery() throws IOException {
assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue());

}
{
// test with an unknown field
String[] fields = new String[] {"username", "song", "unknown_field"};
Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 1.0f);
Query rewrite = searcher.rewrite(query);
assertThat(rewrite, instanceOf(BooleanQuery.class));
for (BooleanClause clause : (BooleanQuery) rewrite) {
assertThat(clause.getQuery(), instanceOf(TermQuery.class));
TermQuery termQuery = (TermQuery) clause.getQuery();
TermStates termStates = termQuery.getTermStates();
if (termQuery.getTerm().field().equals("unknown_field")) {
assertThat(termStates.docFreq(), equalTo(0));
assertThat(termStates.totalTermFreq(), equalTo(0L));
} else {
assertThat(termStates.docFreq(), greaterThan(0));
assertThat(termStates.totalTermFreq(), greaterThan(0L));
}
}
assertThat(searcher.search(query, 10).totalHits.value, equalTo((long) iters + username.length));
}
{
// test with an unknown field and an unknown term
String[] fields = new String[] {"username", "song", "unknown_field"};
Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "unknown_term"), 1.0f);
Query rewrite = searcher.rewrite(query);
assertThat(rewrite, instanceOf(BooleanQuery.class));
for (BooleanClause clause : (BooleanQuery) rewrite) {
assertThat(clause.getQuery(), instanceOf(TermQuery.class));
TermQuery termQuery = (TermQuery) clause.getQuery();
TermStates termStates = termQuery.getTermStates();
assertThat(termStates.docFreq(), equalTo(0));
assertThat(termStates.totalTermFreq(), equalTo(0L));
}
assertThat(searcher.search(query, 10).totalHits.value, equalTo(0L));
}
{
// test with an unknown field and a term that is present in only one field
String[] fields = new String[] {"username", "song", "id", "unknown_field"};
Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "fan"), 1.0f);
Query rewrite = searcher.rewrite(query);
assertThat(rewrite, instanceOf(BooleanQuery.class));
for (BooleanClause clause : (BooleanQuery) rewrite) {
assertThat(clause.getQuery(), instanceOf(TermQuery.class));
TermQuery termQuery = (TermQuery) clause.getQuery();
TermStates termStates = termQuery.getTermStates();
if (termQuery.getTerm().field().equals("username")) {
assertThat(termStates.docFreq(), equalTo(1));
assertThat(termStates.totalTermFreq(), equalTo(1L));
} else {
assertThat(termStates.docFreq(), equalTo(0));
assertThat(termStates.totalTermFreq(), equalTo(0L));
}
}
assertThat(searcher.search(query, 10).totalHits.value, equalTo(1L));
}
reader.close();
w.close();
dir.close();
Expand Down