From 49f2aaba567be377e614bc2599ff560198809535 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=CC=88rg=20Prante?= Date: Mon, 10 Nov 2014 13:31:27 +0100 Subject: [PATCH] Query: add option for analyze wildcard/prefix also to simple_query_string query The query_string query has an option for analyzing wildcard/prefix (#787) by a best effort approach. This adds `analyze_wildcard` option also to simple_query_string. The default is set to `false` so the existing behavior of simple_query_string is unchanged. --- .../index/query/SimpleQueryParser.java | 65 +++++++++++++++++-- .../index/query/SimpleQueryStringBuilder.java | 10 +++ .../index/query/SimpleQueryStringParser.java | 2 + .../search/query/SimpleQueryStringTests.java | 28 ++++++++ 4 files changed, 101 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java index 7483656ed0927..1d3f0e703bd3a 100644 --- a/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java @@ -19,9 +19,14 @@ package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -109,7 +114,7 @@ public Query newPhraseQuery(String text, int slop) { /** * Dispatches to Lucene's SimpleQueryParser's newPrefixQuery, optionally - * lowercasing the term first + * lowercasing the term first or trying to analyze terms */ @Override public Query newPrefixQuery(String text) { @@ -119,9 +124,15 @@ public Query newPrefixQuery(String text) { BooleanQuery bq = new BooleanQuery(true); for (Map.Entry entry : weights.entrySet()) { try { - 
PrefixQuery prefix = new PrefixQuery(new Term(entry.getKey(), text)); - prefix.setBoost(entry.getValue()); - bq.add(prefix, BooleanClause.Occur.SHOULD); + if (settings.analyzeWildcard()) { + Query analyzedQuery = newPossiblyAnalyzedQuery(entry.getKey(), text); + analyzedQuery.setBoost(entry.getValue()); + bq.add(analyzedQuery, BooleanClause.Occur.SHOULD); + } else { + PrefixQuery prefix = new PrefixQuery(new Term(entry.getKey(), text)); + prefix.setBoost(entry.getValue()); + bq.add(prefix, BooleanClause.Occur.SHOULD); + } } catch (RuntimeException e) { return rethrowUnlessLenient(e); } @@ -129,6 +140,43 @@ public Query newPrefixQuery(String text) { return super.simplify(bq); } + /** + * Best-effort analysis of a prefix term: tokenizes {@code termStr} with the + * analyzer and builds one prefix query per emitted token. The token stream is + * closed on every path. + * + * @param field the field name passed to the analyzer and used for the prefix terms + * @param termStr the raw prefix text + * @return a PrefixQuery for exactly one token, otherwise a BooleanQuery with one + * SHOULD prefix clause per token; a PrefixQuery on the raw term if + * analysis throws an IOException + */ + private Query newPossiblyAnalyzedQuery(String field, String termStr) { + List<String> tlist = new ArrayList<>(); + // try-with-resources closes the stream on every path, also when reset() fails + try (TokenStream source = getAnalyzer().tokenStream(field, termStr)) { + CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); + source.reset(); + while (source.incrementToken()) { + tlist.add(termAtt.toString()); + } + // finish the consume workflow before the implicit close() + source.end(); + } catch (IOException e) { + // analysis failed: fall back to the un-analyzed prefix + return new PrefixQuery(new Term(field, termStr)); + } + if (tlist.size() == 1) { + return new PrefixQuery(new Term(field, tlist.get(0))); + } + // build a boolean query with prefix on each one... 
+ BooleanQuery bq = new BooleanQuery(); + for (String token : tlist) { + bq.add(new BooleanClause(new PrefixQuery(new Term(field, token)), BooleanClause.Occur.SHOULD)); + } + return bq; + } + /** * Class encapsulating the settings for the SimpleQueryString query, with * their default values @@ -137,6 +185,7 @@ public static class Settings { private Locale locale = Locale.ROOT; private boolean lowercaseExpandedTerms = true; private boolean lenient = false; + private boolean analyzeWildcard = false; public Settings() { @@ -165,5 +214,13 @@ public void lenient(boolean lenient) { public boolean lenient() { return this.lenient; } + + public void analyzeWildcard(boolean analyzeWildcard) { + this.analyzeWildcard = analyzeWildcard; + } + + public boolean analyzeWildcard() { + return analyzeWildcard; + } } } diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java index 980f55a9716a0..cae18ac1e0c88 100644 --- a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java @@ -39,6 +39,7 @@ public class SimpleQueryStringBuilder extends BaseQueryBuilder { private int flags = -1; private Boolean lowercaseExpandedTerms; private Boolean lenient; + private Boolean analyzeWildcard; private Locale locale; /** @@ -128,6 +129,11 @@ public SimpleQueryStringBuilder lenient(boolean lenient) { return this; } + public SimpleQueryStringBuilder analyzeWildcard(boolean analyzeWildcard) { + this.analyzeWildcard = analyzeWildcard; + return this; + } + @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(SimpleQueryStringParser.NAME); @@ -168,6 +174,10 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio builder.field("lenient", lenient); } + if (analyzeWildcard != null) { + 
builder.field("analyze_wildcard", analyzeWildcard); + } + if (locale != null) { builder.field("locale", locale.toString()); } diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java index 0c63119992f9e..d5954d4e4b9be 100644 --- a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java @@ -178,6 +178,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars sqsSettings.lowercaseExpandedTerms(parser.booleanValue()); } else if ("lenient".equals(currentFieldName)) { sqsSettings.lenient(parser.booleanValue()); + } else if ("analyze_wildcard".equals(currentFieldName)) { + sqsSettings.analyzeWildcard(parser.booleanValue()); } else if ("_name".equals(currentFieldName)) { queryName = parser.text(); } else { diff --git a/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java b/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java index 2cd0afc59e486..e12bb16aca071 100644 --- a/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java +++ b/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java @@ -19,7 +19,9 @@ package org.elasticsearch.search.query; +import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.SimpleQueryStringBuilder; import org.elasticsearch.index.query.SimpleQueryStringFlag; @@ -266,4 +268,30 @@ public void testLenientFlagBeingTooLenient() throws Exception { assertHitCount(resp, 1); assertSearchHits(resp, "1"); } + + @Test + public void testSimpleQueryStringAnalyzeWildcard() throws ExecutionException, InterruptedException, IOException { + 
String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type1") + .startObject("properties") + .startObject("location") + .field("type", "string") + .field("analyzer", "german") + .endObject() + .endObject() + .endObject() + .endObject().string(); + // create the index with that mapping and index a single document + CreateIndexRequestBuilder mappingRequest = client().admin().indices().prepareCreate("test1").addMapping("type1", mapping); + mappingRequest.execute().actionGet(); + indexRandom(true, client().prepareIndex("test1", "type1", "1").setSource("location", "Köln")); + refresh(); + // analyzeWildcard(true): the prefix term is analyzed before the prefix query is built + SearchResponse searchResponse = client().prepareSearch().setQuery(simpleQueryString("Köln*").analyzeWildcard(true).field("location")).get(); + assertNoFailures(searchResponse); + assertHitCount(searchResponse, 1L); + assertSearchHits(searchResponse, "1"); + } + }