Skip to content

Commit edb9224

Browse files
Introduce limit to the number of terms in Terms Query (#27968)
- Introduce index level settings to control the maximum number of terms that can be used in a Terms Query - Issue a deprecation warning if a request exceeds this max number Closes #18829
1 parent 2db21c0 commit edb9224

File tree

7 files changed

+110
-0
lines changed

7 files changed

+110
-0
lines changed

core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
119119
IndexSettings.MAX_RESCORE_WINDOW_SETTING,
120120
IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING,
121121
IndexSettings.MAX_ANALYZED_OFFSET_SETTING,
122+
IndexSettings.MAX_TERMS_COUNT_SETTING,
122123
IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,
123124
IndexSettings.DEFAULT_FIELD_SETTING,
124125
IndexSettings.QUERY_STRING_LENIENT_SETTING,

core/src/main/java/org/elasticsearch/index/IndexSettings.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,15 @@ public final class IndexSettings {
132132
public static final Setting<Integer> MAX_ANALYZED_OFFSET_SETTING =
133133
Setting.intSetting("index.highlight.max_analyzed_offset", 10000, 1, Property.Dynamic, Property.IndexScope);
134134

135+
136+
/**
137+
* Index setting describing the maximum number of terms that can be used in a Terms Query.
138+
* The default maximum of 65536 terms is defensive, as extra processing and memory are involved
139+
* for each additional term, and a large number of terms can degrade cluster performance.
140+
*/
141+
public static final Setting<Integer> MAX_TERMS_COUNT_SETTING =
142+
Setting.intSetting("index.max_terms_count", 65536, 1, Property.Dynamic, Property.IndexScope);
143+
135144
/**
136145
* Index setting describing for NGramTokenizer and NGramTokenFilter
137146
* the maximum difference between
@@ -288,6 +297,7 @@ public final class IndexSettings {
288297
private volatile int maxShingleDiff;
289298
private volatile boolean TTLPurgeDisabled;
290299
private volatile int maxAnalyzedOffset;
300+
private volatile int maxTermsCount;
291301

292302
/**
293303
* The maximum number of refresh listeners allowed on this shard.
@@ -397,6 +407,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
397407
maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
398408
maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
399409
maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING);
410+
maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING);
400411
this.mergePolicyConfig = new MergePolicyConfig(logger, this);
401412
this.indexSortConfig = new IndexSortConfig(this);
402413
singleType = INDEX_MAPPING_SINGLE_TYPE_SETTING.get(indexMetaData.getSettings()); // get this from metadata - it's not registered
@@ -438,6 +449,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
438449
scopedSettings.addSettingsUpdateConsumer(INDEX_REFRESH_INTERVAL_SETTING, this::setRefreshInterval);
439450
scopedSettings.addSettingsUpdateConsumer(MAX_REFRESH_LISTENERS_PER_SHARD, this::setMaxRefreshListeners);
440451
scopedSettings.addSettingsUpdateConsumer(MAX_ANALYZED_OFFSET_SETTING, this::setHighlightMaxAnalyzedOffset);
452+
scopedSettings.addSettingsUpdateConsumer(MAX_TERMS_COUNT_SETTING, this::setMaxTermsCount);
441453
scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll);
442454
scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields);
443455
}
@@ -717,6 +729,13 @@ private void setMaxDocvalueFields(int maxDocvalueFields) {
717729

718730
// Settings-update consumer for MAX_ANALYZED_OFFSET_SETTING: stores the new value in the volatile field.
private void setHighlightMaxAnalyzedOffset(int maxAnalyzedOffset) {
    this.maxAnalyzedOffset = maxAnalyzedOffset;
}
719731

732+
/**
733+
* Returns the maximum number of terms that can be used in a Terms Query request
734+
*/
735+
public int getMaxTermsCount() {
    // Volatile read: reflects the most recent value applied through the settings-update consumer.
    return maxTermsCount;
}
736+
737+
// Settings-update consumer for MAX_TERMS_COUNT_SETTING: stores the new limit in the volatile field.
private void setMaxTermsCount(int maxTermsCount) {
    this.maxTermsCount = maxTermsCount;
}
738+
720739
/**
721740
* Returns the maximum number of allowed script_fields to retrieve in a search request
722741
*/

core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,14 @@
3434
import org.elasticsearch.common.io.stream.BytesStreamOutput;
3535
import org.elasticsearch.common.io.stream.StreamInput;
3636
import org.elasticsearch.common.io.stream.StreamOutput;
37+
import org.elasticsearch.common.logging.DeprecationLogger;
38+
import org.elasticsearch.common.logging.Loggers;
3739
import org.elasticsearch.common.lucene.BytesRefs;
3840
import org.elasticsearch.common.lucene.search.Queries;
3941
import org.elasticsearch.common.xcontent.XContentBuilder;
4042
import org.elasticsearch.common.xcontent.XContentParser;
4143
import org.elasticsearch.common.xcontent.support.XContentMapValues;
44+
import org.elasticsearch.index.IndexSettings;
4245
import org.elasticsearch.index.mapper.MappedFieldType;
4346
import org.elasticsearch.indices.TermsLookup;
4447

@@ -59,6 +62,7 @@
5962
* A filter for a field based on several terms matching on any of them.
6063
*/
6164
public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
65+
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsQueryBuilder.class));
6266
public static final String NAME = "terms";
6367

6468
private final String fieldName;
@@ -416,6 +420,13 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
416420
if (values == null || values.isEmpty()) {
417421
return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
418422
}
423+
int maxTermsCount = context.getIndexSettings().getMaxTermsCount();
424+
if (values.size() > maxTermsCount){
425+
DEPRECATION_LOGGER.deprecated(
426+
"Deprecated: the number of terms [" + values.size() + "] used in the Terms Query request has exceeded " +
427+
"the allowed maximum of [" + maxTermsCount + "]. " + "This maximum can be set by changing the [" +
428+
IndexSettings.MAX_TERMS_COUNT_SETTING.getKey() + "] index level setting.");
429+
}
419430
MappedFieldType fieldType = context.fieldMapper(fieldName);
420431

421432
if (fieldType != null) {

docs/reference/index-modules.asciidoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ specific index module:
188188
This setting is only applicable when highlighting is requested on a text that was indexed without offsets or term vectors.
189189
Defaults to `10000`.
190190

191+
`index.max_terms_count`::
192+
193+
The maximum number of terms that can be used in a Terms Query.
194+
Defaults to `65536`.
195+
191196
[float]
192197
=== Settings in other index modules
193198

docs/reference/migration/migrate_6_0/search.asciidoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,11 @@ Setting `request_cache:true` on a query that creates a scroll ('scroll=1m`)
191191
In future versions we will return a `400 - Bad request` instead of just ignoring
192192
the hint.
193193
Scroll queries are not meant to be cached.
194+
195+
==== Limiting the number of terms that can be used in a Terms Query request
196+
197+
Executing a Terms Query with a lot of terms may degrade the cluster performance,
198+
as each additional term demands extra processing and memory.
199+
To safeguard against this, the maximum number of terms that can be used in a
200+
Terms Query request has been limited to 65536. This default maximum can be changed
201+
for a particular index with the index setting `index.max_terms_count`. Requests that
exceed this maximum currently only produce a deprecation warning.

docs/reference/query-dsl/terms-query.asciidoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ across all nodes if the "reference" terms data is not large. The lookup
5757
terms filter will prefer to execute the get request on a local node if
5858
possible, reducing the need for networking.
5959

60+
[WARNING]
61+
Executing a Terms Query request with a lot of terms can be quite slow,
62+
as each additional term demands extra processing and memory.
63+
To safeguard against this, the maximum number of terms that can be used
64+
in a Terms Query, either directly or through lookup, has been limited to `65536`.
65+
This default maximum can be changed for a particular index with the index setting
66+
`index.max_terms_count`.
67+
6068
[float]
6169
===== Terms lookup twitter example
6270
At first we index the information for user with id 2, specifically, its
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
---
2+
"Terms Query with No.of terms exceeding index.max_terms_count should produce a warning":
3+
- skip:
4+
version: " - 6.1.99"
5+
reason: index.max_terms_count setting has been added in 6.2
6+
features: "warnings"
7+
- do:
8+
indices.create:
9+
index: test_index
10+
body:
11+
settings:
12+
number_of_shards: 1
13+
index.max_terms_count: 2
14+
mappings:
15+
test_type:
16+
properties:
17+
user:
18+
type: keyword
19+
followers:
20+
type: keyword
21+
- do:
22+
bulk:
23+
refresh: true
24+
body:
25+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u1"}}'
26+
- '{"user": "u1", "followers": ["u2", "u3"]}'
27+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u2"}}'
28+
- '{"user": "u2", "followers": ["u1", "u3", "u4"]}'
29+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u3"}}'
30+
- '{"user": "u3", "followers": ["u1"]}'
31+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u4"}}'
32+
- '{"user": "u4", "followers": ["u3"]}'
33+
34+
- do:
35+
search:
36+
index: test_index
37+
body: {"query" : {"terms" : {"user" : ["u1", "u2"]}}}
38+
- match: { hits.total: 2 }
39+
40+
- do:
41+
search:
42+
index: test_index
43+
body: {"query" : {"terms" : {"user" : ["u1", "u2", "u3"]}}}
44+
warnings:
45+
- "Deprecated: the number of terms [3] used in the Terms Query request has exceeded the allowed maximum of [2]. This maximum can be set by changing the [index.max_terms_count] index level setting."
46+
47+
- do:
48+
search:
49+
index: test_index
50+
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u1", "path" : "followers"}}}}
51+
- match: { hits.total: 2 }
52+
53+
- do:
54+
search:
55+
index: test_index
56+
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u2", "path" : "followers"}}}}
57+
warnings:
58+
- "Deprecated: the number of terms [3] used in the Terms Query request has exceeded the allowed maximum of [2]. This maximum can be set by changing the [index.max_terms_count] index level setting."

0 commit comments

Comments
 (0)