Skip to content

Commit dcde895

Browse files
Introduce limit to the number of terms in Terms Query (#27968)
- Introduce index level settings to control the maximum number of terms that can be used in a Terms Query - Throw an error if a request exceeds this max number Closes #18829
1 parent da0ed57 commit dcde895

File tree

7 files changed

+105
-0
lines changed

7 files changed

+105
-0
lines changed

core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
119119
IndexSettings.MAX_RESCORE_WINDOW_SETTING,
120120
IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING,
121121
IndexSettings.MAX_ANALYZED_OFFSET_SETTING,
122+
IndexSettings.MAX_TERMS_COUNT_SETTING,
122123
IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,
123124
IndexSettings.DEFAULT_FIELD_SETTING,
124125
IndexSettings.QUERY_STRING_LENIENT_SETTING,

core/src/main/java/org/elasticsearch/index/IndexSettings.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,15 @@ public final class IndexSettings {
129129
public static final Setting<Integer> MAX_ANALYZED_OFFSET_SETTING =
130130
Setting.intSetting("index.highlight.max_analyzed_offset", 10000, 1, Property.Dynamic, Property.IndexScope);
131131

132+
133+
/**
134+
* Index setting describing the maximum number of terms that can be used in Terms Query.
135+
* The default maximum of 65536 terms is defensive, as extra processing and memory is involved
136+
* for each additional term, and a large number of terms degrade the cluster performance.
137+
*/
138+
public static final Setting<Integer> MAX_TERMS_COUNT_SETTING =
139+
Setting.intSetting("index.max_terms_count", 65536, 1, Property.Dynamic, Property.IndexScope);
140+
132141
/**
133142
* Index setting describing for NGramTokenizer and NGramTokenFilter
134143
* the maximum difference between
@@ -287,6 +296,7 @@ public final class IndexSettings {
287296
private volatile boolean TTLPurgeDisabled;
288297
private volatile TimeValue searchIdleAfter;
289298
private volatile int maxAnalyzedOffset;
299+
private volatile int maxTermsCount;
290300

291301
/**
292302
* The maximum number of refresh listeners allows on this shard.
@@ -397,6 +407,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
397407
maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
398408
maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
399409
maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING);
410+
maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING);
400411
this.mergePolicyConfig = new MergePolicyConfig(logger, this);
401412
this.indexSortConfig = new IndexSortConfig(this);
402413
searchIdleAfter = scopedSettings.get(INDEX_SEARCH_IDLE_AFTER);
@@ -440,6 +451,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
440451
scopedSettings.addSettingsUpdateConsumer(INDEX_REFRESH_INTERVAL_SETTING, this::setRefreshInterval);
441452
scopedSettings.addSettingsUpdateConsumer(MAX_REFRESH_LISTENERS_PER_SHARD, this::setMaxRefreshListeners);
442453
scopedSettings.addSettingsUpdateConsumer(MAX_ANALYZED_OFFSET_SETTING, this::setHighlightMaxAnalyzedOffset);
454+
scopedSettings.addSettingsUpdateConsumer(MAX_TERMS_COUNT_SETTING, this::setMaxTermsCount);
443455
scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll);
444456
scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields);
445457
scopedSettings.addSettingsUpdateConsumer(INDEX_SEARCH_IDLE_AFTER, this::setSearchIdleAfter);
@@ -734,6 +746,13 @@ private void setMaxTokenCount(int maxTokenCount) {
734746

735747
/** Settings-update hook: stores the new highlight max analyzed offset on this instance. */
private void setHighlightMaxAnalyzedOffset(int maxAnalyzedOffset) {
    this.maxAnalyzedOffset = maxAnalyzedOffset;
}
736748

749+
/**
750+
* Returns the maximum number of terms that can be used in a Terms Query request
751+
*/
752+
public int getMaxTermsCount() {
    // Plain read of the field kept up to date by setMaxTermsCount.
    return maxTermsCount;
}
753+
754+
/** Settings-update hook: stores the new maximum terms count on this instance. */
private void setMaxTermsCount(int maxTermsCount) {
    this.maxTermsCount = maxTermsCount;
}
755+
737756
/**
738757
* Returns the maximum number of allowed script_fields to retrieve in a search request
739758
*/

core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.elasticsearch.common.xcontent.XContentBuilder;
4040
import org.elasticsearch.common.xcontent.XContentParser;
4141
import org.elasticsearch.common.xcontent.support.XContentMapValues;
42+
import org.elasticsearch.index.IndexSettings;
4243
import org.elasticsearch.index.mapper.MappedFieldType;
4344
import org.elasticsearch.indices.TermsLookup;
4445

@@ -416,6 +417,13 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
416417
if (values == null || values.isEmpty()) {
417418
return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
418419
}
420+
int maxTermsCount = context.getIndexSettings().getMaxTermsCount();
421+
if (values.size() > maxTermsCount){
422+
throw new IllegalArgumentException(
423+
"The number of terms [" + values.size() + "] used in the Terms Query request has exceeded " +
424+
"the allowed maximum of [" + maxTermsCount + "]. " + "This maximum can be set by changing the [" +
425+
IndexSettings.MAX_TERMS_COUNT_SETTING.getKey() + "] index level setting.");
426+
}
419427
MappedFieldType fieldType = context.fieldMapper(fieldName);
420428

421429
if (fieldType != null) {

docs/reference/index-modules.asciidoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,11 @@ specific index module:
204204
This setting is only applicable when highlighting is requested on a text that was indexed without offsets or term vectors.
205205
Defaults to `10000`.
206206

207+
`index.max_terms_count`::
208+
209+
The maximum number of terms that can be used in a Terms Query.
210+
Defaults to `65536`.
211+
207212

208213
[float]
209214
=== Settings in other index modules

docs/reference/migration/migrate_7_0/search.asciidoc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,12 @@ removed.
4949

5050
* `levenstein` - replaced by `levenshtein`
5151
* `jarowinkler` - replaced by `jaro_winkler`
52+
53+
54+
==== Limiting the number of terms that can be used in a Terms Query request
55+
56+
Executing a Terms Query with a lot of terms may degrade the cluster performance,
57+
as each additional term demands extra processing and memory.
58+
To safeguard against this, the maximum number of terms that can be used in a
59+
Terms Query request has been limited to 65536. This default maximum can be changed
60+
for a particular index with the index setting `index.max_terms_count`.

docs/reference/query-dsl/terms-query.asciidoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ across all nodes if the "reference" terms data is not large. The lookup
5757
terms filter will prefer to execute the get request on a local node if
5858
possible, reducing the need for networking.
5959

60+
[WARNING]
61+
Executing a Terms Query request with a lot of terms can be quite slow,
62+
as each additional term demands extra processing and memory.
63+
To safeguard against this, the maximum number of terms that can be used
64+
in a Terms Query, either directly or through lookup, has been limited to `65536`.
65+
This default maximum can be changed for a particular index with the index setting
66+
`index.max_terms_count`.
67+
6068
[float]
6169
===== Terms lookup twitter example
6270
At first we index the information for user with id 2, specifically, its
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
---
2+
"Terms Query with No.of terms exceeding index.max_terms_count should FAIL":
3+
- skip:
4+
version: " - 6.99.99"
5+
reason: index.max_terms_count setting has been added in 7.0.0
6+
- do:
7+
indices.create:
8+
index: test_index
9+
body:
10+
settings:
11+
number_of_shards: 1
12+
index.max_terms_count: 2
13+
mappings:
14+
test_type:
15+
properties:
16+
user:
17+
type: keyword
18+
followers:
19+
type: keyword
20+
- do:
21+
bulk:
22+
refresh: true
23+
body:
24+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u1"}}'
25+
- '{"user": "u1", "followers": ["u2", "u3"]}'
26+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u2"}}'
27+
- '{"user": "u2", "followers": ["u1", "u3", "u4"]}'
28+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u3"}}'
29+
- '{"user": "u3", "followers": ["u1"]}'
30+
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u4"}}'
31+
- '{"user": "u4", "followers": ["u3"]}'
32+
33+
- do:
34+
search:
35+
index: test_index
36+
body: {"query" : {"terms" : {"user" : ["u1", "u2"]}}}
37+
- match: { hits.total: 2 }
38+
39+
- do:
40+
catch: bad_request
41+
search:
42+
index: test_index
43+
body: {"query" : {"terms" : {"user" : ["u1", "u2", "u3"]}}}
44+
45+
- do:
46+
search:
47+
index: test_index
48+
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u1", "path" : "followers"}}}}
49+
- match: { hits.total: 2 }
50+
51+
- do:
52+
catch: bad_request
53+
search:
54+
index: test_index
55+
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u2", "path" : "followers"}}}}

0 commit comments

Comments
 (0)