Skip to content

Commit f05af0a

Browse files
authored
Enable index-time sorting (#24055)
This change adds an index setting that defines how documents are sorted inside each segment. Any numeric, date, boolean or keyword field in the mapping can be used to sort the index on disk. `nested` fields are not allowed in an index that defines an index sort, because `nested` fields rely on the original order of the documents in the index. This change does not add early-termination capabilities to the search layer; these will be added in a follow-up. Relates #6720
1 parent c0ac50e commit f05af0a

File tree

28 files changed

+1313
-47
lines changed

28 files changed

+1313
-47
lines changed

core/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919

2020
package org.elasticsearch.action.admin.indices.segments;
2121

22+
import org.apache.lucene.search.Sort;
23+
import org.apache.lucene.search.SortField;
24+
import org.apache.lucene.search.SortedNumericSortField;
25+
import org.apache.lucene.search.SortedSetSortField;
2226
import org.apache.lucene.util.Accountable;
2327
import org.elasticsearch.action.ShardOperationFailedException;
2428
import org.elasticsearch.action.support.broadcast.BroadcastResponse;
@@ -37,6 +41,7 @@
3741
import java.util.List;
3842
import java.util.Map;
3943
import java.util.Set;
44+
import java.util.Locale;
4045

4146
public class IndicesSegmentResponse extends BroadcastResponse implements ToXContent {
4247

@@ -140,6 +145,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
140145
if (segment.getMergeId() != null) {
141146
builder.field(Fields.MERGE_ID, segment.getMergeId());
142147
}
148+
if (segment.getSegmentSort() != null) {
149+
toXContent(builder, segment.getSegmentSort());
150+
}
143151
if (segment.ramTree != null) {
144152
builder.startArray(Fields.RAM_TREE);
145153
for (Accountable child : segment.ramTree.getChildResources()) {
@@ -164,6 +172,25 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
164172
return builder;
165173
}
166174

175+
static void toXContent(XContentBuilder builder, Sort sort) throws IOException {
176+
builder.startArray("sort");
177+
for (SortField field : sort.getSort()) {
178+
builder.startObject();
179+
builder.field("field", field.getField());
180+
if (field instanceof SortedNumericSortField) {
181+
builder.field("mode", ((SortedNumericSortField) field).getSelector()
182+
.toString().toLowerCase(Locale.ROOT));
183+
} else if (field instanceof SortedSetSortField) {
184+
builder.field("mode", ((SortedSetSortField) field).getSelector()
185+
.toString().toLowerCase(Locale.ROOT));
186+
}
187+
builder.field("missing", field.getMissingValue());
188+
builder.field("reverse", field.getReverse());
189+
builder.endObject();
190+
}
191+
builder.endArray();
192+
}
193+
167194
static void toXContent(XContentBuilder builder, Accountable tree) throws IOException {
168195
builder.startObject();
169196
builder.field(Fields.DESCRIPTION, tree.toString());

core/src/main/java/org/elasticsearch/action/admin/indices/shrink/ShrinkRequest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ public ActionRequestValidationException validate() {
6666
if (shrinkIndexRequest == null) {
6767
validationException = addValidationError("shrink index request is missing", validationException);
6868
}
69+
if (shrinkIndexRequest.settings().getByPrefix("index.sort.").isEmpty() == false) {
70+
validationException = addValidationError("can't override index sort when shrinking index", validationException);
71+
}
6972
return validationException;
7073
}
7174

core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataCreateIndexService.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -374,9 +374,18 @@ public ClusterState execute(ClusterState currentState) throws Exception {
374374
throw e;
375375
}
376376

377+
if (request.shrinkFrom() == null) {
378+
// now that the mapping is merged we can validate the index sort.
379+
// we cannot validate for index shrinking since the mapping is empty
380+
// at this point. The validation will take place later in the process
381+
// (when all shards are copied in a single place).
382+
indexService.getIndexSortSupplier().get();
383+
}
384+
377385
// the context is only used for validation so it's fine to pass fake values for the shard id and the current
378386
// timestamp
379387
final QueryShardContext queryShardContext = indexService.newQueryShardContext(0, null, () -> 0L);
388+
380389
for (Alias alias : request.aliases()) {
381390
if (Strings.hasLength(alias.filter())) {
382391
aliasValidator.validateAliasFilter(alias.name(), alias.filter(), queryShardContext, xContentRegistry);
@@ -581,22 +590,23 @@ static List<String> validateShrinkIndex(ClusterState state, String sourceIndex,
581590

582591
static void prepareShrinkIndexSettings(ClusterState currentState, Set<String> mappingKeys, Settings.Builder indexSettingsBuilder, Index shrinkFromIndex, String shrinkIntoName) {
583592
final IndexMetaData sourceMetaData = currentState.metaData().index(shrinkFromIndex.getName());
593+
584594
final List<String> nodesToAllocateOn = validateShrinkIndex(currentState, shrinkFromIndex.getName(),
585595
mappingKeys, shrinkIntoName, indexSettingsBuilder.build());
586-
final Predicate<String> analysisSimilarityPredicate = (s) -> s.startsWith("index.similarity.")
587-
|| s.startsWith("index.analysis.");
596+
final Predicate<String> sourceSettingsPredicate = (s) -> s.startsWith("index.similarity.")
597+
|| s.startsWith("index.analysis.") || s.startsWith("index.sort.");
588598
indexSettingsBuilder
589599
// we use "i.r.a.initial_recovery" rather than "i.r.a.require|include" since we want the replica to allocate right away
590600
// once we are allocated.
591601
.put("index.routing.allocation.initial_recovery._id",
592602
Strings.arrayToCommaDelimitedString(nodesToAllocateOn.toArray()))
593603
// we only try once and then give up with a shrink index
594604
.put("index.allocation.max_retries", 1)
595-
// now copy all similarity / analysis settings - this overrides all settings from the user unless they
605+
// now copy all similarity / analysis / sort settings - this overrides all settings from the user unless they
596606
// wanna add extra settings
597607
.put(IndexMetaData.SETTING_VERSION_CREATED, sourceMetaData.getCreationVersion())
598608
.put(IndexMetaData.SETTING_VERSION_UPGRADED, sourceMetaData.getUpgradedVersion())
599-
.put(sourceMetaData.getSettings().filter(analysisSimilarityPredicate))
609+
.put(sourceMetaData.getSettings().filter(sourceSettingsPredicate))
600610
.put(IndexMetaData.SETTING_ROUTING_PARTITION_SIZE, sourceMetaData.getRoutingPartitionSize())
601611
.put(IndexMetaData.INDEX_SHRINK_SOURCE_NAME.getKey(), shrinkFromIndex.getName())
602612
.put(IndexMetaData.INDEX_SHRINK_SOURCE_UUID.getKey(), shrinkFromIndex.getUUID());

core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@
1818
*/
1919
package org.elasticsearch.common.settings;
2020

21+
import org.elasticsearch.index.IndexSortConfig;
2122
import org.elasticsearch.cluster.metadata.IndexMetaData;
2223
import org.elasticsearch.cluster.routing.UnassignedInfo;
2324
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
2425
import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider;
2526
import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
2627
import org.elasticsearch.common.settings.Setting.Property;
27-
import org.elasticsearch.gateway.PrimaryShardAllocator;
2828
import org.elasticsearch.index.IndexModule;
2929
import org.elasticsearch.index.IndexSettings;
3030
import org.elasticsearch.index.IndexingSlowLog;
@@ -100,6 +100,10 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
100100
MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT_SETTING,
101101
MergePolicyConfig.INDEX_MERGE_POLICY_SEGMENTS_PER_TIER_SETTING,
102102
MergePolicyConfig.INDEX_MERGE_POLICY_RECLAIM_DELETES_WEIGHT_SETTING,
103+
IndexSortConfig.INDEX_SORT_FIELD_SETTING,
104+
IndexSortConfig.INDEX_SORT_ORDER_SETTING,
105+
IndexSortConfig.INDEX_SORT_MISSING_SETTING,
106+
IndexSortConfig.INDEX_SORT_MODE_SETTING,
103107
IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING,
104108
IndexSettings.INDEX_WARMER_ENABLED_SETTING,
105109
IndexSettings.INDEX_REFRESH_INTERVAL_SETTING,

core/src/main/java/org/elasticsearch/index/IndexService.java

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
package org.elasticsearch.index;
2121

2222
import org.apache.logging.log4j.message.ParameterizedMessage;
23-
import org.apache.logging.log4j.util.Supplier;
2423
import org.apache.lucene.index.IndexReader;
24+
import org.apache.lucene.search.Sort;
2525
import org.apache.lucene.store.AlreadyClosedException;
2626
import org.apache.lucene.util.Accountable;
2727
import org.apache.lucene.util.IOUtils;
@@ -84,6 +84,7 @@
8484
import java.util.concurrent.atomic.AtomicBoolean;
8585
import java.util.function.Consumer;
8686
import java.util.function.LongSupplier;
87+
import java.util.function.Supplier;
8788

8889
import static java.util.Collections.emptyMap;
8990
import static java.util.Collections.unmodifiableMap;
@@ -119,6 +120,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust
119120
private final ScriptService scriptService;
120121
private final ClusterService clusterService;
121122
private final Client client;
123+
private Supplier<Sort> indexSortSupplier;
122124

123125
public IndexService(IndexSettings indexSettings, NodeEnvironment nodeEnv,
124126
NamedXContentRegistry xContentRegistry,
@@ -153,6 +155,16 @@ public IndexService(IndexSettings indexSettings, NodeEnvironment nodeEnv,
153155
throw new IllegalArgumentException("Percolator queries are not allowed to use the current timestamp");
154156
}));
155157
this.indexFieldData = new IndexFieldDataService(indexSettings, indicesFieldDataCache, circuitBreakerService, mapperService);
158+
if (indexSettings.getIndexSortConfig().hasIndexSort()) {
159+
// we delay the actual creation of the sort order for this index because the mapping has not been merged yet.
160+
// The sort order is validated right after the merge of the mapping later in the process.
161+
this.indexSortSupplier = () -> indexSettings.getIndexSortConfig().buildIndexSort(
162+
mapperService::fullName,
163+
indexFieldData::getForField
164+
);
165+
} else {
166+
this.indexSortSupplier = () -> null;
167+
}
156168
this.shardStoreDeleter = shardStoreDeleter;
157169
this.bigArrays = bigArrays;
158170
this.threadPool = threadPool;
@@ -243,6 +255,10 @@ public SimilarityService similarityService() {
243255
return similarityService;
244256
}
245257

258+
public Supplier<Sort> getIndexSortSupplier() {
259+
return indexSortSupplier;
260+
}
261+
246262
public synchronized void close(final String reason, boolean delete) throws IOException {
247263
if (closed.compareAndSet(false, true)) {
248264
deleted.compareAndSet(false, delete);
@@ -350,10 +366,10 @@ public synchronized IndexShard createShard(ShardRouting routing) throws IOExcept
350366
};
351367
store = new Store(shardId, this.indexSettings, indexStore.newDirectoryService(path), lock,
352368
new StoreCloseListener(shardId, () -> eventListener.onStoreClosed(shardId)));
353-
indexShard = new IndexShard(routing, this.indexSettings, path, store, indexCache, mapperService, similarityService,
354-
indexFieldData, engineFactory, eventListener, searcherWrapper, threadPool, bigArrays, engineWarmer,
355-
() -> globalCheckpointSyncer.accept(shardId),
356-
searchOperationListeners, indexingOperationListeners);
369+
indexShard = new IndexShard(routing, this.indexSettings, path, store, indexSortSupplier,
370+
indexCache, mapperService, similarityService, indexFieldData, engineFactory,
371+
eventListener, searcherWrapper, threadPool, bigArrays, engineWarmer,
372+
() -> globalCheckpointSyncer.accept(shardId), searchOperationListeners, indexingOperationListeners);
357373
eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created");
358374
eventListener.afterIndexShardCreated(indexShard);
359375
shards = newMapBuilder(shards).put(shardId.id(), indexShard).immutableMap();
@@ -401,7 +417,8 @@ private void closeShard(String reason, ShardId sId, IndexShard indexShard, Store
401417
final boolean flushEngine = deleted.get() == false && closed.get();
402418
indexShard.close(reason, flushEngine);
403419
} catch (Exception e) {
404-
logger.debug((Supplier<?>) () -> new ParameterizedMessage("[{}] failed to close index shard", shardId), e);
420+
logger.debug((org.apache.logging.log4j.util.Supplier<?>)
421+
() -> new ParameterizedMessage("[{}] failed to close index shard", shardId), e);
405422
// ignore
406423
}
407424
}

core/src/main/java/org/elasticsearch/index/IndexSettings.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ public final class IndexSettings {
9898
Setting.intSetting("index.max_rescore_window", MAX_RESULT_WINDOW_SETTING, 1, Property.Dynamic, Property.IndexScope);
9999
/**
100100
* Index setting describing the maximum number of filters clauses that can be used
101-
* in an adjacency_matrix aggregation. The max number of buckets produced by
101+
* in an adjacency_matrix aggregation. The max number of buckets produced by
102102
* N filters is (N*N)/2 so a limit of 100 filters is imposed by default.
103103
*/
104104
public static final Setting<Integer> MAX_ADJACENCY_MATRIX_FILTERS_SETTING =
105-
Setting.intSetting("index.max_adjacency_matrix_filters", 100, 2, Property.Dynamic, Property.IndexScope);
105+
Setting.intSetting("index.max_adjacency_matrix_filters", 100, 2, Property.Dynamic, Property.IndexScope);
106106
public static final TimeValue DEFAULT_REFRESH_INTERVAL = new TimeValue(1, TimeUnit.SECONDS);
107107
public static final Setting<TimeValue> INDEX_REFRESH_INTERVAL_SETTING =
108108
Setting.timeSetting("index.refresh_interval", DEFAULT_REFRESH_INTERVAL, new TimeValue(-1, TimeUnit.MILLISECONDS),
@@ -176,6 +176,7 @@ public final class IndexSettings {
176176
private volatile ByteSizeValue generationThresholdSize;
177177
private final MergeSchedulerConfig mergeSchedulerConfig;
178178
private final MergePolicyConfig mergePolicyConfig;
179+
private final IndexSortConfig indexSortConfig;
179180
private final IndexScopedSettings scopedSettings;
180181
private long gcDeletesInMillis = DEFAULT_GC_DELETES.millis();
181182
private volatile boolean warmerEnabled;
@@ -278,6 +279,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
278279
maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
279280
maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
280281
this.mergePolicyConfig = new MergePolicyConfig(logger, this);
282+
this.indexSortConfig = new IndexSortConfig(this);
281283

282284
scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setNoCFSRatio);
283285
scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING, mergePolicyConfig::setExpungeDeletesAllowed);
@@ -499,7 +501,7 @@ public int getMaxResultWindow() {
499501
private void setMaxResultWindow(int maxResultWindow) {
500502
this.maxResultWindow = maxResultWindow;
501503
}
502-
504+
503505
/**
504506
* Returns the max number of filters in adjacency_matrix aggregation search requests
505507
*/
@@ -509,7 +511,7 @@ public int getMaxAdjacencyMatrixFilters() {
509511

510512
private void setMaxAdjacencyMatrixFilters(int maxAdjacencyFilters) {
511513
this.maxAdjacencyMatrixFilters = maxAdjacencyFilters;
512-
}
514+
}
513515

514516
/**
515517
* Returns the maximum rescore window for search requests.
@@ -574,5 +576,12 @@ private void setMaxSlicesPerScroll(int value) {
574576
this.maxSlicesPerScroll = value;
575577
}
576578

579+
/**
580+
* Returns the index sort config that should be used for this index.
581+
*/
582+
public IndexSortConfig getIndexSortConfig() {
583+
return indexSortConfig;
584+
}
585+
577586
public IndexScopedSettings getScopedSettings() { return scopedSettings;}
578587
}

0 commit comments

Comments
 (0)