From d0e9038dc6824a034f4be838a9a13a9c404f6fe8 Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Tue, 11 Jul 2017 10:07:20 +0100 Subject: [PATCH 1/4] Changes DocValueFieldsFetchSubPhase to reuse doc values iterators for multiple hits Closes #24986 --- .../subphase/DocValueFieldsFetchSubPhase.java | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java index cd0f1645ce03d..8e45433de3f4c 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java @@ -18,15 +18,19 @@ */ package org.elasticsearch.search.fetch.subphase; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.ReaderUtil; import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -38,7 +42,8 @@ public final class DocValueFieldsFetchSubPhase implements FetchSubPhase { @Override - public void hitExecute(SearchContext context, HitContext hitContext) throws IOException { + public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException { + if (context.collapse() != null) { // retrieve the `doc_value` associated with the collapse field String name = context.collapse().getFieldType().name(); @@ -48,26 +53,42 @@ public void hitExecute(SearchContext context, HitContext hitContext) throws IOEx context.docValueFieldsContext().fields().add(name); } } + if (context.docValueFieldsContext() == null) { return; } + + hits = hits.clone(); // don't modify the incoming hits + Arrays.sort(hits, (a, b) -> Integer.compare(a.docId(), b.docId())); + for (String field : context.docValueFieldsContext().fields()) { - if (hitContext.hit().fieldsOrNull() == null) { - hitContext.hit().fields(new HashMap<>(2)); - } - DocumentField hitField = hitContext.hit().getFields().get(field); - if (hitField == null) { - hitField = new DocumentField(field, new ArrayList<>(2)); - hitContext.hit().getFields().put(field, hitField); - } MappedFieldType fieldType = context.mapperService().fullName(field); if (fieldType != null) { - /* Because this is called once per document we end up creating a new ScriptDocValues for every document which is important - * because the values inside ScriptDocValues might be reused for different documents (Dates do this). */ - AtomicFieldData data = context.fieldData().getForField(fieldType).load(hitContext.readerContext()); - ScriptDocValues values = data.getScriptValues(); - values.setNextDocId(hitContext.docId()); - hitField.getValues().addAll(values); + int currentReaderIndex = -1; + LeafReaderContext subReaderContext = null; + AtomicFieldData data = null; + ScriptDocValues values = null; + for (SearchHit hit : hits) { + int readerIndex = ReaderUtil.subIndex(hit.docId(), context.searcher().getIndexReader().leaves()); + // if the reader index has changed we need to get a new doc values reader instance + if (readerIndex != currentReaderIndex) { + currentReaderIndex = readerIndex; + subReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex); + data = context.fieldData().getForField(fieldType).load(subReaderContext); + values = data.getScriptValues(); + } + int subDocId = hit.docId() - subReaderContext.docBase; + values.setNextDocId(subDocId); + if (hit.fieldsOrNull() == null) { + hit.fields(new HashMap<>(2)); + } + DocumentField hitField = hit.getFields().get(field); + if (hitField == null) { + hitField = new DocumentField(field, new ArrayList<>(2)); + hit.getFields().put(field, hitField); + } + hitField.getValues().addAll(values); + } } } } From e0e9e7499c5dd29f37d3aa2eab1d8382da701620 Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Tue, 11 Jul 2017 16:53:55 +0100 Subject: [PATCH 2/4] iter --- .../search/fetch/subphase/DocValueFieldsFetchSubPhase.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java index 8e45433de3f4c..a3e426c8c5cbb 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java @@ -64,15 +64,13 @@ public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOExcept for (String field : context.docValueFieldsContext().fields()) { MappedFieldType fieldType = context.mapperService().fullName(field); if (fieldType != null) { - int currentReaderIndex = -1; LeafReaderContext subReaderContext = null; AtomicFieldData data = null; ScriptDocValues values = null; for (SearchHit hit : hits) { - int readerIndex = ReaderUtil.subIndex(hit.docId(), context.searcher().getIndexReader().leaves()); // if the reader index has changed we need to get a new doc values reader instance - if (readerIndex != currentReaderIndex) { - currentReaderIndex = readerIndex; + if (subReaderContext == null || hit.docId() >= subReaderContext.docBase + subReaderContext.reader().maxDoc()) { + int readerIndex = ReaderUtil.subIndex(hit.docId(), context.searcher().getIndexReader().leaves()); subReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex); data = context.fieldData().getForField(fieldType).load(subReaderContext); values = data.getScriptValues(); From ec37b538317d9c2208dcebbc807d6656f0ef09f7 Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Tue, 11 Jul 2017 17:10:57 +0100 Subject: [PATCH 3/4] Update ScriptDocValues to not reuse GeoPoint and Date objects --- .../org/elasticsearch/index/fielddata/ScriptDocValues.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index 3ae4e6ebc8123..82ccfcff5fb14 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -266,7 +266,7 @@ void refreshArray() throws IOException { return; } for (int i = 0; i < count; i++) { - dates[i].setMillis(in.nextValue()); + dates[i] = new MutableDateTime(in.nextValue(), DateTimeZone.UTC); } } } @@ -340,7 +340,7 @@ public void setNextDocId(int docId) throws IOException { resize(in.docValueCount()); for (int i = 0; i < count; i++) { GeoPoint point = in.nextValue(); - values[i].reset(point.lat(), point.lon()); + values[i] = new GeoPoint(point.lat(), point.lon()); } } else { resize(0); From c8efa14db8d9c45d765f146dadeddc9ae75eb92a Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Wed, 12 Jul 2017 10:50:40 +0000 Subject: [PATCH 4/4] added Javadoc about script value re-use --- .../elasticsearch/index/fielddata/ScriptDocValues.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index 82ccfcff5fb14..803356b8f1ed0 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -43,8 +43,13 @@ /** - * Script level doc values, the assumption is that any implementation will implement a getValue - * and a getValues that return the relevant type that then can be used in scripts. + * Script level doc values, the assumption is that any implementation will + * implement a getValue and a getValues that return + * the relevant type that then can be used in scripts. + * + * Implementations should not internally re-use objects for the values that they + * return as a single {@link ScriptDocValues} instance can be reused to return + * values form multiple documents. */ public abstract class ScriptDocValues extends AbstractList { /**