diff --git a/CHANGELOG.md b/CHANGELOG.md index 82b365a2bbd10..34598044aae69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Rule Based Auto-tagging] Add in-memory attribute value store ([#17342](https://github.com/opensearch-project/OpenSearch/pull/17342)) - [Rule Based Auto-tagging] Add in-memory rule processing service ([#17365](https://github.com/opensearch-project/OpenSearch/pull/17365)) - Change priority for scheduling reroute during timeout([#16445](https://github.com/opensearch-project/OpenSearch/pull/16445)) +- Faster `terms_query` with already sorted terms ([#17714](https://github.com/opensearch-project/OpenSearch/pull/17714)) ### Dependencies - Bump `dnsjava:dnsjava` from 3.6.2 to 3.6.3 ([#17231](https://github.com/opensearch-project/OpenSearch/pull/17231)) diff --git a/server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java b/server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java new file mode 100644 index 0000000000000..2fbb01b2ced25 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java @@ -0,0 +1,182 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.util.BytesRef; + +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.SortedSet; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Supplier; + +/** + * Purposed for passing terms into {@link TermInSetQuery}. + * If the given terms are sorted already, it wrap it with a SortedSet stub. + * Otherwise, it passes terms as list. + */ +public class BytesRefsCollectionBuilder implements Consumer, Supplier> { + + /** + * Strategy for building BytesRef collection. + * */ + protected interface ConsumerStrategy extends Function, Supplier> {} + + public BytesRefsCollectionBuilder(int sizeExpected) { + terms = new ArrayList<>(sizeExpected); + } + + protected final List terms; + protected ConsumerStrategy delegate = createStartStrategy(); + + @Override + public void accept(BytesRef bytesRef) { + delegate = delegate.apply(bytesRef); + } + + @Override + public Collection get() { + Collection result = delegate.get(); + delegate = createFrozenStrategy(result); + return result; + } + + protected ConsumerStrategy createStartStrategy() { + return new ConsumerStrategy() { + @Override + public ConsumerStrategy apply(BytesRef firstBytes) { + terms.add(firstBytes); // firstly, just store + return createSortedStrategy(firstBytes); + } + + @Override + public Collection get() { + return terms; // empty list + } + }; + } + + protected ConsumerStrategy createSortedStrategy(BytesRef firstBytes) { + return new ConsumerStrategy() { + BytesRef prev = firstBytes; + + @Override + public ConsumerStrategy apply(BytesRef bytesRef) { + terms.add(bytesRef); + if (bytesRef.compareTo(prev) >= 0) { // keep checking sorted + prev = bytesRef; + return this; + } else { // isn't sorted + return createUnsortedStrategy(); + } + } + + @Override + public Collection get() { + return new SortedBytesSet(terms); + } + }; + } + + protected ConsumerStrategy createUnsortedStrategy() { + return new ConsumerStrategy() { + @Override + public ConsumerStrategy apply(BytesRef bytesRef) { // just storing + terms.add(bytesRef); + return this; + } + + @Override + public Collection get() { + return terms; + } + }; + } + + protected ConsumerStrategy createFrozenStrategy(Collection result) { + return new ConsumerStrategy() { + + @Override + public ConsumerStrategy apply(BytesRef bytesRef) { + throw new IllegalStateException("already build"); + } + + @Override + public Collection get() { + return result; + } + }; + } + + /** + * {@link SortedSet} for passing into TermInSetQuery() + * */ + protected static class SortedBytesSet extends AbstractSet implements SortedSet { + + private final List bytesRefs; + + public SortedBytesSet(List bytesRefs) { + this.bytesRefs = bytesRefs; + } + + @Override + public Iterator iterator() { + return bytesRefs.iterator(); + } + + @Override + public int size() { + return bytesRefs.size(); + } + + @Override + public Comparator comparator() { + return null; + } + + @Override + public SortedSet subSet(BytesRef fromElement, BytesRef toElement) { + throw new UnsupportedOperationException(); + } + + @Override + public SortedSet headSet(BytesRef toElement) { + throw new UnsupportedOperationException(); + } + + @Override + public SortedSet tailSet(BytesRef fromElement) { + throw new UnsupportedOperationException(); + } + + @Override + public BytesRef first() { + throw new UnsupportedOperationException(); + } + + @Override + public BytesRef last() { + throw new UnsupportedOperationException(); + } + + /** + * Dedicated for {@link TermInSetQuery#TermInSetQuery(String, Collection)}. + */ + @Override + public T[] toArray(T[] a) { + return bytesRefs.toArray(a); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/IdFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/IdFieldMapper.java index 658f4228cb0c6..4a4ce31a44a88 100644 --- a/server/src/main/java/org/opensearch/index/mapper/IdFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/IdFieldMapper.java @@ -163,15 +163,15 @@ public Query existsQuery(QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexed(); - BytesRef[] bytesRefs = new BytesRef[values.size()]; - for (int i = 0; i < bytesRefs.length; i++) { + BytesRefsCollectionBuilder bytesRefs = new BytesRefsCollectionBuilder(values.size()); + for (int i = 0; i < values.size(); i++) { Object idObject = values.get(i); if (idObject instanceof BytesRef) { idObject = ((BytesRef) idObject).utf8ToString(); } - bytesRefs[i] = Uid.encodeId(idObject.toString()); + bytesRefs.accept(Uid.encodeId(idObject.toString())); } - return new TermInSetQuery(name(), bytesRefs); + return new TermInSetQuery(name(), bytesRefs.get()); } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 4436e74c821c3..8d2b73b3d2968 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -447,23 +447,26 @@ public Query termsQuery(List values, QueryShardContext context) { if (!context.keywordFieldIndexOrDocValuesEnabled()) { return super.termsQuery(values, context); } - BytesRef[] iBytesRefs = new BytesRef[values.size()]; - BytesRef[] dVByteRefs = new BytesRef[values.size()]; - for (int i = 0; i < iBytesRefs.length; i++) { - iBytesRefs[i] = indexedValueForSearch(values.get(i)); - dVByteRefs[i] = indexedValueForSearch(rewriteForDocValue(values.get(i))); + BytesRefsCollectionBuilder iBytesRefs = new BytesRefsCollectionBuilder(values.size()); + BytesRefsCollectionBuilder dVByteRefs = new BytesRefsCollectionBuilder(values.size()); + for (int i = 0; i < values.size(); i++) { + BytesRef idxBytes = indexedValueForSearch(values.get(i)); + iBytesRefs.accept(idxBytes); + BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i))); + dVByteRefs.accept(dvBytes); } - Query indexQuery = new TermInSetQuery(name(), iBytesRefs); - Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs); + Query indexQuery = new TermInSetQuery(name(), iBytesRefs.get()); + Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get()); return new IndexOrDocValuesQuery(indexQuery, dvQuery); } // if we only have doc_values enabled, we construct a new query with doc_values re-written if (hasDocValues()) { - BytesRef[] bytesRefs = new BytesRef[values.size()]; - for (int i = 0; i < bytesRefs.length; i++) { - bytesRefs[i] = indexedValueForSearch(rewriteForDocValue(values.get(i))); + BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size()); + for (int i = 0; i < values.size(); i++) { + BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i))); + bytesCollector.accept(dvBytes); } - return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs); + return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesCollector.get()); } // has index enabled, we're going to return the query as is return super.termsQuery(values, context); diff --git a/server/src/main/java/org/opensearch/index/mapper/TermBasedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/TermBasedFieldType.java index 78dae2d2c27fc..bedc799b3877d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TermBasedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/TermBasedFieldType.java @@ -93,11 +93,12 @@ public Query termQuery(Object value, QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexed(); - BytesRef[] bytesRefs = new BytesRef[values.size()]; - for (int i = 0; i < bytesRefs.length; i++) { - bytesRefs[i] = indexedValueForSearch(values.get(i)); + BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size()); + for (int i = 0; i < values.size(); i++) { + BytesRef elem = indexedValueForSearch(values.get(i)); + bytesCollector.accept(elem); } - return new TermInSetQuery(name(), bytesRefs); + return new TermInSetQuery(name(), bytesCollector.get()); } } diff --git a/server/src/test/java/org/opensearch/index/mapper/BytesRefsCollectionBuilderTests.java b/server/src/test/java/org/opensearch/index/mapper/BytesRefsCollectionBuilderTests.java new file mode 100644 index 0000000000000..cb4554fa93a4b --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/BytesRefsCollectionBuilderTests.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.util.BytesRef; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.SortedSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class BytesRefsCollectionBuilderTests extends OpenSearchTestCase { + + public void testBuildSortedNotSorted() { + String[] seedStrings = generateRandomStringArray(10, 10, false, true); + List bytesRefList = Arrays.stream(seedStrings).map(BytesRef::new).collect(Collectors.toList()); + List sortedBytesRefs = bytesRefList.stream().sorted().collect(Collectors.toList()); + + Collection sortedSet = assertCollectionBuilt(sortedBytesRefs); + assertCollectionBuilt(bytesRefList); + + assertTrue(sortedSet.isEmpty() || sortedSet instanceof SortedSet); + if (!sortedSet.isEmpty()) { + assertNull(((SortedSet) sortedSet).comparator()); + } + } + + public void testBuildFooBar() { + String[] reverseOrderStrings = new String[] { "foo", "bar" }; + List bytesRefList = Arrays.stream(reverseOrderStrings).map(BytesRef::new).collect(Collectors.toList()); + List sortedBytesRefs = bytesRefList.stream().sorted().collect(Collectors.toList()); + + Collection sortedSet = assertCollectionBuilt(sortedBytesRefs); + Collection reverseList = assertCollectionBuilt(bytesRefList); + + assertTrue(sortedSet instanceof SortedSet); + assertNull(((SortedSet) sortedSet).comparator()); + + assertTrue(reverseList instanceof List); + } + + public void testFrozen() { + BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1); + String[] seedStrings = generateRandomStringArray(5, 10, false, true); + Arrays.stream(seedStrings).map(BytesRef::new).forEachOrdered(builder); + Collection bytesRefCollection = builder.get(); + assertNotNull(bytesRefCollection); + assertEquals(seedStrings.length, bytesRefCollection.size()); + assertThrows(IllegalStateException.class, () -> builder.accept(new BytesRef("illegal state"))); + assertSame(bytesRefCollection, builder.get()); + } + + private static Collection assertCollectionBuilt(List sortedBytesRefs) { + BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1); + sortedBytesRefs.stream().forEachOrdered(builder); + Collection bytesRefCollection = builder.get(); + assertEquals(bytesRefCollection.size(), sortedBytesRefs.size()); + for (Iterator iterator = bytesRefCollection.iterator(), iterator2 = sortedBytesRefs.iterator(); iterator.hasNext() + || iterator2.hasNext();) { + assertTrue(iterator.next().bytesEquals(iterator2.next())); + } + return bytesRefCollection; + } + + public void testCoverUnsupported() { + BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1); + Stream.of("in", "order").map(BytesRef::new).forEachOrdered(builder); + SortedSet bytesRefCollection = (SortedSet) builder.get(); + assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.subSet(new BytesRef("a"), new BytesRef("z"))); + assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.headSet(new BytesRef("a"))); + assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.tailSet(new BytesRef("a"))); + assertThrows(UnsupportedOperationException.class, bytesRefCollection::first); + assertThrows(UnsupportedOperationException.class, bytesRefCollection::last); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index d52426c67d256..0d5843097f495 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -81,6 +81,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; public class KeywordFieldTypeTests extends FieldTypeTestCase { @@ -198,6 +199,27 @@ public void testTermsQuery() { ); } + public void testTermsSortedQuery() { + String[] seedStrings = generateRandomStringArray(10, 10, false, true); + List bytesRefList = Arrays.stream(seedStrings).map(BytesRef::new).collect(Collectors.toList()); + List sortedStrings = bytesRefList.stream().sorted().map(BytesRef::utf8ToString).collect(Collectors.toList()); + + MappedFieldType ft = new KeywordFieldType("field"); + Query expected = new IndexOrDocValuesQuery( + new TermInSetQuery("field", bytesRefList), + new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", bytesRefList) + ); + assertEquals(expected, ft.termsQuery(sortedStrings, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); + + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + Query expectedIndex = new TermInSetQuery("field", bytesRefList); + assertEquals(expectedIndex, onlyIndexed.termsQuery(sortedStrings, null)); + + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + Query expectedDocValues = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", bytesRefList); + assertEquals(expectedDocValues, onlyDocValues.termsQuery(sortedStrings, null)); + } + public void testExistsQuery() { { KeywordFieldType ft = new KeywordFieldType("field");