Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Rule Based Auto-tagging] Add in-memory attribute value store ([#17342](https://github.com/opensearch-project/OpenSearch/pull/17342))
- [Rule Based Auto-tagging] Add in-memory rule processing service ([#17365](https://github.com/opensearch-project/OpenSearch/pull/17365))
- Change priority for scheduling reroute during timeout([#16445](https://github.com/opensearch-project/OpenSearch/pull/16445))
- Faster `terms_query` with already sorted terms ([#17714](https://github.com/opensearch-project/OpenSearch/pull/17714))

### Dependencies
- Bump `dnsjava:dnsjava` from 3.6.2 to 3.6.3 ([#17231](https://github.com/opensearch-project/OpenSearch/pull/17231))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;

import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;

/**
* Purposed for passing terms into {@link TermInSetQuery}.
* If the given terms are sorted already, it wrap it with a SortedSet stub.
* Otherwise, it passes terms as list.
*/
public class BytesRefsCollectionBuilder implements Consumer<BytesRef>, Supplier<Collection<BytesRef>> {

/**
* Strategy for building BytesRef collection.
* */
protected interface ConsumerStrategy extends Function<BytesRef, ConsumerStrategy>, Supplier<Collection<BytesRef>> {}

public BytesRefsCollectionBuilder(int sizeExpected) {
terms = new ArrayList<>(sizeExpected);
}

protected final List<BytesRef> terms;
protected ConsumerStrategy delegate = createStartStrategy();

@Override
public void accept(BytesRef bytesRef) {
delegate = delegate.apply(bytesRef);
}

@Override
public Collection<BytesRef> get() {
Collection<BytesRef> result = delegate.get();
delegate = createFrozenStrategy(result);
return result;
}

protected ConsumerStrategy createStartStrategy() {
return new ConsumerStrategy() {
@Override
public ConsumerStrategy apply(BytesRef firstBytes) {
terms.add(firstBytes); // firstly, just store
return createSortedStrategy(firstBytes);
}

@Override
public Collection<BytesRef> get() {
return terms; // empty list

Check warning on line 66 in server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/mapper/BytesRefsCollectionBuilder.java#L66

Added line #L66 was not covered by tests
}
};
}

protected ConsumerStrategy createSortedStrategy(BytesRef firstBytes) {
return new ConsumerStrategy() {
BytesRef prev = firstBytes;

@Override
public ConsumerStrategy apply(BytesRef bytesRef) {
terms.add(bytesRef);
if (bytesRef.compareTo(prev) >= 0) { // keep checking sorted
prev = bytesRef;
return this;
} else { // isn't sorted
return createUnsortedStrategy();
}
}

@Override
public Collection<BytesRef> get() {
return new SortedBytesSet(terms);
}
};
}

protected ConsumerStrategy createUnsortedStrategy() {
return new ConsumerStrategy() {
@Override
public ConsumerStrategy apply(BytesRef bytesRef) { // just storing
terms.add(bytesRef);
return this;
}

@Override
public Collection<BytesRef> get() {
return terms;
}
};
}

protected ConsumerStrategy createFrozenStrategy(Collection<BytesRef> result) {
return new ConsumerStrategy() {

@Override
public ConsumerStrategy apply(BytesRef bytesRef) {
throw new IllegalStateException("already build");
}

@Override
public Collection<BytesRef> get() {
return result;
}
};
}

/**
* {@link SortedSet<BytesRef>} for passing into TermInSetQuery()
* */
protected static class SortedBytesSet extends AbstractSet<BytesRef> implements SortedSet<BytesRef> {

private final List<BytesRef> bytesRefs;

public SortedBytesSet(List<BytesRef> bytesRefs) {
this.bytesRefs = bytesRefs;
}

@Override
public Iterator<BytesRef> iterator() {
return bytesRefs.iterator();
}

@Override
public int size() {
return bytesRefs.size();
}

@Override
public Comparator<? super BytesRef> comparator() {
return null;
}

@Override
public SortedSet<BytesRef> subSet(BytesRef fromElement, BytesRef toElement) {
throw new UnsupportedOperationException();
}

@Override
public SortedSet<BytesRef> headSet(BytesRef toElement) {
throw new UnsupportedOperationException();
}

@Override
public SortedSet<BytesRef> tailSet(BytesRef fromElement) {
throw new UnsupportedOperationException();
}

@Override
public BytesRef first() {
throw new UnsupportedOperationException();
}

@Override
public BytesRef last() {
throw new UnsupportedOperationException();
}

/**
* Dedicated for {@link TermInSetQuery#TermInSetQuery(String, Collection)}.
*/
@Override
public <T> T[] toArray(T[] a) {
return bytesRefs.toArray(a);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,15 +163,15 @@ public Query existsQuery(QueryShardContext context) {
@Override
public Query termsQuery(List<?> values, QueryShardContext context) {
failIfNotIndexed();
BytesRef[] bytesRefs = new BytesRef[values.size()];
for (int i = 0; i < bytesRefs.length; i++) {
BytesRefsCollectionBuilder bytesRefs = new BytesRefsCollectionBuilder(values.size());
for (int i = 0; i < values.size(); i++) {
Object idObject = values.get(i);
if (idObject instanceof BytesRef) {
idObject = ((BytesRef) idObject).utf8ToString();
}
bytesRefs[i] = Uid.encodeId(idObject.toString());
bytesRefs.accept(Uid.encodeId(idObject.toString()));
}
return new TermInSetQuery(name(), bytesRefs);
return new TermInSetQuery(name(), bytesRefs.get());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -447,23 +447,26 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.termsQuery(values, context);
}
BytesRef[] iBytesRefs = new BytesRef[values.size()];
BytesRef[] dVByteRefs = new BytesRef[values.size()];
for (int i = 0; i < iBytesRefs.length; i++) {
iBytesRefs[i] = indexedValueForSearch(values.get(i));
dVByteRefs[i] = indexedValueForSearch(rewriteForDocValue(values.get(i)));
BytesRefsCollectionBuilder iBytesRefs = new BytesRefsCollectionBuilder(values.size());
BytesRefsCollectionBuilder dVByteRefs = new BytesRefsCollectionBuilder(values.size());
for (int i = 0; i < values.size(); i++) {
BytesRef idxBytes = indexedValueForSearch(values.get(i));
iBytesRefs.accept(idxBytes);
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
dVByteRefs.accept(dvBytes);
}
Query indexQuery = new TermInSetQuery(name(), iBytesRefs);
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs);
Query indexQuery = new TermInSetQuery(name(), iBytesRefs.get());
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get());
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
}
// if we only have doc_values enabled, we construct a new query with doc_values re-written
if (hasDocValues()) {
BytesRef[] bytesRefs = new BytesRef[values.size()];
for (int i = 0; i < bytesRefs.length; i++) {
bytesRefs[i] = indexedValueForSearch(rewriteForDocValue(values.get(i)));
BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size());
for (int i = 0; i < values.size(); i++) {
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
bytesCollector.accept(dvBytes);
}
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs);
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesCollector.get());
}
// has index enabled, we're going to return the query as is
return super.termsQuery(values, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,12 @@ public Query termQuery(Object value, QueryShardContext context) {
@Override
public Query termsQuery(List<?> values, QueryShardContext context) {
failIfNotIndexed();
BytesRef[] bytesRefs = new BytesRef[values.size()];
for (int i = 0; i < bytesRefs.length; i++) {
bytesRefs[i] = indexedValueForSearch(values.get(i));
BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size());
for (int i = 0; i < values.size(); i++) {
BytesRef elem = indexedValueForSearch(values.get(i));
bytesCollector.accept(elem);
}
return new TermInSetQuery(name(), bytesRefs);
return new TermInSetQuery(name(), bytesCollector.get());
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.util.BytesRef;
import org.opensearch.test.OpenSearchTestCase;

import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class BytesRefsCollectionBuilderTests extends OpenSearchTestCase {

public void testBuildSortedNotSorted() {
String[] seedStrings = generateRandomStringArray(10, 10, false, true);
List<BytesRef> bytesRefList = Arrays.stream(seedStrings).map(BytesRef::new).collect(Collectors.toList());
List<BytesRef> sortedBytesRefs = bytesRefList.stream().sorted().collect(Collectors.toList());

Collection<BytesRef> sortedSet = assertCollectionBuilt(sortedBytesRefs);
assertCollectionBuilt(bytesRefList);

assertTrue(sortedSet.isEmpty() || sortedSet instanceof SortedSet);
if (!sortedSet.isEmpty()) {
assertNull(((SortedSet<BytesRef>) sortedSet).comparator());
}
}

public void testBuildFooBar() {
String[] reverseOrderStrings = new String[] { "foo", "bar" };
List<BytesRef> bytesRefList = Arrays.stream(reverseOrderStrings).map(BytesRef::new).collect(Collectors.toList());
List<BytesRef> sortedBytesRefs = bytesRefList.stream().sorted().collect(Collectors.toList());

Collection<BytesRef> sortedSet = assertCollectionBuilt(sortedBytesRefs);
Collection<BytesRef> reverseList = assertCollectionBuilt(bytesRefList);

assertTrue(sortedSet instanceof SortedSet);
assertNull(((SortedSet<BytesRef>) sortedSet).comparator());

assertTrue(reverseList instanceof List);
}

public void testFrozen() {
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
String[] seedStrings = generateRandomStringArray(5, 10, false, true);
Arrays.stream(seedStrings).map(BytesRef::new).forEachOrdered(builder);
Collection<BytesRef> bytesRefCollection = builder.get();
assertNotNull(bytesRefCollection);
assertEquals(seedStrings.length, bytesRefCollection.size());
assertThrows(IllegalStateException.class, () -> builder.accept(new BytesRef("illegal state")));
assertSame(bytesRefCollection, builder.get());
}

private static Collection<BytesRef> assertCollectionBuilt(List<BytesRef> sortedBytesRefs) {
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
sortedBytesRefs.stream().forEachOrdered(builder);
Collection<BytesRef> bytesRefCollection = builder.get();
assertEquals(bytesRefCollection.size(), sortedBytesRefs.size());
for (Iterator<BytesRef> iterator = bytesRefCollection.iterator(), iterator2 = sortedBytesRefs.iterator(); iterator.hasNext()
|| iterator2.hasNext();) {
assertTrue(iterator.next().bytesEquals(iterator2.next()));
}
return bytesRefCollection;
}

public void testCoverUnsupported() {
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
Stream.of("in", "order").map(BytesRef::new).forEachOrdered(builder);
SortedSet<BytesRef> bytesRefCollection = (SortedSet<BytesRef>) builder.get();
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.subSet(new BytesRef("a"), new BytesRef("z")));
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.headSet(new BytesRef("a")));
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.tailSet(new BytesRef("a")));
assertThrows(UnsupportedOperationException.class, bytesRefCollection::first);
assertThrows(UnsupportedOperationException.class, bytesRefCollection::last);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class KeywordFieldTypeTests extends FieldTypeTestCase {

Expand Down Expand Up @@ -198,6 +199,27 @@ public void testTermsQuery() {
);
}

public void testTermsSortedQuery() {
String[] seedStrings = generateRandomStringArray(10, 10, false, true);
List<BytesRef> bytesRefList = Arrays.stream(seedStrings).map(BytesRef::new).collect(Collectors.toList());
List<String> sortedStrings = bytesRefList.stream().sorted().map(BytesRef::utf8ToString).collect(Collectors.toList());

MappedFieldType ft = new KeywordFieldType("field");
Query expected = new IndexOrDocValuesQuery(
new TermInSetQuery("field", bytesRefList),
new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", bytesRefList)
);
assertEquals(expected, ft.termsQuery(sortedStrings, MOCK_QSC_ENABLE_INDEX_DOC_VALUES));

MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap());
Query expectedIndex = new TermInSetQuery("field", bytesRefList);
assertEquals(expectedIndex, onlyIndexed.termsQuery(sortedStrings, null));

MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap());
Query expectedDocValues = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", bytesRefList);
assertEquals(expectedDocValues, onlyDocValues.termsQuery(sortedStrings, null));
}

public void testExistsQuery() {
{
KeywordFieldType ft = new KeywordFieldType("field");
Expand Down
Loading