Skip to content

Commit 00f931c

Browse files
committed
restructure trie to store values as a set
Signed-off-by: Ruirui Zhang <[email protected]>
1 parent 4f987eb commit 00f931c

File tree

5 files changed

+62
-50
lines changed

5 files changed

+62
-50
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99
- Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920))
1010
- Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060))
1111
- The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065))
12+
- [Rule-based Auto-tagging] restructure the in-memory trie to store values as a set ([#19344](https://github.com/opensearch-project/OpenSearch/pull/19344))
1213
- Add a toBuilder method in EngineConfig to support easy modification of configs([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054))
1314
- Add StoreFactory plugin interface for custom Store implementations([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091))
1415
- Use S3CrtClient for higher throughput while uploading files to S3 ([#18800](https://github.com/opensearch-project/OpenSearch/pull/18800))

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
package org.opensearch.rule.storage;
1010

11+
import java.util.List;
1112
import java.util.Optional;
13+
import java.util.Set;
1214

1315
/**
1416
* This interface provides apis to store Rule attribute values
@@ -21,18 +23,36 @@ public interface AttributeValueStore<K, V> {
2123
*/
2224
void put(K key, V value);
2325

26+
/**
27+
* removes the key and associated value from attribute value store
28+
* @param key key of the value to be removed
29+
* @param value to be removed
30+
*/
31+
void remove(K key, V value);
32+
2433
/**
2534
* removes the key and associated value from attribute value store
2635
* @param key to be removed
2736
*/
28-
void remove(K key);
37+
@Deprecated
38+
default void removeOld(K key) {};
39+
40+
/**
41+
* Returns the value associated with the key
42+
* @param key in the data structure
43+
* @return
44+
*/
45+
List<Set<V>> get(K key);
2946

3047
/**
3148
* Returns the value associated with the key
3249
* @param key in the data structure
3350
* @return
3451
*/
35-
Optional<V> get(K key);
52+
@Deprecated
53+
default Optional<V> getOld(K key) {
54+
return Optional.empty();
55+
};
3656

3757
/**
3858
* Clears all the keys and their associated values from the attribute value store

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java

Lines changed: 26 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010

1111
import org.apache.commons.collections4.trie.PatriciaTrie;
1212

13-
import java.util.Map;
14-
import java.util.Optional;
13+
import java.util.ArrayList;
14+
import java.util.HashSet;
15+
import java.util.List;
16+
import java.util.Set;
1517
import java.util.concurrent.locks.ReentrantReadWriteLock;
1618

1719
/**
@@ -21,7 +23,7 @@
2123
* ref: https://commons.apache.org/proper/commons-collections/javadocs/api-4.4/org/apache/commons/collections4/trie/PatriciaTrie.html
2224
*/
2325
public class DefaultAttributeValueStore<K extends String, V> implements AttributeValueStore<K, V> {
24-
private final PatriciaTrie<V> trie;
26+
private final PatriciaTrie<Set<V>> trie;
2527
private static final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
2628
private static final ReentrantReadWriteLock.ReadLock readLock = lock.readLock();
2729
private static final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock();
@@ -37,74 +39,56 @@ public DefaultAttributeValueStore() {
3739
* Main constructor
3840
* @param trie A Patricia Trie
3941
*/
40-
public DefaultAttributeValueStore(PatriciaTrie<V> trie) {
42+
public DefaultAttributeValueStore(PatriciaTrie<Set<V>> trie) {
4143
this.trie = trie;
4244
}
4345

4446
@Override
4547
public void put(K key, V value) {
4648
writeLock.lock();
4749
try {
48-
trie.put(key, value);
50+
trie.computeIfAbsent(key, k -> new HashSet<>()).add(value);
4951
} finally {
5052
writeLock.unlock();
5153
}
5254
}
5355

5456
@Override
55-
public void remove(String key) {
57+
public void remove(K key, V value) {
5658
writeLock.lock();
5759
try {
58-
trie.remove(key);
60+
trie.computeIfPresent(key, (k, values) -> {
61+
assert values.contains(value);
62+
values.remove(value);
63+
return values.isEmpty() ? null : values;
64+
});
5965
} finally {
6066
writeLock.unlock();
6167
}
6268
}
6369

6470
@Override
65-
public Optional<V> get(String key) {
71+
public List<Set<V>> get(String key) {
6672
readLock.lock();
6773
try {
68-
/**
69-
* Since we are inserting prefixes into the trie and searching for larger strings
70-
* It is important to find the largest matching prefix key in the trie efficiently
71-
* Hence we can do binary search
72-
*/
73-
final String longestMatchingPrefix = findLongestMatchingPrefix(key);
74+
List<Set<V>> results = new ArrayList<>();
75+
StringBuilder prefixBuilder = new StringBuilder(key);
7476

75-
/**
76-
* Now there are following cases for this prefix
77-
* 1. There is a Rule which has this prefix as one of the attribute values. In this case we should return the
78-
* Rule's label otherwise send empty
79-
*/
80-
for (Map.Entry<String, V> possibleMatch : trie.prefixMap(longestMatchingPrefix).entrySet()) {
81-
if (key.startsWith(possibleMatch.getKey())) {
82-
return Optional.of(possibleMatch.getValue());
77+
for (int i = key.length(); i >= 0; i--) {
78+
String prefix = prefixBuilder.toString();
79+
Set<V> value = trie.get(prefix);
80+
if (value != null && !value.isEmpty()) {
81+
results.add(value);
82+
}
83+
if (!prefixBuilder.isEmpty()) {
84+
prefixBuilder.deleteCharAt(prefixBuilder.length() - 1);
8385
}
8486
}
87+
88+
return results;
8589
} finally {
8690
readLock.unlock();
8791
}
88-
return Optional.empty();
89-
}
90-
91-
private String findLongestMatchingPrefix(String key) {
92-
int low = 0;
93-
int high = key.length() - 1;
94-
95-
while (low < high) {
96-
int mid = (high + low + 1) / 2;
97-
/**
98-
* This operation has O(1) complexity because prefixMap returns only the iterator
99-
*/
100-
if (!trie.prefixMap(key.substring(0, mid)).isEmpty()) {
101-
low = mid;
102-
} else {
103-
high = mid - 1;
104-
}
105-
}
106-
107-
return key.substring(0, low);
10892
}
10993

11094
@Override

modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,24 @@ public void setUp() throws Exception {
2626

2727
public void testPut() {
2828
subjectUnderTest.put("foo", "bar");
29-
assertEquals("bar", subjectUnderTest.get("foo").get());
29+
assertEquals("bar", subjectUnderTest.get("foo").getFirst().iterator().next());
30+
subjectUnderTest.put("foo", "sing");
31+
assertEquals(1, subjectUnderTest.get("foo").size());
32+
assertEquals(2, subjectUnderTest.get("foo").get(0).size());
33+
assertTrue(subjectUnderTest.get("foo").get(0).contains("sing"));
3034
}
3135

3236
public void testRemove() {
3337
subjectUnderTest.put("foo", "bar");
34-
subjectUnderTest.remove("foo");
38+
subjectUnderTest.remove("foo", "bar");
3539
assertEquals(0, subjectUnderTest.size());
3640
}
3741

3842
public void tesGet() {
3943
subjectUnderTest.put("foo", "bar");
40-
assertEquals("bar", subjectUnderTest.get("foo").get());
44+
assertEquals("bar", subjectUnderTest.get("foo").getFirst());
45+
subjectUnderTest.put("foo", "sing");
46+
assertEquals(2, subjectUnderTest.get("foo").size());
4147
}
4248

4349
public void testGetWhenNoProperPrefixIsPresent() {

modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ private void perform(Rule rule, BiConsumer<Map.Entry<Attribute, Set<String>>, Ru
6666
private void removeOperation(Map.Entry<Attribute, Set<String>> attributeEntry, Rule rule) {
6767
AttributeValueStore<String, String> valueStore = attributeValueStoreFactory.getAttributeValueStore(attributeEntry.getKey());
6868
for (String value : attributeEntry.getValue()) {
69-
valueStore.remove(value.replace(WILDCARD, ""));
69+
valueStore.remove(value.replace(WILDCARD, ""), rule.getFeatureValue());
7070
}
7171
}
7272

@@ -92,12 +92,13 @@ public Optional<String> evaluateLabel(List<AttributeExtractor<String>> attribute
9292
attributeExtractor.getAttribute()
9393
);
9494
for (String value : attributeExtractor.extract()) {
95-
Optional<String> possibleMatch = valueStore.get(value);
95+
List<Set<String>> candidateMatches = valueStore.get(value);
9696

97-
if (possibleMatch.isEmpty()) {
97+
if (candidateMatches == null || candidateMatches.isEmpty()) {
9898
return Optional.empty();
9999
}
100100

101+
Optional<String> possibleMatch = candidateMatches.get(0).stream().findAny();
101102
if (result.isEmpty()) {
102103
result = possibleMatch;
103104
} else {

0 commit comments

Comments
 (0)