From a36aaa46d9bb89917bdb6c58b35e2c0769dfc322 Mon Sep 17 00:00:00 2001 From: Ruirui Zhang Date: Fri, 19 Sep 2025 13:55:38 -0700 Subject: [PATCH] restructure trie to store values as a set Signed-off-by: Ruirui Zhang --- CHANGELOG.md | 1 + .../rule/storage/AttributeValueStore.java | 21 ++++- .../storage/DefaultAttributeValueStore.java | 76 +++++++++---------- .../storage/AttributeValueStoreTests.java | 54 +++++++++++-- .../rule/InMemoryRuleProcessingService.java | 7 +- 5 files changed, 108 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d740e1fa958..d6abddd96cc46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920)) - Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060)) - The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065) ([#19097](https://github.com/opensearch-project/OpenSearch/pull/19097))) +- [Rule-based Auto-tagging] restructure the in-memory trie to store values as a set ([#19344](https://github.com/opensearch-project/OpenSearch/pull/19344)) - Add a toBuilder method in EngineConfig to support easy modification of configs([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054)) - Add StoreFactory plugin interface for custom Store implementations([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091)) - Use S3CrtClient for higher throughput while uploading files to S3 ([#18800](https://github.com/opensearch-project/OpenSearch/pull/18800)) diff --git a/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java b/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java index 98e9cc4041318..bfb2ba68a07d4 100644 --- a/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java +++ b/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java @@ -8,7 +8,10 @@ package org.opensearch.rule.storage; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; +import java.util.Set; /** * This interface provides apis to store Rule attribute values @@ -21,16 +24,32 @@ public interface AttributeValueStore { */ void put(K key, V value); + /** + * removes the key and associated value from attribute value store + * @param key key of the value to be removed + * @param value to be removed + */ + default void remove(K key, V value) { + remove(key); + } + /** * removes the key and associated value from attribute value store * @param key to be removed */ void remove(K key); + /** + * Returns the values associated with the key + * @param key in the data structure + */ + default List> getAll(K key) { + return new ArrayList<>(); + } + /** * Returns the value associated with the key * @param key in the data structure - * @return */ Optional get(K key); diff --git a/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java b/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java index c0f0313c383e7..00196f257d4d5 100644 --- a/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java +++ b/modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java @@ -10,8 +10,11 @@ import org.apache.commons.collections4.trie.PatriciaTrie; -import java.util.Map; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.concurrent.locks.ReentrantReadWriteLock; /** @@ -21,7 +24,7 @@ * ref: https://commons.apache.org/proper/commons-collections/javadocs/api-4.4/org/apache/commons/collections4/trie/PatriciaTrie.html */ public class DefaultAttributeValueStore implements AttributeValueStore { - private final PatriciaTrie trie; + private final PatriciaTrie> trie; private static final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); private static final ReentrantReadWriteLock.ReadLock readLock = lock.readLock(); private static final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock(); @@ -37,7 +40,7 @@ public DefaultAttributeValueStore() { * Main constructor * @param trie A Patricia Trie */ - public DefaultAttributeValueStore(PatriciaTrie trie) { + public DefaultAttributeValueStore(PatriciaTrie> trie) { this.trie = trie; } @@ -45,66 +48,57 @@ public DefaultAttributeValueStore(PatriciaTrie trie) { public void put(K key, V value) { writeLock.lock(); try { - trie.put(key, value); + trie.computeIfAbsent(key, k -> new HashSet<>()).add(value); } finally { writeLock.unlock(); } } @Override - public void remove(String key) { + public void remove(K key, V value) { writeLock.lock(); try { - trie.remove(key); + trie.computeIfPresent(key, (k, values) -> { + values.remove(value); + return values.isEmpty() ? null : values; + }); } finally { writeLock.unlock(); } } @Override - public Optional get(String key) { + public void remove(K key) { + throw new UnsupportedOperationException("This remove(K key) function is not supported within DefaultAttributeValueStore."); + } + + @Override + public Optional get(K key) { + throw new UnsupportedOperationException("This get(K key) function is not supported within DefaultAttributeValueStore."); + } + + @Override + public List> getAll(String key) { readLock.lock(); try { - /** - * Since we are inserting prefixes into the trie and searching for larger strings - * It is important to find the largest matching prefix key in the trie efficiently - * Hence we can do binary search - */ - final String longestMatchingPrefix = findLongestMatchingPrefix(key); + List> results = new ArrayList<>(); + StringBuilder prefixBuilder = new StringBuilder(key); - /** - * Now there are following cases for this prefix - * 1. There is a Rule which has this prefix as one of the attribute values. In this case we should return the - * Rule's label otherwise send empty - */ - for (Map.Entry possibleMatch : trie.prefixMap(longestMatchingPrefix).entrySet()) { - if (key.startsWith(possibleMatch.getKey())) { - return Optional.of(possibleMatch.getValue()); + for (int i = key.length(); i >= 0; i--) { + String prefix = prefixBuilder.toString(); + Set value = trie.get(prefix); + if (value != null && !value.isEmpty()) { + results.add(value); + } + if (!prefixBuilder.isEmpty()) { + prefixBuilder.deleteCharAt(prefixBuilder.length() - 1); } } + + return results; } finally { readLock.unlock(); } - return Optional.empty(); - } - - private String findLongestMatchingPrefix(String key) { - int low = 0; - int high = key.length() - 1; - - while (low < high) { - int mid = (high + low + 1) / 2; - /** - * This operation has O(1) complexity because prefixMap returns only the iterator - */ - if (!trie.prefixMap(key.substring(0, mid)).isEmpty()) { - low = mid; - } else { - high = mid - 1; - } - } - - return key.substring(0, low); } @Override diff --git a/modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java b/modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java index 2340cc3327337..dbd2e2063d87d 100644 --- a/modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java +++ b/modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java @@ -13,6 +13,8 @@ import java.util.ArrayList; import java.util.List; +import java.util.Optional; +import java.util.Set; public class AttributeValueStoreTests extends OpenSearchTestCase { @@ -26,27 +28,33 @@ public void setUp() throws Exception { public void testPut() { subjectUnderTest.put("foo", "bar"); - assertEquals("bar", subjectUnderTest.get("foo").get()); + assertEquals("bar", subjectUnderTest.getAll("foo").getFirst().iterator().next()); + subjectUnderTest.put("foo", "sing"); + assertEquals(1, subjectUnderTest.getAll("foo").size()); + assertEquals(2, subjectUnderTest.getAll("foo").get(0).size()); + assertTrue(subjectUnderTest.getAll("foo").get(0).contains("sing")); } public void testRemove() { subjectUnderTest.put("foo", "bar"); - subjectUnderTest.remove("foo"); + subjectUnderTest.remove("foo", "bar"); assertEquals(0, subjectUnderTest.size()); } public void tesGet() { subjectUnderTest.put("foo", "bar"); - assertEquals("bar", subjectUnderTest.get("foo").get()); + assertEquals("bar", subjectUnderTest.getAll("foo").getFirst()); + subjectUnderTest.put("foo", "sing"); + assertEquals(2, subjectUnderTest.getAll("foo").size()); } public void testGetWhenNoProperPrefixIsPresent() { subjectUnderTest.put("foo", "bar"); subjectUnderTest.put("foodip", "sing"); - assertTrue(subjectUnderTest.get("foxtail").isEmpty()); + assertTrue(subjectUnderTest.getAll("foxtail").isEmpty()); subjectUnderTest.put("fox", "lucy"); - assertFalse(subjectUnderTest.get("foxtail").isEmpty()); + assertFalse(subjectUnderTest.getAll("foxtail").isEmpty()); } public void testClear() { @@ -97,7 +105,7 @@ public void run() { try { Thread.sleep(random().nextInt(100)); for (String key : toReadKeys) { - subjectUnderTest.get(key); + subjectUnderTest.getAll(key); } } catch (InterruptedException e) {} } @@ -123,4 +131,38 @@ public void run() { } catch (InterruptedException e) {} } } + + public void testDefaultMethods() { + class DummyStore implements AttributeValueStore { + boolean removeCalled = false; + + @Override + public void put(String key, String value) {} + + @Override + public void remove(String key) { + removeCalled = true; + } + + @Override + public Optional get(String key) { + return Optional.empty(); + } + + @Override + public void clear() {} + + @Override + public int size() { + return 0; + } + } + + DummyStore store = new DummyStore(); + store.remove("foo", "bar"); + assertTrue(store.removeCalled); + List> result = store.getAll("foo"); + assertNotNull(result); + assertTrue(result.isEmpty()); + } } diff --git a/modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java b/modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java index f44b5817fc6be..7cf8b3bf8daec 100644 --- a/modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java +++ b/modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java @@ -66,7 +66,7 @@ private void perform(Rule rule, BiConsumer>, Ru private void removeOperation(Map.Entry> attributeEntry, Rule rule) { AttributeValueStore valueStore = attributeValueStoreFactory.getAttributeValueStore(attributeEntry.getKey()); for (String value : attributeEntry.getValue()) { - valueStore.remove(value.replace(WILDCARD, "")); + valueStore.remove(value.replace(WILDCARD, ""), rule.getFeatureValue()); } } @@ -92,12 +92,13 @@ public Optional evaluateLabel(List> attribute attributeExtractor.getAttribute() ); for (String value : attributeExtractor.extract()) { - Optional possibleMatch = valueStore.get(value); + List> candidateMatches = valueStore.getAll(value); - if (possibleMatch.isEmpty()) { + if (candidateMatches == null || candidateMatches.isEmpty()) { return Optional.empty(); } + Optional possibleMatch = candidateMatches.get(0).stream().findAny(); if (result.isEmpty()) { result = possibleMatch; } else {