From 2d4d0169609f8389eb33d150c74a01dc510b1ebc Mon Sep 17 00:00:00 2001 From: Ruxo Zheng Date: Fri, 8 Sep 2023 19:26:01 +0700 Subject: [PATCH 1/3] Implement Remove for Trie --- TrieNet.Test/BaseTrieTest.cs | 11 ++++-- TrieNet.Test/Performance/FakeTrie.cs | 8 ++++ TrieNet.Test/TrieTest.cs | 44 ++++++++++++++++++++++ TrieNet/ITrie.cs | 2 + TrieNet/PatriciaTrie/PatriciaSuffixTrie.cs | 8 ++++ TrieNet/PatriciaTrie/PatriciaTrie.cs | 8 ++++ TrieNet/Trie/ConcurrentTrie.cs | 8 ++++ TrieNet/Trie/SuffixTrie.cs | 8 ++++ TrieNet/Trie/Trie.cs | 8 ++++ TrieNet/Trie/TrieNode.cs | 14 +++++-- TrieNet/Trie/TrieNodeBase.cs | 14 +++++++ TrieNet/Ukkonen/CharUkkonenTrie.cs | 8 ++++ 12 files changed, 134 insertions(+), 7 deletions(-) diff --git a/TrieNet.Test/BaseTrieTest.cs b/TrieNet.Test/BaseTrieTest.cs index 6f1deca..c03e925 100644 --- a/TrieNet.Test/BaseTrieTest.cs +++ b/TrieNet.Test/BaseTrieTest.cs @@ -13,15 +13,20 @@ namespace TrieNet.Test; public abstract class BaseTrieTest { [OneTimeSetUp] public virtual void Setup() { - Trie = CreateTrie(); - for (var i = 0; i < Words40.Length; i++) Trie.Add(Words40[i], i); + Trie = CreateDefaultTrie(); + } + + public ITrie CreateDefaultTrie() { + var trie = CreateTrie(); + for (var i = 0; i < Words40.Length; i++) trie.Add(Words40[i], i); + return trie; } protected ITrie Trie { get; private set; } protected abstract ITrie CreateTrie(); - public readonly string[] Words40 = { + public static readonly string[] Words40 = { "daubreelite", "daubingly", "daubingly", diff --git a/TrieNet.Test/Performance/FakeTrie.cs b/TrieNet.Test/Performance/FakeTrie.cs index d6e0da5..b0122de 100644 --- a/TrieNet.Test/Performance/FakeTrie.cs +++ b/TrieNet.Test/Performance/FakeTrie.cs @@ -24,4 +24,12 @@ public void Add(string key, T value) { var keyValPair = new KeyValuePair(key, value); stack.Push(keyValPair); } + + public void Remove(string key, T value) { + throw new System.NotImplementedException(); + } + + public void Remove(string key, params T[] values) { + throw 
new System.NotImplementedException(); + } } \ No newline at end of file diff --git a/TrieNet.Test/TrieTest.cs b/TrieNet.Test/TrieTest.cs index 8236969..354b8c2 100644 --- a/TrieNet.Test/TrieTest.cs +++ b/TrieNet.Test/TrieTest.cs @@ -1,6 +1,7 @@ // This code is distributed under MIT license. Copyright (c) 2013 George Mamaladze // See license.txt or http://opensource.org/licenses/mit-license.php +using System; using System.Linq; using NUnit.Framework; using TrieNet.Trie; @@ -23,4 +24,47 @@ public void ExhaustiveParallelAddFails() { .ForAll(phrase => trie.Add(phrase, phrase.GetHashCode())); } } + + [Test] + public void RemoveKey() { + var trie = CreateDefaultTrie(); + + Assert.AreEqual(new [] { 21, 22, 23}, trie.Retrieve("capo")); + Assert.AreEqual(new [] { 22, 23}, trie.Retrieve("capoc")); + + trie.Remove("capoc", 22); + + Assert.AreEqual(new [] { 21, 23}, trie.Retrieve("capo")); + Assert.AreEqual(new [] { 23}, trie.Retrieve("capoc")); + } + + [Test] + public void RemoveMultipleKeys() { + var trie = CreateDefaultTrie(); + + trie.Remove("capoc", 22, 23); + + Assert.AreEqual(new [] { 21}, trie.Retrieve("capo")); + Assert.AreEqual(Enumerable.Empty(), trie.Retrieve("capoc")); + } + + [Test] + public void RemovePartialMatchedKey_RemoveAllEntries() { + var trie = CreateDefaultTrie(); + + trie.Remove("capo", 22, 23); + + Assert.AreEqual(new [] { 21 }, trie.Retrieve("capo")); + Assert.AreEqual(Array.Empty(), trie.Retrieve("capoc")); + } + + [Test] + public void RemoveLongerKey_HasNoEffect() { + var trie = CreateDefaultTrie(); + + trie.Remove("capocissss", 22); + + Assert.AreEqual(new [] { 21, 22, 23}, trie.Retrieve("capo")); + Assert.AreEqual(new [] { 22, 23}, trie.Retrieve("capoc")); + } } \ No newline at end of file diff --git a/TrieNet/ITrie.cs b/TrieNet/ITrie.cs index 12483c9..f4241c3 100644 --- a/TrieNet/ITrie.cs +++ b/TrieNet/ITrie.cs @@ -14,4 +14,6 @@ namespace TrieNet; public interface ITrie { IEnumerable Retrieve(string query); void Add(string key, TValue value); + 
void Remove(string key, TValue value); + void Remove(string key, params TValue[] values); } \ No newline at end of file diff --git a/TrieNet/PatriciaTrie/PatriciaSuffixTrie.cs b/TrieNet/PatriciaTrie/PatriciaSuffixTrie.cs index 6e21ee1..f1d16fb 100644 --- a/TrieNet/PatriciaTrie/PatriciaSuffixTrie.cs +++ b/TrieNet/PatriciaTrie/PatriciaSuffixTrie.cs @@ -39,6 +39,14 @@ public void Add(string key, TValue value) { innerTrie.Add(currentSuffix, new WordPosition(position, value)); } + public void Remove(string key, TValue value) { + throw new NotSupportedException(); + } + + public void Remove(string key, params TValue[] values) { + throw new NotImplementedException(); + } + private static IEnumerable> GetAllSuffixes(int minSuffixLength, string word) { for (var i = word.Length - minSuffixLength; i >= 0; i--) yield return new Tuple(new StringPartition(word, i), i); diff --git a/TrieNet/PatriciaTrie/PatriciaTrie.cs b/TrieNet/PatriciaTrie/PatriciaTrie.cs index 386a00a..60300f1 100644 --- a/TrieNet/PatriciaTrie/PatriciaTrie.cs +++ b/TrieNet/PatriciaTrie/PatriciaTrie.cs @@ -25,6 +25,14 @@ public virtual void Add(string key, TValue value) { Add(new StringPartition(key), value); } + public void Remove(string key, TValue value) { + throw new NotSupportedException(); + } + + public void Remove(string key, params TValue[] values) { + throw new NotImplementedException(); + } + internal override void Add(StringPartition keyRest, TValue value) { GetOrCreateChild(keyRest, value); } diff --git a/TrieNet/Trie/ConcurrentTrie.cs b/TrieNet/Trie/ConcurrentTrie.cs index 7e1c3b4..35caacc 100644 --- a/TrieNet/Trie/ConcurrentTrie.cs +++ b/TrieNet/Trie/ConcurrentTrie.cs @@ -15,4 +15,12 @@ public IEnumerable Retrieve(string query) { public void Add(string key, TValue value) { Add(key, 0, value); } + + public void Remove(string key, TValue value) { + throw new NotSupportedException(); + } + + public void Remove(string key, params TValue[] values) { + throw new NotImplementedException(); + } } \ No 
newline at end of file diff --git a/TrieNet/Trie/SuffixTrie.cs b/TrieNet/Trie/SuffixTrie.cs index 3dbd1a5..ec7a186 100644 --- a/TrieNet/Trie/SuffixTrie.cs +++ b/TrieNet/Trie/SuffixTrie.cs @@ -39,6 +39,14 @@ public void Add(string key, T value) { innerTrie.Add(suffix, new WordPosition(position, value)); } + public void Remove(string key, T value) { + throw new NotImplementedException(); + } + + public void Remove(string key, params T[] values) { + throw new NotImplementedException(); + } + private static IEnumerable> GetAllSuffixes(int minSuffixLength, string word) { for (var i = word.Length - minSuffixLength; i >= 0; i--) { var partition = new StringPartition(word, i); diff --git a/TrieNet/Trie/Trie.cs b/TrieNet/Trie/Trie.cs index 9897f81..b7d09fb 100644 --- a/TrieNet/Trie/Trie.cs +++ b/TrieNet/Trie/Trie.cs @@ -15,4 +15,12 @@ public IEnumerable Retrieve(string query) { public void Add(string key, TValue value) { Add(key, 0, value); } + + public void Remove(string key, TValue value) { + RemoveFromKey(key, new [] { value }); + } + + public void Remove(string key, params TValue[] values) { + RemoveFromKey(key, values); + } } \ No newline at end of file diff --git a/TrieNet/Trie/TrieNode.cs b/TrieNet/Trie/TrieNode.cs index 687d927..e5da87e 100644 --- a/TrieNet/Trie/TrieNode.cs +++ b/TrieNet/Trie/TrieNode.cs @@ -3,17 +3,18 @@ using System; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; namespace TrieNet.Trie; [Serializable] public class TrieNode : TrieNodeBase { private readonly Dictionary> children; - private readonly Queue values; + private readonly List values = new(); protected TrieNode() { children = new Dictionary>(); - values = new Queue(); } protected override int KeyLength => 1; @@ -35,7 +36,8 @@ protected override TrieNodeBase GetOrCreateChild(char key) { return result; } - protected override TrieNodeBase GetChildOrNull(string query, int position) { + [return: MaybeNull] + protected override TrieNodeBase 
GetChildOrNull([NotNull] string query, int position) { if (query == null) throw new ArgumentNullException(nameof(query)); return children.TryGetValue(query[position], out var childNode) @@ -44,6 +46,10 @@ protected override TrieNodeBase GetChildOrNull(string query, int positio } protected override void AddValue(TValue value) { - values.Enqueue(value); + values.Add(value); + } + + protected override void RemoveAll(TValue[] nodeValues) { + values.RemoveAll(v => nodeValues.Any(nv => v is not null && v.Equals(nv) || (v is null && nv is null))); } } \ No newline at end of file diff --git a/TrieNet/Trie/TrieNodeBase.cs b/TrieNet/Trie/TrieNodeBase.cs index f58ca29..a5b5b20 100644 --- a/TrieNet/Trie/TrieNodeBase.cs +++ b/TrieNet/Trie/TrieNodeBase.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.CompilerServices; namespace TrieNet.Trie; @@ -32,6 +33,18 @@ public void Add(string key, int position, TValue value) { protected abstract void AddValue(TValue value); + protected virtual void RemoveAll(TValue[] values) { + throw new NotSupportedException(); + } + + protected void RemoveFromKey(string key, TValue[] values) { + if (key == null) throw new ArgumentNullException(nameof(key)); + var child = Enumerable.Range(0, key.Length).Aggregate(this, (node, position) => node?.GetChildOrNull(key, position)); + if (child is not null) + foreach(var node in child.Subtree()) + node.RemoveAll(values); + } + protected abstract TrieNodeBase GetOrCreateChild(char key); protected virtual IEnumerable Retrieve(string query, int position) { @@ -50,6 +63,7 @@ protected virtual IEnumerable SearchDeep(string query, int position) { protected abstract TrieNodeBase GetChildOrNull(string query, int position); + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool EndOfString(int position, string text) { return position >= text.Length; } diff --git a/TrieNet/Ukkonen/CharUkkonenTrie.cs b/TrieNet/Ukkonen/CharUkkonenTrie.cs index 
896953a..29b8910 100644 --- a/TrieNet/Ukkonen/CharUkkonenTrie.cs +++ b/TrieNet/Ukkonen/CharUkkonenTrie.cs @@ -15,6 +15,14 @@ public void Add(string key, TValue value) { Add(key.AsMemory(), value); } + public void Remove(string key, TValue value) { + throw new NotImplementedException(); + } + + public void Remove(string key, params TValue[] values) { + throw new NotImplementedException(); + } + public IEnumerable Retrieve(string query) { return Retrieve(query.AsSpan()); } From e958ad0b721977dd5b483bc6ab553305c09942c0 Mon Sep 17 00:00:00 2001 From: Ruxo Zheng Date: Fri, 8 Sep 2023 21:09:18 +0700 Subject: [PATCH 2/3] Implement Remove for UkkonenTrie --- README.md | 13 +++++++-- TrieNet.Test/SuffixTrieTest.cs | 4 +-- TrieNet.Test/UkkonenTreeTest.cs | 44 ++++++++++++++++++++++++++++++ TrieNet/Ukkonen/CharUkkonenTrie.cs | 4 +-- TrieNet/Ukkonen/Node.cs | 9 ++++++ TrieNet/Ukkonen/UkkonenTrie.cs | 10 +++++++ 6 files changed, 78 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6a303b8..3e5dcd6 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,10 @@ This small library contains a bunch of trie data structures all having the same public interface ITrie { IEnumerable Retrieve(string query); void Add(string key, TValue value); + + // Note: only Trie and UkkonenTrie support removal. + void Remove(string key, TValue value); + void Remove(string key, params TValue[] values); } ``` @@ -61,6 +65,12 @@ public interface IGenericTrie where TKey : IEquatable { At the moment only `UkkonenTrie` implements this interface. +## Removal + +The current implementation allows for item removal in two specific tree types: `Trie` and `UkkonenTrie`. + +Please note that removing an item does **not** optimize or alter the internal tree structure. This limitation exists due to the complexities involved in reducing a trie tree without adversely affecting its other functionalities. + ## Performance All diagrams are given in logarithmic scale on the x-axis and y-axis. 
@@ -74,5 +84,4 @@ All diagrams are given in logarithmic scale on the x-axis and y-axis. The app demonstrates indexing of large text files and look-up inside them. Indexing usually takes only a few seconds and the look-up delay will be unnoticeable for the user. -![](https://raw.githubusercontent.com/OliBomby/trienet/master/img/trie-demo-app.png) - +![](https://raw.githubusercontent.com/OliBomby/trienet/master/img/trie-demo-app.png) \ No newline at end of file diff --git a/TrieNet.Test/SuffixTrieTest.cs b/TrieNet.Test/SuffixTrieTest.cs index 20ec412..80377e1 100644 --- a/TrieNet.Test/SuffixTrieTest.cs +++ b/TrieNet.Test/SuffixTrieTest.cs @@ -10,7 +10,7 @@ namespace TrieNet.Test; public class SuffixTrieTest { - public readonly string[] Words20 = { + public static readonly string[] Words20 = { "overcontribution", "overcontribute", "overcontraction", @@ -45,7 +45,7 @@ public virtual void Setup() { Trie2.Add("aabacdefac", 0); Trie2.Add("aabacdefac", 1); } - + [TestCase("a", new[] { 0, 1, 3, 8 })] [TestCase("b", new[] { 2 })] [TestCase("c", new[] { 4, 9 })] diff --git a/TrieNet.Test/UkkonenTreeTest.cs b/TrieNet.Test/UkkonenTreeTest.cs index 08c37e2..14e05ec 100644 --- a/TrieNet.Test/UkkonenTreeTest.cs +++ b/TrieNet.Test/UkkonenTreeTest.cs @@ -11,4 +11,48 @@ public class UkkonenTreeTest : SuffixTrieTest { protected override ISuffixTrie CreateTrie() { return new CharUkkonenTrie(0); } + + [Test] + public void RemoveKey() { + var trie = CreateTestTrie(); + + Assert.AreEqual(new [] { 15, 16, 17 }, trie.Retrieve("archi")); + + trie.Remove("architecturesque", 17); + + Assert.AreEqual(new [] { 15, 16 }, trie.Retrieve("archi")); + } + + [Test] + public void RemoveMultipleValues() { + var trie = CreateTestTrie(); + + trie.Remove("architis", 15, 16); + + Assert.AreEqual(new [] { 17 }, trie.Retrieve("archi")); + } + + [Test] + public void RemovePartialMatchedKey_RemoveAllEntries() { + var trie = CreateTestTrie(); + + trie.Remove("archi", 15, 16); + + Assert.AreEqual(new [] { 17 }, 
trie.Retrieve("archi")); + } + + [Test] + public void RemoveLongerKey_HasNoEffect() { + var trie = CreateTestTrie(); + + trie.Remove("architissssssss", 15, 16); + + Assert.AreEqual(new [] { 15, 16, 17 }, trie.Retrieve("archi")); + } + + static CharUkkonenTrie CreateTestTrie() { + var trie = new CharUkkonenTrie(); + for(var i =0; i < Words20.Length; ++i) trie.Add(Words20[i], i); + return trie; + } } \ No newline at end of file diff --git a/TrieNet/Ukkonen/CharUkkonenTrie.cs b/TrieNet/Ukkonen/CharUkkonenTrie.cs index 29b8910..ac416d8 100644 --- a/TrieNet/Ukkonen/CharUkkonenTrie.cs +++ b/TrieNet/Ukkonen/CharUkkonenTrie.cs @@ -16,11 +16,11 @@ public void Add(string key, TValue value) { } public void Remove(string key, TValue value) { - throw new NotImplementedException(); + RemoveAll(key.AsMemory(), new[]{ value }); } public void Remove(string key, params TValue[] values) { - throw new NotImplementedException(); + RemoveAll(key.AsMemory(), values); } public IEnumerable Retrieve(string query) { diff --git a/TrieNet/Ukkonen/Node.cs b/TrieNet/Ukkonen/Node.cs index 1962fe8..bc0e082 100644 --- a/TrieNet/Ukkonen/Node.cs +++ b/TrieNet/Ukkonen/Node.cs @@ -36,6 +36,15 @@ public IEnumerable> GetData() { return Data.Concat(childData); } + public void RemoveAll(TValue[] values) { + Data.RemoveAll(w => Contains(values, w)); + foreach(var edge in Edges) + edge.Item2.Target.RemoveAll(values); + } + + static bool Contains(TValue[] values, WordPosition w) => + values.Any(v => v is not null && v.Equals(w.Value) || v is null && w.Value is null); + public void AddRef(WordPosition value) { if (Data.Contains(value)) return; diff --git a/TrieNet/Ukkonen/UkkonenTrie.cs b/TrieNet/Ukkonen/UkkonenTrie.cs index c6ef5af..d969ebe 100644 --- a/TrieNet/Ukkonen/UkkonenTrie.cs +++ b/TrieNet/Ukkonen/UkkonenTrie.cs @@ -66,6 +66,16 @@ public void Add(ReadOnlyMemory key, TValue value) { if (null == activeLeaf.Suffix && activeLeaf != Root && activeLeaf != s) activeLeaf.Suffix = s; } + #region Remove node + 
+ protected void RemoveAll(ReadOnlyMemory key, TValue[] values) { + if (key.Length < MinSuffixLength) return; + var tmpNode = SearchNode(key.Span); + tmpNode?.RemoveAll(values); + } + + #endregion + public IEnumerable> RetrieveSubstringsRange(ReadOnlyMemory min, ReadOnlyMemory max) { if (min.Length != max.Length) throw new ArgumentException("Lengths of min and max must be the same."); From 9d6d5c8b48cb53b89e70c50601ae92c504a5a831 Mon Sep 17 00:00:00 2001 From: Ruxo Zheng Date: Wed, 14 Aug 2024 15:59:51 +0700 Subject: [PATCH 3/3] Configure packable project --- DemoApp/DemoApp.csproj | 1 + SampleConsoleApp/SampleConsoleApp.csproj | 1 + TrieNet/TrieNet.csproj | 10 +++++++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/DemoApp/DemoApp.csproj b/DemoApp/DemoApp.csproj index 1bae484..0d53959 100644 --- a/DemoApp/DemoApp.csproj +++ b/DemoApp/DemoApp.csproj @@ -6,6 +6,7 @@ enable true enable + false diff --git a/SampleConsoleApp/SampleConsoleApp.csproj b/SampleConsoleApp/SampleConsoleApp.csproj index 95f93fb..f220a2a 100644 --- a/SampleConsoleApp/SampleConsoleApp.csproj +++ b/SampleConsoleApp/SampleConsoleApp.csproj @@ -5,6 +5,7 @@ net6.0 enable enable + false diff --git a/TrieNet/TrieNet.csproj b/TrieNet/TrieNet.csproj index f14b8cd..a06287d 100644 --- a/TrieNet/TrieNet.csproj +++ b/TrieNet/TrieNet.csproj @@ -9,12 +9,9 @@ .NET Implementations of Trie Data Structures for Substring Search, Auto-completion and Intelli-sense. Includes: patricia trie, suffix trie and a trie implementation using Ukkonen's algorithm. This is a modern .NET update for the old TrieNet package. true Copyright OliBomby 2022 - 2.0.0 - 2.0.0 https://github.com/OliBomby/trienet ata-structures dotnet algorithms string search ukkonen trie - 2.0.1 https://github.com/OliBomby/trienet trienet.png TrieNet 2 @@ -32,5 +29,12 @@ + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + +