Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Remove method for Trie and UkkonenTrie #2

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DemoApp/DemoApp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
<Nullable>enable</Nullable>
<UseWindowsForms>true</UseWindowsForms>
<ImplicitUsings>enable</ImplicitUsings>
<IsPackable>false</IsPackable>
</PropertyGroup>

<ItemGroup>
Expand Down
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ This small library contains a bunch of trie data structures all having the same
public interface ITrie {
IEnumerable Retrieve(string query);
void Add(string key, TValue value);

// Note: Remove is only supported by Trie and UkkonenTrie.
void Remove(string key, TValue value);
void Remove(string key, params TValue[] values);
}
```

Expand All @@ -61,6 +65,12 @@ public interface IGenericTrie<TKey, TValue> where TKey : IEquatable<TKey> {

At the moment only `UkkonenTrie` implements this interface.

## Removal

The current implementation allows for item removal in two specific trie types: `Trie` and `UkkonenTrie`.

Please note that removing an item does **not** optimize or alter the internal tree structure. This limitation exists due to the complexities involved in reducing a trie without adversely affecting its other functionality.

## Performance

All diagrams are given in logarithmic scale on the x-axis and y-axis.
Expand All @@ -74,5 +84,4 @@ All diagrams are given in logarithmic scale on the x-axis and y-axis.
The app demonstrates indexing of large text files and look-up inside them. Indexing usually takes only a few seconds and the look-up delay will be unnoticeable for
the user.

![](https://raw.githubusercontent.com/OliBomby/trienet/master/img/trie-demo-app.png)

![](https://raw.githubusercontent.com/OliBomby/trienet/master/img/trie-demo-app.png)
1 change: 1 addition & 0 deletions SampleConsoleApp/SampleConsoleApp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
</PropertyGroup>

<ItemGroup>
Expand Down
11 changes: 8 additions & 3 deletions TrieNet.Test/BaseTrieTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,20 @@ namespace TrieNet.Test;
public abstract class BaseTrieTest {
[OneTimeSetUp]
public virtual void Setup() {
Trie = CreateTrie();
for (var i = 0; i < Words40.Length; i++) Trie.Add(Words40[i], i);
Trie = CreateDefaultTrie();
}

/// <summary>
/// Builds a fresh trie via <see cref="CreateTrie"/> and adds every entry of
/// <c>Words40</c> to it, using the entry's index in the array as its value.
/// </summary>
/// <returns>The newly populated trie.</returns>
public ITrie<int> CreateDefaultTrie() {
    var populated = CreateTrie();
    var index = 0;
    foreach (var word in Words40) {
        populated.Add(word, index);
        index++;
    }
    return populated;
}

protected ITrie<int> Trie { get; private set; }

protected abstract ITrie<int> CreateTrie();

public readonly string[] Words40 = {
public static readonly string[] Words40 = {
"daubreelite",
"daubingly",
"daubingly",
Expand Down
8 changes: 8 additions & 0 deletions TrieNet.Test/Performance/FakeTrie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,12 @@ public void Add(string key, T value) {
var keyValPair = new KeyValuePair<string, T>(key, value);
stack.Push(keyValPair);
}

/// <summary>Single-value removal; not implemented here, always throws.</summary>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public void Remove(string key, T value) =>
    throw new System.NotImplementedException();

/// <summary>Multi-value removal; not implemented here, always throws.</summary>
/// <exception cref="System.NotImplementedException">Always thrown.</exception>
public void Remove(string key, params T[] values) =>
    throw new System.NotImplementedException();
}
4 changes: 2 additions & 2 deletions TrieNet.Test/SuffixTrieTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
namespace TrieNet.Test;

public class SuffixTrieTest {
public readonly string[] Words20 = {
public static readonly string[] Words20 = {
"overcontribution",
"overcontribute",
"overcontraction",
Expand Down Expand Up @@ -45,7 +45,7 @@ public virtual void Setup() {
Trie2.Add("aabacdefac", 0);
Trie2.Add("aabacdefac", 1);
}

[TestCase("a", new[] { 0, 1, 3, 8 })]
[TestCase("b", new[] { 2 })]
[TestCase("c", new[] { 4, 9 })]
Expand Down
44 changes: 44 additions & 0 deletions TrieNet.Test/TrieTest.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// This code is distributed under MIT license. Copyright (c) 2013 George Mamaladze
// See license.txt or http://opensource.org/licenses/mit-license.php

using System;
using System.Linq;
using NUnit.Framework;
using TrieNet.Trie;
Expand All @@ -23,4 +24,47 @@ public void ExhaustiveParallelAddFails() {
.ForAll(phrase => trie.Add(phrase, phrase.GetHashCode()));
}
}

[Test]
public void RemoveKey() {
    // Arrange: a freshly populated trie plus the expected results before/after removal.
    var sut = CreateDefaultTrie();
    int[] capoBefore = { 21, 22, 23 };
    int[] capocBefore = { 22, 23 };
    int[] capoAfter = { 21, 23 };
    int[] capocAfter = { 23 };

    // Sanity-check the starting state so a failure below is attributable to Remove.
    Assert.AreEqual(capoBefore, sut.Retrieve("capo"));
    Assert.AreEqual(capocBefore, sut.Retrieve("capoc"));

    // Act
    sut.Remove("capoc", 22);

    // Assert: value 22 is gone from both the shorter and the exact prefix query.
    Assert.AreEqual(capoAfter, sut.Retrieve("capo"));
    Assert.AreEqual(capocAfter, sut.Retrieve("capoc"));
}

[Test]
public void RemoveMultipleKeys() {
    // Arrange
    var sut = CreateDefaultTrie();
    int[] capoAfter = { 21 };

    // Act: remove two values in one call through the params overload.
    sut.Remove("capoc", 22, 23);

    // Assert
    Assert.AreEqual(capoAfter, sut.Retrieve("capo"));
    Assert.AreEqual(Enumerable.Empty<int>(), sut.Retrieve("capoc"));
}

[Test]
public void RemovePartialMatchedKey_RemoveAllEntries() {
    // Arrange
    var sut = CreateDefaultTrie();
    int[] capoAfter = { 21 };

    // Act: the key is only a prefix of the words that hold values 22 and 23.
    sut.Remove("capo", 22, 23);

    // Assert: both values are removed even though the key was a partial match.
    Assert.AreEqual(capoAfter, sut.Retrieve("capo"));
    Assert.AreEqual(Array.Empty<int>(), sut.Retrieve("capoc"));
}

[Test]
public void RemoveLongerKey_HasNoEffect() {
    // Arrange
    var sut = CreateDefaultTrie();
    int[] capoExpected = { 21, 22, 23 };
    int[] capocExpected = { 22, 23 };

    // Act: remove with a key that is not present in the trie.
    sut.Remove("capocissss", 22);

    // Assert: nothing changed.
    Assert.AreEqual(capoExpected, sut.Retrieve("capo"));
    Assert.AreEqual(capocExpected, sut.Retrieve("capoc"));
}
}
44 changes: 44 additions & 0 deletions TrieNet.Test/UkkonenTreeTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,48 @@ public class UkkonenTreeTest : SuffixTrieTest {
protected override ISuffixTrie<int> CreateTrie() {
return new CharUkkonenTrie<int>(0);
}

[Test]
public void RemoveKey() {
    // Arrange
    var sut = CreateTestTrie();
    int[] archiBefore = { 15, 16, 17 };
    int[] archiAfter = { 15, 16 };

    // Sanity-check the starting state.
    Assert.AreEqual(archiBefore, sut.Retrieve("archi"));

    // Act
    sut.Remove("architecturesque", 17);

    // Assert
    Assert.AreEqual(archiAfter, sut.Retrieve("archi"));
}

[Test]
public void RemoveMultipleValues() {
    // Arrange
    var sut = CreateTestTrie();
    int[] archiAfter = { 17 };

    // Act: remove two values in one call through the params overload.
    sut.Remove("architis", 15, 16);

    // Assert
    Assert.AreEqual(archiAfter, sut.Retrieve("archi"));
}

[Test]
public void RemovePartialMatchedKey_RemoveAllEntries() {
    // Arrange
    var sut = CreateTestTrie();
    int[] archiAfter = { 17 };

    // Act: the key is only a prefix of the stored words.
    sut.Remove("archi", 15, 16);

    // Assert
    Assert.AreEqual(archiAfter, sut.Retrieve("archi"));
}

[Test]
public void RemoveLongerKey_HasNoEffect() {
    // Arrange
    var sut = CreateTestTrie();
    int[] archiExpected = { 15, 16, 17 };

    // Act: remove with a key that is not present in the trie.
    sut.Remove("architissssssss", 15, 16);

    // Assert: nothing changed.
    Assert.AreEqual(archiExpected, sut.Retrieve("archi"));
}

/// <summary>
/// Creates a new <c>CharUkkonenTrie&lt;int&gt;</c> and adds every entry of
/// <c>Words20</c> to it, using the entry's index in the array as its value.
/// </summary>
static CharUkkonenTrie<int> CreateTestTrie() {
    var populated = new CharUkkonenTrie<int>();
    var index = 0;
    foreach (var word in Words20) {
        populated.Add(word, index);
        ++index;
    }
    return populated;
}
}
2 changes: 2 additions & 0 deletions TrieNet/ITrie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ namespace TrieNet;
public interface ITrie<TValue> {
IEnumerable<TValue> Retrieve(string query);
void Add(string key, TValue value);
void Remove(string key, TValue value);
void Remove(string key, params TValue[] values);
}
8 changes: 8 additions & 0 deletions TrieNet/PatriciaTrie/PatriciaSuffixTrie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ public void Add(string key, TValue value) {
innerTrie.Add(currentSuffix, new WordPosition<TValue>(position, value));
}

/// <summary>
/// Removal of a single value. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, TValue value) {
    throw new NotSupportedException();
}

/// <summary>
/// Removal of several values. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, params TValue[] values) {
    // Same exception type as the single-value overload: the operation is
    // deliberately unsupported, not merely pending implementation.
    throw new NotSupportedException();
}

private static IEnumerable<Tuple<StringPartition, int>> GetAllSuffixes(int minSuffixLength, string word) {
for (var i = word.Length - minSuffixLength; i >= 0; i--)
yield return new Tuple<StringPartition, int>(new StringPartition(word, i), i);
Expand Down
8 changes: 8 additions & 0 deletions TrieNet/PatriciaTrie/PatriciaTrie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ public virtual void Add(string key, TValue value) {
Add(new StringPartition(key), value);
}

/// <summary>
/// Removal of a single value. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, TValue value) {
    throw new NotSupportedException();
}

/// <summary>
/// Removal of several values. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, params TValue[] values) {
    // Same exception type as the single-value overload: the operation is
    // deliberately unsupported, not merely pending implementation.
    throw new NotSupportedException();
}

internal override void Add(StringPartition keyRest, TValue value) {
GetOrCreateChild(keyRest, value);
}
Expand Down
8 changes: 8 additions & 0 deletions TrieNet/Trie/ConcurrentTrie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,12 @@ public IEnumerable<TValue> Retrieve(string query) {
public void Add(string key, TValue value) {
Add(key, 0, value);
}

/// <summary>
/// Removal of a single value. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, TValue value) {
    throw new NotSupportedException();
}

/// <summary>
/// Removal of several values. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, params TValue[] values) {
    // Same exception type as the single-value overload: the operation is
    // deliberately unsupported, not merely pending implementation.
    throw new NotSupportedException();
}
}
8 changes: 8 additions & 0 deletions TrieNet/Trie/SuffixTrie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ public void Add(string key, T value) {
innerTrie.Add(suffix, new WordPosition<T>(position, value));
}

/// <summary>
/// Removal of a single value. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, T value) {
    // NotSupportedException signals a deliberately unsupported operation;
    // NotImplementedException is meant for code that is still to be written.
    throw new NotSupportedException();
}

/// <summary>
/// Removal of several values. This trie type does not support removal.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Remove(string key, params T[] values) {
    throw new NotSupportedException();
}

private static IEnumerable<Tuple<string, int>> GetAllSuffixes(int minSuffixLength, string word) {
for (var i = word.Length - minSuffixLength; i >= 0; i--) {
var partition = new StringPartition(word, i);
Expand Down
8 changes: 8 additions & 0 deletions TrieNet/Trie/Trie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,12 @@ public IEnumerable<TValue> Retrieve(string query) {
public void Add(string key, TValue value) {
Add(key, 0, value);
}

/// <summary>
/// Removes a single value by forwarding it, wrapped in a one-element array,
/// to <c>RemoveFromKey</c>.
/// </summary>
/// <param name="key">Key passed through to <c>RemoveFromKey</c>.</param>
/// <param name="value">The value to remove.</param>
public void Remove(string key, TValue value) {
    TValue[] single = { value };
    RemoveFromKey(key, single);
}

/// <summary>
/// Removes several values by forwarding them to <c>RemoveFromKey</c>.
/// </summary>
/// <param name="key">Key passed through to <c>RemoveFromKey</c>.</param>
/// <param name="values">The values to remove.</param>
public void Remove(string key, params TValue[] values) => RemoveFromKey(key, values);
}
14 changes: 10 additions & 4 deletions TrieNet/Trie/TrieNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@

using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;

namespace TrieNet.Trie;

[Serializable]
public class TrieNode<TValue> : TrieNodeBase<TValue> {
private readonly Dictionary<char, TrieNode<TValue>> children;
private readonly Queue<TValue> values;
private readonly List<TValue> values = new();

protected TrieNode() {
children = new Dictionary<char, TrieNode<TValue>>();
values = new Queue<TValue>();
}

protected override int KeyLength => 1;
Expand All @@ -35,7 +36,8 @@ protected override TrieNodeBase<TValue> GetOrCreateChild(char key) {
return result;
}

protected override TrieNodeBase<TValue> GetChildOrNull(string query, int position) {
[return: MaybeNull]
protected override TrieNodeBase<TValue> GetChildOrNull([NotNull] string query, int position) {
if (query == null) throw new ArgumentNullException(nameof(query));
return
children.TryGetValue(query[position], out var childNode)
Expand All @@ -44,6 +46,10 @@ protected override TrieNodeBase<TValue> GetChildOrNull(string query, int positio
}

protected override void AddValue(TValue value) {
values.Enqueue(value);
values.Add(value);
}

/// <summary>
/// Removes from this node's value list every stored value that equals any
/// element of <paramref name="nodeValues"/>.
/// </summary>
/// <param name="nodeValues">The values to remove from this node.</param>
protected override void RemoveAll(TValue[] nodeValues) {
    // The default comparer treats two nulls as equal and a null/non-null pair as
    // different, and uses IEquatable<TValue> when available (no boxing of value types).
    var comparer = EqualityComparer<TValue>.Default;
    values.RemoveAll(stored => nodeValues.Any(candidate => comparer.Equals(stored, candidate)));
}
}
14 changes: 14 additions & 0 deletions TrieNet/Trie/TrieNodeBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;

namespace TrieNet.Trie;

Expand Down Expand Up @@ -32,6 +33,18 @@ public void Add(string key, int position, TValue value) {

protected abstract void AddValue(TValue value);

/// <summary>
/// Removes the given values from this node. The base implementation does not
/// support removal and always throws; node types that support it override this.
/// </summary>
/// <param name="values">The values to remove.</param>
/// <exception cref="NotSupportedException">Always thrown by this base implementation.</exception>
protected virtual void RemoveAll(TValue[] values) =>
    throw new NotSupportedException();

/// <summary>
/// Follows <paramref name="key"/> one position at a time from this node via
/// <c>GetChildOrNull</c>, then calls <c>RemoveAll</c> with
/// <paramref name="values"/> on every node yielded by <c>Subtree()</c> of the
/// node that was reached.
/// </summary>
/// <param name="key">The key to follow; an empty key resolves to this node itself.</param>
/// <param name="values">The values handed to each node's <c>RemoveAll</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
protected void RemoveFromKey(string key, TValue[] values) {
    if (key == null) throw new ArgumentNullException(nameof(key));

    var node = this;
    for (var position = 0; position < key.Length; position++) {
        node = node.GetChildOrNull(key, position);
        // The key leaves the trie here: nothing to remove, so stop walking.
        if (node == null) return;
    }

    foreach (var descendant in node.Subtree()) {
        descendant.RemoveAll(values);
    }
}

protected abstract TrieNodeBase<TValue> GetOrCreateChild(char key);

protected virtual IEnumerable<TValue> Retrieve(string query, int position) {
Expand All @@ -50,6 +63,7 @@ protected virtual IEnumerable<TValue> SearchDeep(string query, int position) {

protected abstract TrieNodeBase<TValue> GetChildOrNull(string query, int position);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool EndOfString(int position, string text) {
return position >= text.Length;
}
Expand Down
10 changes: 7 additions & 3 deletions TrieNet/TrieNet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@
<Description>.NET Implementations of Trie Data Structures for Substring Search, Auto-completion and Intelli-sense. Includes: patricia trie, suffix trie and a trie implementation using Ukkonen's algorithm. This is a modern .NET update for the old TrieNet package.</Description>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<Copyright>Copyright OliBomby 2022</Copyright>
<AssemblyVersion>2.0.0</AssemblyVersion>
<FileVersion>2.0.0</FileVersion>
<PackageLicenseUrl></PackageLicenseUrl>
<PackageProjectUrl>https://github.com/OliBomby/trienet</PackageProjectUrl>
<PackageTags>data-structures dotnet algorithms string search ukkonen trie</PackageTags>
<Version>2.0.1</Version>
<RepositoryUrl>https://github.com/OliBomby/trienet</RepositoryUrl>
<PackageIcon>trienet.png</PackageIcon>
<Title>TrieNet 2</Title>
Expand All @@ -32,5 +29,12 @@
<ItemGroup>
<None Include="..\license.txt" Pack="true" PackagePath="" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="MinVer" Version="5.0.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>

</Project>
Loading