diff --git a/_xtool/internal/header/trie.go b/_xtool/internal/header/trie.go new file mode 100644 index 00000000..bfdd00d7 --- /dev/null +++ b/_xtool/internal/header/trie.go @@ -0,0 +1,209 @@ +package header + +import ( + "iter" + "os" + "path/filepath" + "slices" + "strings" +) + +type Segmenter func(s string) iter.Seq[string] + +type TrieNode struct { + isLeaf bool // Indicates if this node represents the end of a word + linkCount int // Number of children nodes + children map[string]*TrieNode // Map of child nodes by segment +} + +// Creates a new TrieNode with empty children map +func NewTrieNode() *TrieNode { + return &TrieNode{children: make(map[string]*TrieNode)} +} + +type Trie struct { + root *TrieNode // Root node of the trie + segmenter Segmenter // Function to split strings into segments +} +type Options func(*Trie) // Function type for configuring Trie options + +func skipEmpty(s []string) []string { + for len(s) > 0 && s[0] == "" { + s = s[1:] + } + return s +} + +func splitPathAbsSafe(path string) (paths []string) { + originalPath := filepath.Clean(path) + + sep := string(os.PathSeparator) + + // keep absolute path info + if filepath.IsAbs(originalPath) { + i := strings.Index(originalPath[1:], sep) + if i > 0 { + // bound edge: if i is greater than zero, which means there's second separator + // for example, /usr/, i: 3, with first separator what we just skipped, i: 4 + paths = append(paths, originalPath[0:i+1]) + paths = append(paths, skipEmpty(strings.Split(originalPath[i+1:], sep))...) + } else { + // start with / but no other / is found, like /usr + paths = append(paths, originalPath) + } + } + + if len(paths) == 0 { + paths = skipEmpty(strings.Split(originalPath, sep)) + } + + return +} + +// Returns an option to configure path segmenter +// Splits strings by OS path separator and yields each segment +func WithPathSegmenter() Options { + return func(t *Trie) { + t.segmenter = func(s string) iter.Seq[string] { + return func(yield func(string) bool) { + for _, path := range splitPathAbsSafe(s) { + if path != "" && !yield(path) { + return + } + } + } + } + } +} + +// Returns an option to configure reverse path segmenter +// Splits and reverses strings by OS path separator +func WithReversePathSegmenter() Options { + return func(t *Trie) { + t.segmenter = func(s string) iter.Seq[string] { + return func(yield func(string) bool) { + paths := splitPathAbsSafe(s) + + slices.Reverse(paths) + + for _, path := range paths { + if path != "" && !yield(path) { + return + } + } + } + } + } +} + +// Creates a new Trie with default path segmenter +// Applies all provided options to configure the Trie +func NewTrie(opts ...Options) *Trie { + t := &Trie{root: NewTrieNode()} + + WithPathSegmenter()(t) + + for _, o := range opts { + o(t) + } + + return t +} + +// Inserts a string into the trie +// Creates nodes for each segment in the string +func (t *Trie) Insert(s string) { + if s == "" { + return + } + node := t.root + + for segment := range t.segmenter(s) { + child, ok := node.children[segment] + if !ok { + child = NewTrieNode() + node.children[segment] = child + node.linkCount++ + } + node = child + } + node.isLeaf = true +} + +// Searches for a prefix in the trie +// Returns the node at the end of the prefix or nil if not found +func (t *Trie) searchPrefix(s string) *TrieNode { + if s == "" { + return nil + } + node := t.root + + for segment := range t.segmenter(s) { + child, ok := node.children[segment] + if !ok { + return nil + } + node = child + } + + return node +} + +// Finds the longest common prefix of the given string +// Returns the longest prefix that exists in the trie +// +// Implement Source: https://leetcode.com/problems/longest-common-prefix/solutions/127449/longest-common-prefix +func (t *Trie) LongestPrefix() string { + var prefix []string + + dfs(&prefix, "", t.root, nil) + + return filepath.Join(prefix...) +} + +func dfs(prefix *[]string, currentPrefix string, node, parent *TrieNode) { + if node == nil { + return + } + if parent != nil && (parent.linkCount != 1 || parent.isLeaf) { + return + } + + if currentPrefix != "" { + *prefix = append(*prefix, currentPrefix) + } + + for current, child := range node.children { + dfs(prefix, current, child, node) + } +} + +// IsOnSameBranch checks the given s is the subset of trie tree +func (t *Trie) IsOnSameBranch(s string) bool { + if s == "" { + return false + } + node := t.root + + for segment := range t.segmenter(s) { + child, ok := node.children[segment] + if !ok { + // if the current node is end, but there's something unmatched, we still consider it valid. + // for example, + // input: /c/b/a, tree: /c/b, valid + // input: /c/b/a, tree: /c/b/c, invalid + // input: /c/b, tree: /c/b/c, valid + return node.isLeaf + } + node = child + } + + return node != nil +} + +// Checks if the trie contains the exact string +// Returns true if the string exists in the trie +func (t *Trie) Search(s string) bool { + node := t.searchPrefix(s) + return node != nil && node.isLeaf +} diff --git a/_xtool/internal/header/trie_test.go b/_xtool/internal/header/trie_test.go new file mode 100644 index 00000000..d770ab3b --- /dev/null +++ b/_xtool/internal/header/trie_test.go @@ -0,0 +1,438 @@ +package header_test + +import ( + "testing" + + "github.com/goplus/llcppg/_xtool/internal/header" +) + +func TestTrieSubset(t *testing.T) { + testCases := []struct { + name string + search string + inserted []string + want bool + }{ + { + name: "empty string", + search: "abc", + want: false, + }, + { + name: "input empty string", + search: "", + inserted: []string{""}, + want: false, + }, + { + name: "one string", + search: "/a", + inserted: []string{"/a"}, + want: true, + }, + { + name: "two string", + search: "/a", + inserted: []string{"/a", "/b"}, + want: true, + }, + { + name: "multiple string case 1", + search: "/c", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 2", + search: "", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 3", + search: "/c", + inserted: []string{"/a/c", "/b/c", "/c/d"}, + want: true, + }, + + { + name: "multiple string case 4", + search: "/c/d", + inserted: []string{"/a/c/d", "/b/c/d", "/c/d/a"}, + want: true, + }, + { + name: "substring string case 1", + search: "/a/b", + inserted: []string{"/a"}, + want: true, + }, + { + name: "substring string case 2", + search: "/a", + inserted: []string{"/a/b"}, + want: true, + }, + + { + name: "substring string case 3", + search: "/a/b", + inserted: []string{"/a/b", "/a/b/c"}, + want: true, + }, + + { + name: "substring string case 4", + search: "/c/b", + inserted: []string{"/a/b", "/c/b/a"}, + want: true, + }, + { + name: "substring string case 5", + search: "/c/a", + inserted: []string{"/a/b", "/c/b/a"}, + want: false, + }, + { + name: "substring string case 6", + search: "/c/b/c", + inserted: []string{"/a/b", "/c/b/a"}, + want: false, + }, + { + name: "substring string case 7", + search: "/c/b", + inserted: []string{"/a/b", "/c/b/c/a"}, + want: true, + }, + { + name: "absolute path case 1", + search: "a", + inserted: []string{"/a/b"}, + want: false, + }, + { + name: "absolute path case 2", + search: "/a", + inserted: []string{"a/b", "a/b/c"}, + want: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + trie := header.NewTrie() + + for _, i := range tc.inserted { + trie.Insert(i) + } + if got := trie.IsOnSameBranch(tc.search); got != tc.want { + t.Fatalf("unexpected result: want %v got %v", tc.want, got) + } + }) + } +} + +func TestTrieSearch(t *testing.T) { + testCases := []struct { + name string + search string + inserted []string + want bool + }{ + { + name: "Empty string insertion and search", + search: "", + inserted: []string{""}, + want: false, + }, + { + name: "Single directory exact match", + search: "/usr/local/bin/", + inserted: []string{"/usr/local/bin/"}, + want: true, + }, + { + name: "Single directory partial match", + search: "/usr/local/bin/python", + inserted: []string{"/usr/local/bin/"}, + want: false, + }, + { + name: "Multiple directories exact match", + search: "/usr/local/lib/", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/"}, + want: true, + }, + { + name: "Multiple directories partial match", + search: "/usr/local/lib/python", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/"}, + want: false, + }, + { + name: "Non-existent path", + search: "/non/existent/path", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/"}, + want: false, + }, + { + name: "Empty search string", + search: "", + inserted: []string{"/usr/local/bin/"}, + want: false, + }, + { + name: "Subdirectory search", + search: "/usr/local/bin/", + inserted: []string{"/usr/local/bin/"}, + want: true, + }, + { + name: "Deep directory structure", + search: "/a/b/c/d/e/f/g", + inserted: []string{"/a/b/c/d/e/f/g"}, + want: true, + }, + { + name: "Long path with special characters", + search: "/home/user/!@#$%^&*()", + inserted: []string{"/home/user/!@#$%^&*()"}, + want: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + trie := header.NewTrie() + for _, word := range tc.inserted { + trie.Insert(word) + } + if got := trie.Search(tc.search); got != tc.want { + t.Fatalf("Search(%q) = %v, want %v", tc.search, got, tc.want) + } + }) + } +} + +func TestTrieLongestPrefix(t *testing.T) { + tests := []struct { + name string + inserted []string + want string + }{ + { + name: "Empty trie", + inserted: []string{}, + want: "", + }, + { + name: "Single directory exact match", + inserted: []string{"/usr/local/bin/"}, + want: "/usr/local/bin", + }, + { + name: "Single directory partial match", + inserted: []string{"/usr/local/bin/", "/usr/local/bin/python"}, + want: "/usr/local/bin", + }, + { + name: "Multiple directories with common prefix", + inserted: []string{"/usr/local/bin/", "/usr/local/lib/", "/usr/include/", "/usr/local/bin/python"}, + want: "/usr", + }, + { + name: "No common prefix", + inserted: []string{"/home/user/", "/var/log/", "/tmp/", "/etc/passwd"}, + want: "", + }, + { + name: "Reverse path match", + inserted: []string{"bin", "lib", "include", "include/lib/bin"}, + want: "", + }, + { + name: "Longer input than stored", + inserted: []string{"/short/", "/shorter/path"}, + want: "", + }, + { + name: "No match", + inserted: []string{"/apple/", "/banana/", "/cherry/"}, + want: "", + }, + { + name: "Partial reverse match", + inserted: []string{"bin", "lib", "include", "lib/bin"}, + want: "", + }, + { + name: "normal case 1", + inserted: []string{ + "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON.h", + "/opt/homebrew/Cellar/cjson/1.7.18/include/zlib/zlib.h", + "/opt/homebrew/Cellar/cjson/1.7.18/include/cJSON/cJSON.h", + }, + want: "/opt/homebrew/Cellar/cjson/1.7.18/include", + }, + { + name: "absolute path case 1", + inserted: []string{"/usr", "usr", "/usr/include", "/usr"}, + want: "", + }, + { + name: "absolute path case 2", + inserted: []string{"usr/share", "/usr", "usr/include", "usr/include/share"}, + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + trie := header.NewTrie() + for _, word := range tt.inserted { + trie.Insert(word) + } + result := trie.LongestPrefix() + if result != tt.want { + t.Errorf("LongestPrefix(%q) = %q, want %q", tt.inserted, result, tt.want) + } + }) + } +} + +func TestTrieReverse(t *testing.T) { + testCases := []struct { + name string + search string + inserted []string + want bool + }{ + { + name: "empty string", + search: "abc", + want: false, + }, + { + name: "input empty string", + search: "", + inserted: []string{""}, + want: false, + }, + { + name: "one string", + search: "/a", + inserted: []string{"/a"}, + want: true, + }, + { + name: "two string", + search: "/a", + inserted: []string{"/a", "/b"}, + want: true, + }, + { + name: "multiple string case 1", + search: "/c", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 2", + search: "", + inserted: []string{"/a", "/b", "/d"}, + want: false, + }, + + { + name: "multiple string case 3", + search: "/c", + inserted: []string{"/a/c", "/b/c", "/c/d"}, + want: false, + }, + + { + name: "multiple string case 4", + search: "c/d", + inserted: []string{"/a/c/d", "/b/c/d", "/c/d/a"}, + want: true, + }, + + { + name: "multiple string case 5", + search: "c", + inserted: []string{"/a/c", "/b/c", "/c/d"}, + want: true, + }, + { + name: "substring string case 1", + search: "/a/b", + inserted: []string{"/a"}, + want: false, + }, + { + name: "substring string case 2", + search: "b", + inserted: []string{"/a/b"}, + want: true, + }, + + { + name: "substring string case 3", + search: "/a/b", + inserted: []string{"/a/b", "/a/b/c"}, + want: true, + }, + + { + name: "normal case 1", + search: "libxslt/variables.h", + inserted: []string{ + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/imports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/xsltexports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/variables.h", + }, + want: true, + }, + + { + name: "normal case 2", + search: "libxslt/c14n.h", + inserted: []string{ + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/imports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/xsltexports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/variables.h", + }, + want: false, + }, + + { + name: "normal case 3", + search: "libxslt/imports.h", + inserted: []string{ + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/zlib/imports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxml2/imports.h", + + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/xsltexports.h", + "/Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include/libxslt/variables.h", + }, + want: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + trie := header.NewTrie(header.WithReversePathSegmenter()) + + for _, i := range tc.inserted { + trie.Insert(i) + } + if got := trie.IsOnSameBranch(tc.search); got != tc.want { + t.Fatalf("unexpected result: want %v got %v", tc.want, got) + } + }) + } +}