diff --git a/README.md b/README.md index 4ddc212..55c112e 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ Try to keep the interface and variable name consistent with the original book wh * [BreadthFirstPaths](algs4/breadth_first_paths.py) * [CC](algs4/cc.py) * [Cycle](algs4/cycle.py) + * [SymbolGraph](algs4/symbol_graph.py) * 5 STRING diff --git a/algs4/cc.py b/algs4/cc.py new file mode 100644 index 0000000..84c9d83 --- /dev/null +++ b/algs4/cc.py @@ -0,0 +1,78 @@ +""" + Execution: python cc.py filename.txt + Data files: https://algs4.cs.princeton.edu/41graph/tinyG.txt + https://algs4.cs.princeton.edu/41graph/mediumG.txt + https://algs4.cs.princeton.edu/41graph/largeG.txt + + Compute connected components using depth first search. + Runs in O(E + V) time. + + % python cc.py tinyG.txt + 3 components + 0 1 2 3 4 5 6 + 7 8 + 9 10 11 12 + + % python cc.py mediumG.txt + 1 components + 0 1 2 3 4 5 6 7 8 9 10 ... + + % python cc.py largeG.txt + 1 components + 0 1 2 3 4 5 6 7 8 9 10 ... + + Note: This implementation uses a recursive DFS. To avoid needing + a potentially very large stack size, replace with a non-recursive + DFS ala NonrecursiveDFS.java. 
class CC:
    """Connected components of an undirected graph, found by depth-first search.

    The graph ``G`` is only required to expose a vertex count ``G.V`` and an
    indexable ``G.adj`` whose entry for vertex ``v`` can be iterated to list
    the neighbours of ``v``.

    Attributes set by the constructor:
        marked: ``marked[v]`` is True once vertex ``v`` has been visited.
        id: ``id[v]`` is the component number of vertex ``v``; components are
            numbered 0, 1, ... in order of their lowest-numbered vertex.
        count: number of connected components.
    """

    def __init__(self, G):
        """Label every vertex of ``G`` with the id of its component."""
        self.marked = [False] * G.V
        self.id = [0] * G.V
        self.count = 0

        for s in range(G.V):
            if not self.marked[s]:
                # Everything reachable from s receives the current id.
                self.dfs(G, s)
                self.count += 1

    def dfs(self, G, v):
        """Mark every vertex reachable from ``v`` and tag it with ``self.count``.

        The search uses an explicit stack of neighbour iterators instead of
        recursion, so a long path in the graph cannot exceed Python's
        recursion limit. Vertices are visited in the same order as the
        recursive formulation would visit them.
        """
        self.marked[v] = True
        self.id[v] = self.count
        stack = [iter(G.adj[v])]
        while stack:
            # Resume scanning the neighbours of the vertex on top of the stack.
            for w in stack[-1]:
                if not self.marked[w]:
                    self.marked[w] = True
                    self.id[w] = self.count
                    stack.append(iter(G.adj[w]))
                    break  # descend into w before looking at further siblings
            else:
                # All neighbours handled: backtrack.
                stack.pop()

    def connected(self, v, w):
        """Return True if vertices ``v`` and ``w`` are in the same component."""
        return self.id[v] == self.id[w]


if __name__ == "__main__":
    import sys

    # Usage: python cc.py filename.txt
    # Only the file name is read; the previous unused read of sys.argv[2]
    # made the documented one-argument invocation fail with IndexError.
    with open(sys.argv[1]) as f:
        V = int(f.readline())
        E = int(f.readline())
        g = Graph(V)
        for _ in range(E):
            v, w = f.readline().split()
            # Vertex ids index lists in CC, so hand them over as ints
            # (Graph.add_edge is not visible here; do not rely on it to coerce).
            g.add_edge(int(v), int(w))

    cc = CC(g)
    print(cc.count, " components")

    # Bucket the vertices by component id, then print one component per line.
    components = [Bag() for _ in range(cc.count)]
    for v in range(g.V):
        components[cc.id[v]].add(v)

    for component in components:
        for v in component:
            print(v, " ", end='')
        print()
class Cycle:
    """Detect whether an undirected graph contains a cycle, using DFS.

    The graph ``G`` is only required to expose a vertex count ``G.V`` and an
    indexable ``G.adj`` whose entry for vertex ``v`` can be iterated to list
    the neighbours of ``v``.

    Attributes set by the constructor:
        marked: ``marked[v]`` is True once vertex ``v`` has been visited.
        has_cycle: True if the search met an already-visited neighbour other
            than the vertex it arrived from.
    """

    def __init__(self, G):
        """Run a depth-first search from every not-yet-visited vertex."""
        self.marked = [False] * G.V
        self.has_cycle = False
        for s in range(G.V):
            if not self.marked[s]:
                # A search root is passed as its own predecessor.
                self.dfs(G, s, s)

    def dfs(self, G, v, u):
        """Search from ``v``, where ``u`` is the vertex ``v`` was reached from.

        Sets ``self.has_cycle`` when a visited neighbour differs from the
        vertex the search came from. The search keeps an explicit stack of
        ``(vertex, predecessor, neighbour iterator)`` entries instead of
        recursing, so a long path cannot exceed Python's recursion limit;
        the visiting order matches the recursive formulation.
        """
        self.marked[v] = True
        stack = [(v, u, iter(G.adj[v]))]
        while stack:
            node, parent, neighbours = stack[-1]
            for w in neighbours:
                if not self.marked[w]:
                    self.marked[w] = True
                    stack.append((w, node, iter(G.adj[w])))
                    break  # descend into w; resume node's neighbours later
                if w != parent:
                    # Reached a visited vertex by an edge other than the one
                    # we arrived on.
                    self.has_cycle = True
            else:
                # All neighbours handled: backtrack.
                stack.pop()


if __name__ == "__main__":
    import sys

    # Usage: python cycle.py filename.txt
    with open(sys.argv[1]) as f:
        V = int(f.readline())
        E = int(f.readline())
        g = Graph(V)
        for _ in range(E):
            v, w = f.readline().split()
            # Vertex ids index lists in Cycle, so hand them over as ints
            # (Graph.add_edge is not visible here; do not rely on it to coerce).
            g.add_edge(int(v), int(w))

    cycle = Cycle(g)
    if cycle.has_cycle:
        print("Graph is cyclic")
    else:
        print("Graph is acyclic")
""" +from collections import OrderedDict + class ST: + def __init__(self): + self.st = OrderedDict() + def put(self, key, value): - pass + self.st[key] = value def get(self, key): - pass + if key is None: + raise ValueError("calls get() with null key") + + return self.st.get(key) def delete(self, key): - pass + if key is None: + raise ValueError("calls get() with null key") + del self.st[key] def contains(self, key): - pass + return key in self.st def is_empty(self): - pass + self.size() == 0 def size(self): - pass + return len(self.st.keys()) def keys(self): - pass + return self.st.keys() + +if __name__ == "__main__": + import sys + st = ST() + i = 0 + for line in sys.stdin: + st.put(line, i) + i += 1 + + for key in st.keys(): + print("%s : %s " % (key, st.get(key))) diff --git a/algs4/symbol_graph.py b/algs4/symbol_graph.py new file mode 100644 index 0000000..7500b74 --- /dev/null +++ b/algs4/symbol_graph.py @@ -0,0 +1,93 @@ +""" + Execution: python symbol_graph.py filename.txt delimiter + Data files: https://algs4.cs.princeton.edu/41graph/routes.txt + https://algs4.cs.princeton.edu/41graph/movies.txt + https://algs4.cs.princeton.edu/41graph/moviestiny.txt + https://algs4.cs.princeton.edu/41graph/moviesG.txt + https://algs4.cs.princeton.edu/41graph/moviestopGrossing.txt + + % python symbol_graph.py routes.txt " " + JFK + MCO + ATL + ORD + LAX + PHX + LAS + + % python symbol_graph.py movies.txt "/" + Tin Men (1987) + Hershey, Barbara + Geppi, Cindy + Jones, Kathy (II) + Herr, Marcia + ... + Blumenfeld, Alan + DeBoy, David + Bacon, Kevin + Woodsman, The (2004) + Wild Things (1998) + Where the Truth Lies (2005) + Tremors (1990) + ... + Apollo 13 (1995) + Animal House (1978) + + + Assumes that input file is encoded using UTF-8. 
class SymbolGraph:
    """Undirected graph whose vertices are identified by strings.

    Each line of the input file is split on the delimiter ``sp``; the first
    token is connected by an edge to every other token on that line. Names
    are mapped to consecutive integer indices in order of first appearance.

    Attributes set by the constructor:
        st: symbol table mapping vertex name -> integer index.
        keys: list mapping integer index -> vertex name.
        G: the underlying ``Graph`` over the integer indices.
    """

    def __init__(self, stream, sp):
        """Build the graph from a file.

        Args:
            stream: path of the input file (it is passed to ``open``).
            sp: delimiter separating the vertex names on each line.
        """
        # Read the file once and close it promptly; both passes below reuse
        # the parsed rows. The file is documented as UTF-8, so decode it as
        # such instead of relying on the platform default encoding.
        with open(stream, encoding="utf-8") as f:
            rows = [line.strip().split(sp) for line in f]

        # First pass: give every distinct name the next free index.
        self.st = ST()
        for names in rows:
            for name in names:
                if not self.st.contains(name):
                    self.st.put(name, self.st.size())

        # Inverted index: position i holds the name assigned index i.
        self.keys = [""] * self.st.size()
        for key in self.st.keys():
            self.keys[self.st.get(key)] = key

        # Second pass: connect the first name on each line to the others.
        self.G = Graph(self.st.size())
        for names in rows:
            v = self.st.get(names[0])
            for name in names[1:]:
                self.G.add_edge(v, self.st.get(name))

    def contains(self, s):
        """Return True if ``s`` is the name of a vertex."""
        return self.st.contains(s)

    def index(self, s):
        """Return the integer index of the vertex named ``s``."""
        return self.st.get(s)

    def name(self, v):
        """Return the name of the vertex with integer index ``v``."""
        return self.keys[v]

    def graph(self):
        """Return the underlying graph over integer indices."""
        return self.G


if __name__ == "__main__":
    import sys

    # Usage: python symbol_graph.py filename.txt delimiter
    filename, delimiter = sys.argv[1], sys.argv[2]
    sg = SymbolGraph(filename, delimiter)
    graph = sg.graph()

    for line in sys.stdin:
        source = line.strip()
        if sg.contains(source):
            s = sg.index(source)
            for v in graph.adj[s]:
                # One neighbour per line, as in the documented sample output
                # (the earlier end='' never emitted a newline).
                print(" ", sg.name(v))
        else:
            print("input not contains source: ", source)