-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 1133abe
Showing
8 changed files
with
854 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# MCL Clustering | ||
|
||
Python implementation of Markov Clustering technique. | ||
This implementation is not yet optimized for large networks. | ||
|
||
## Installation: | ||
|
||
python setup.py install | ||
|
||
## Usage: | ||
|
||
### Command line: | ||
|
||
Usage: ./mcl_clustering.py [options] <input_file> <output_file> | ||
|
||
|
||
Options: | ||
-h, --help show this help message and exit | ||
-e EXPAND_FACTOR, --expand_factor=EXPAND_FACTOR | ||
expand factor (default: 2) | ||
-i INFLATE_FACTOR, --inflate_factor=INFLATE_FACTOR | ||
inflate factor (default: 2) | ||
-m MULT_FACTOR, --mult_factor=MULT_FACTOR | ||
multiply factor (default: 1) | ||
-l MAX_LOOP, --max_loops=MAX_LOOP | ||
max loops (default: 60) | ||
|
||
|
||
|
||
### Code: | ||
|
||
numpy adjacency matrix | ||
|
||
from mcl_clustering import mcl | ||
|
||
A = <your matrix> | ||
|
||
M, clusters = mcl(A, expand_factor = options.expand_factor, | ||
inflate_factor = options.inflate_factor, | ||
max_loop = options.max_loop, | ||
mult_factor = options.mult_factor) | ||
|
||
networkx graph | ||
|
||
from mcl_clustering import networkx_mcl | ||
|
||
G = <your graph> | ||
|
||
M, clusters = networkx_mcl(G, expand_factor = options.expand_factor, | ||
inflate_factor = options.inflate_factor, | ||
max_loop = options.max_loop, | ||
mult_factor = options.mult_factor) | ||
Output: | ||
M = output matrix | ||
clusters = dict with keys = [<cluster id>] values = [<vertex id>] | ||
|
||
## Requirements | ||
|
||
numpy | ||
networkx | ||
|
||
|
||
## Example: | ||
|
||
|
||
|
||
## Parameters: | ||
|
||
-i --inflate_factor | ||
-e --expand_factor | ||
-m --mult_factor | ||
-l --max_loops | ||
-d --draw-graph show graph with networkx | ||
|
||
|
||
|
||
## References | ||
|
||
Stijn van Dongen, Graph Clustering by Flow Simulation. | ||
PhD thesis, University of Utrecht, May 2000. | ||
( http://www.library.uu.nl/digiarchief/dip/diss/1895620/inhoud.htm ) | ||
|
||
Stijn van Dongen. A cluster algorithm for graphs. Technical Report | ||
INS-R0010, National Research Institute for Mathematics and Computer | ||
Science in the Netherlands, Amsterdam, May 2000. | ||
( http://www.cwi.nl/ftp/CWIreports/INS/INS-R0010.ps.Z ) | ||
|
Empty file.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import numpy as np | ||
import time | ||
from optparse import OptionParser | ||
import logging | ||
|
||
def normalize(A):
    """Divide every column of ``A`` by that column's sum.

    The column totals come from ``A.sum(axis=0)`` and are applied as a
    single-row divisor broadcast over all rows.
    """
    totals_per_column = A.sum(axis=0)
    divisor = totals_per_column[np.newaxis, :]
    return A / divisor
|
||
def inflate(A, inflate_factor):
    """Raise ``A`` to ``inflate_factor`` with ``np.power`` and re-normalize.

    The powered matrix is passed through ``normalize`` before it is
    returned.
    """
    powered = np.power(A, inflate_factor)
    return normalize(powered)
|
||
def expand(A, expand_factor):
    """Return ``A`` raised to the matrix power ``expand_factor``."""
    matrix_power = np.linalg.matrix_power
    return matrix_power(A, expand_factor)
|
||
def add_diag(A, mult_factor):
    """Return ``A`` plus ``mult_factor`` times the identity matrix.

    The identity matrix is sized from ``A.shape[0]``.
    """
    size = A.shape[0]
    weighted_identity = mult_factor * np.identity(size)
    return A + weighted_identity
|
||
def get_clusters(A):
    """Group column indices by the rows of ``A`` whose diagonal is positive.

    Each row ``i`` with ``A[i, i] > 0`` produces one cluster holding the
    column indices whose entry in that row is positive.  Clusters are
    numbered from 0 in row order; rows that select the same columns each
    produce their own entry.

    Returns a dict mapping cluster number to a list of column indices.
    """
    positive_rows = (A > 0).tolist()
    member_masks = [
        A[row, :] > 0
        for row, flags in enumerate(positive_rows)
        if flags[row]
    ]
    return {
        number: [col for col, is_member in enumerate(mask) if is_member]
        for number, mask in enumerate(member_masks)
    }
|
||
def draw(G, A, cluster_map):
    """Plot graph ``G`` coloured by cluster, then display matrix ``A``.

    ``cluster_map`` maps a cluster id to the vertices it contains.  The
    positions ``0 .. len(G.nodes()) - 1`` are coloured with the cluster id
    recorded for them, or with 100 when none is recorded.
    """
    import networkx as nx
    import matplotlib.pyplot as plt

    # Invert cluster -> vertices into vertex -> cluster; a vertex listed in
    # several clusters keeps the last cluster seen.
    cluster_of = {}
    for cluster_id, members in cluster_map.items():
        cluster_of.update((member, cluster_id) for member in members)

    node_colors = [
        cluster_of.get(index, 100) for index in range(len(G.nodes()))
    ]

    layout = nx.spring_layout(G)
    nx.draw_networkx_nodes(
        G, layout, node_size=200, node_color=node_colors, cmap=plt.cm.Blues
    )
    nx.draw_networkx_edges(G, layout, alpha=0.5)

    from matplotlib.pylab import matshow, show, cm
    matshow(A, fignum=100, cmap=cm.gray)
    show()

    plt.show()
|
||
def stop(M, i):
    """Report whether the iteration should end at step ``i``.

    The check only runs on every fifth step (``i % 5 == 4``).  It passes
    when the maximum and the minimum of ``M**2 - M`` are equal, in which
    case the step number is logged at INFO level.
    """
    if i % 5 != 4:
        return False

    delta = M**2 - M
    spread = np.max(delta) - np.min(delta)
    if spread != 0:
        return False

    logging.info("Stop at iteration %s" % i)
    return True
|
||
|
||
def mcl(M, expand_factor=2, inflate_factor=2, max_loop=10, mult_factor=1):
    """Run Markov clustering on the matrix ``M``.

    ``mult_factor`` times the identity is added to ``M`` and the result is
    column-normalized.  Then, for at most ``max_loop`` rounds, the matrix is
    inflated by ``inflate_factor`` and expanded by ``expand_factor``; the
    loop ends early when ``stop`` reports True.

    Returns:
        A tuple ``(M, clusters)``: the final matrix and the dict produced
        by ``get_clusters`` for it.
    """
    M = add_diag(M, mult_factor)
    M = normalize(M)

    for i in range(max_loop):
        # The placeholder is required: logging formats lazily with the
        # extra args, and a message without one triggers a logging error.
        logging.info("loop %s", i)
        M = inflate(M, inflate_factor)
        M = expand(M, expand_factor)
        if stop(M, i):
            break

    clusters = get_clusters(M)
    return M, clusters
|
||
def networkx_mcl(G, expand_factor=2, inflate_factor=2, max_loop=10, mult_factor=1):
    """Run ``mcl`` on the adjacency matrix of the networkx graph ``G``.

    The adjacency matrix is converted to a dense numpy array first; the
    remaining arguments are forwarded to ``mcl`` unchanged.
    """
    import networkx as nx

    adjacency = nx.adjacency_matrix(G)
    dense = np.array(adjacency.todense())
    return mcl(dense, expand_factor, inflate_factor, max_loop, mult_factor)
|
||
def print_info(options):
    """Print a banner with the clustering parameters held by ``options``.

    Reads ``expand_factor``, ``inflate_factor``, ``mult_factor`` and
    ``max_loop`` from ``options``.  Every call uses ``print(...)`` with a
    single pre-formatted string so the function parses, and prints the
    same text, on both Python 2 and Python 3.
    """
    rule = "-" * 60
    print(rule)
    print("MARKOV CLUSTERING:")
    print(rule)
    print(" expand_factor: %s" % options.expand_factor)
    print(" inflate_factor: %s" % options.inflate_factor)
    print(" mult factor: %s" % options.mult_factor)
    print(" max loops: %s\n" % options.max_loop)
|
||
def get_options():
    """Parse the command line and return ``(options, filename)``.

    ``options`` is the optparse values object holding ``expand_factor``,
    ``inflate_factor``, ``mult_factor``, ``max_loop``, ``output``,
    ``verbose`` and ``draw``.  ``filename`` is the first positional
    argument.

    Raises:
        Exception: with args ``('input', 'missing input filename')`` when
            no positional argument was given.
    """
    usage = "usage: %prog [options] <input_matrix>"
    parser = OptionParser(usage)
    parser.add_option("-e", "--expand_factor",
                      dest="expand_factor",
                      default=2,
                      type=int,
                      help="expand factor (default: %default)")
    parser.add_option("-i", "--inflate_factor",
                      dest="inflate_factor",
                      default=2,
                      type=float,
                      help="inflate factor (default: %default)")
    parser.add_option("-m", "--mult_factor",
                      dest="mult_factor",
                      default=2,
                      type=float,
                      help="multiply factor (default: %default)")
    parser.add_option("-l", "--max_loops",
                      dest="max_loop",
                      default=60,
                      type=int,
                      help="max loops (default: %default)")
    parser.add_option("-o", "--output", metavar="FILE",
                      help="output (default: stdout)")

    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=True,
                      help="verbose (default: %default)")
    parser.add_option("-d", "--draw-graph",
                      action="store_true", dest="draw", default=False,
                      help="show graph with networkx (default: %default)")

    (options, args) = parser.parse_args()

    # Explicit emptiness check instead of a bare ``except:`` around
    # ``args[0]``, which would also have hidden unrelated errors.
    if not args:
        raise Exception('input', 'missing input filename')

    return options, args[0]
|
||
def get_graph(csv_filename):
    """Load a comma-separated matrix file and build a graph from it.

    Each non-blank line of ``csv_filename`` is one matrix row; cells are
    separated by commas and converted with ``float``.

    Returns:
        A tuple ``(M, G)``: the matrix as a numpy array and the networkx
        graph built from it.

    Raises:
        ValueError: when a cell cannot be converted to ``float``.
    """
    import networkx as nx

    rows = []
    # ``with`` closes the file even when a cell fails to parse.
    with open(csv_filename) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # A real list, not ``map``: on Python 3 ``map`` is lazy and
            # numpy would store iterator objects instead of numbers.
            rows.append([float(cell.strip()) for cell in line.split(",")])

    matrix = np.array(rows)
    # ``from_numpy_matrix`` is absent from recent networkx releases;
    # prefer ``from_numpy_array`` and fall back for old installations.
    if hasattr(nx, "from_numpy_array"):
        G = nx.from_numpy_array(matrix)
    else:
        G = nx.from_numpy_matrix(np.matrix(rows))
    return matrix, G
|
||
|
||
if __name__ == '__main__':
    # Command-line entry point: load the matrix, cluster it, report the
    # clusters and optionally draw the graph.  Every ``print`` call takes
    # one pre-formatted string so the script runs on Python 2 and 3.
    options, filename = get_options()
    print_info(options)
    M, G = get_graph(filename)

    print(" number of nodes: %s\n" % M.shape[0])

    print("%s evaluating clusters..." % time.time())
    M, clusters = networkx_mcl(G, expand_factor=options.expand_factor,
                               inflate_factor=options.inflate_factor,
                               max_loop=options.max_loop,
                               mult_factor=options.mult_factor)
    print("%s done\n" % time.time())

    if options.output:
        # Honour -o/--output: previously the clusters were silently
        # dropped whenever an output file was requested.
        with open(options.output, "w") as out:
            for k, v in clusters.items():
                out.write("%s %s\n" % (k, v))
    else:
        print("Clusters:")
        for k, v in clusters.items():
            print("%s %s" % (k, v))

    if options.draw:
        print("%s drawing..." % time.time())
        draw(G, M, clusters)
        print("%s done" % time.time())
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import unittest | ||
import numpy as np | ||
from mcl_clustering import * | ||
import logging | ||
|
||
#TODO: improveme | ||
class TestMcl(unittest.TestCase):
    """Unit tests for the matrix helpers of ``mcl_clustering``."""

    def test_normalize(self):
        """A column holding 2, 1, 1, 1 gives 0.4 for the doubled entry."""
        A = np.ones((4, 4))
        A[2, 0] = 2
        A_n = normalize(A)
        # Floating-point results are compared with a tolerance.
        self.assertAlmostEqual(0.4, A_n[2, 0])
        for total in A_n.sum(axis=0):
            self.assertAlmostEqual(1.0, total)

    def test_inflate(self):
        """Inflation boosts the dominant entry and shrinks weaker ones."""
        A = np.ones((4, 4))
        A[3, 0] = 2
        A[1, 0] = 3
        # Compare against the normalized input: the raw value 2 is always
        # larger than any normalized entry, which made the check vacuous.
        A_n = normalize(A)
        A_i = inflate(A_n, 2)
        self.assertTrue(A_n[3, 0] > A_i[3, 0])
        self.assertTrue(A_i[1, 0] > A_n[1, 0])

    def test_expand(self):
        """Expansion spreads the weight of a dominant entry."""
        A = np.ones((4, 4))
        A[3, 0] = 2
        A[2, 0] = 3
        A = normalize(A)
        A_i = expand(A, 2)
        self.assertTrue(A[2, 0] > A_i[2, 0])
|
||
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/usr/bin/env python | ||
|
||
from distutils.core import setup | ||
|
||
# Package metadata and install configuration for the MCL clustering tool.
setup(
    name="MCL Markov Clustering",
    version="0.3",
    description="Markov Clustering algoritm for Graphs",
    keywords="MCL markov clustering graph",
    author="koteth",
    author_email="[email protected]",
    packages=["mcl"],
    # The clustering module is also listed as a script.
    scripts=["mcl/mcl_clustering.py"],
    install_requires=["numpy", "networkx"],
)