-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpartitioning.py
293 lines (232 loc) · 8.18 KB
/
partitioning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import argparse
import logging
from collections import Counter
import numpy as np
def degree_nodes(adjacency_matrix, total_nodes):
"""
compute the degree of each node
returns the vector of degrees
:param adjacency_matrix: adjacency matrix of edges
:param total_nodes: list of all nodes
:return:
"""
d = []
for i in range(total_nodes):
d.append(sum([adjacency_matrix[i][j] for j in range(total_nodes)]))
return d
def get_min_cuts(edges, cut_edges):
"""
recursively calculates the minimum node cuts to eliminate dependency between partitions
returns list of all not cut edges
:param edges: list of all edges
:param cut_edges: list of cut edges
:return:
"""
logging.debug('total edges {} in between {}'.format(len(edges), len(cut_edges)))
if len(cut_edges) == 0:
return edges
nodes = []
for v1, v2 in cut_edges:
nodes.append(v1)
nodes.append(v2)
new_edges = []
node = Counter(nodes).most_common()[0][0]
for v1, v2 in edges:
if v1 != node and v2 != node:
new_edges.append((v1, v2))
new_edges_in_between = []
for edge in cut_edges:
if edge in new_edges:
new_edges_in_between.append(edge)
if len(new_edges_in_between) > 0:
return get_min_cuts(new_edges, new_edges_in_between)
else:
return new_edges
def get_nodes(edges):
"""
returns all nodes that were in the list of edges
:param edges: list of edges
:return:
"""
nodes = set()
for v1, v2 in edges:
nodes.add(v1)
nodes.add(v2)
return nodes
def get_cut_edges(edges, part_1, part_2):
"""
returns list of cut edges
:param edges: list of all edges
:param part_1: nodes in part A
:param part_2: nodes in part B
:return:
"""
cut_edges = []
for edge in edges:
v1, v2 = edge
if v1 in part_1 and v2 in part_2 or v1 in part_2 and v2 in part_1:
cut_edges.append(edge)
return cut_edges
def import_nodes_from_tg(tg_file):
"""
node file parser
nodes: is the list of nodes that are named 1:n
edges: is the list of edges using the names in nodes
adjacency_matrix: adjacency matrix of edges
degrees: list of nodes degrees
lines: strings in node file
node_map: nodes dict, keys are the real name of nodes in task graph and values are their name in nodes list
:param tg_file: task graph file
:return:
"""
nodes = []
edges = []
node_map = {}
with open(tg_file) as f:
lines = f.readlines()
total_nodes = int(lines[0])
adjacency_matrix = np.zeros((total_nodes, total_nodes), dtype=int)
for index in range(1, len(lines)):
row = lines[index].split()
node_name = row[0]
node_id = index - 1
node_map[node_name] = node_id
nodes.append(node_id)
for dep in row[4:]:
dep_id = node_map[dep]
edges.append((node_id, dep_id))
adjacency_matrix[node_id][dep_id] = 1
adjacency_matrix[dep_id][node_id] = 1
logging.info("Imported {} nodes with {} edges from {}".format(total_nodes, len(edges), tg_file))
return nodes, edges, adjacency_matrix, degree_nodes(adjacency_matrix, total_nodes), lines, node_map
def prune(part_1, part_2, nodes, new_nodes):
"""
removes cut nodes and edges
:param part_1: nodes in part A
:param part_2: nodes in part B
:param nodes: list of all nodes
:param new_nodes: list of nodes that are not cut
:return:
"""
cut_nodes = []
for node in nodes:
if node not in new_nodes:
cut_nodes.append(node)
new_part_1 = []
new_part_2 = []
for node in part_1:
if node not in cut_nodes:
new_part_1.append(node)
for node in part_2:
if node not in cut_nodes:
new_part_2.append(node)
return new_part_1, new_part_2, cut_nodes
def get_deps(node, edges, node_map):
"""
returns dependencies of a node
:param node: node
:param edges: list of edges
:param node_map: nodes dict
:return:
"""
deps = []
dep_names = []
for edge in edges:
v1, v2 = edge
if v1 == node and v2 < v1:
deps.append(v2)
elif v2 == node and v1 < v2:
deps.append(v1)
for node in deps:
dep_names.append(get_node_name(node, node_map))
return dep_names
def get_node_name(node_id, node_map):
"""
gets node name from nodes dictionary
:param node_id: node_id
:param node_map: input dict
:return:
"""
for key, value in node_map.items():
if str(value) == str(node_id):
return str(key)
return 'ERROR'
def create_subgraph(part, edges, nodes_data, node_map, name):
"""
creates a subgraph using nodes that are in part and writes it to file
:param part:
:param edges:
:param nodes_data:
:param node_map:
:param name:
:return:
"""
with open(name, 'w') as file:
flag = False
if part[0] != 0:
part.insert(0, 0)
node_map['0'] = 0
nodes_data.insert(1, '0 0 0 0')
flag = True
file.write(str(len(part)) + '\n')
for node in part:
node_data = nodes_data[node + 1].split()
file.write(get_node_name(node, node_map) + '\t\t' + node_data[1] + '\t\t' + node_data[2] + '\t\t')
deps = get_deps(node, edges, node_map)
if flag and node != 0:
if len(deps) != 0:
flag = False
else:
deps = [0]
file.write(str(len(deps)))
for dep in deps:
file.write('\t\t' + str(dep))
file.write('\n')
def partition_graph(nodes_file):
"""
partitions graph
returns list of nodes in two parts, list of nodes, list of edges, nodes_data and nodes dict
:param nodes_file:
:return:
"""
logging.basicConfig(level=logging.INFO)
logging.info("Computing Adjacency Matrix...")
nodes, edges, adjacency_matrix, degrees, nodes_data, node_map = import_nodes_from_tg(nodes_file)
logging.info("Computing the Laplacian matrix...")
laplacian_matrix = np.diag(degrees) - adjacency_matrix
logging.info("Computing the eigenvectors and eigenvalues...")
eigenvalues, eigenvectors = np.linalg.eigh(laplacian_matrix)
# Index of the second eigenvalue
index_fnzev = np.argsort(eigenvalues)[1]
logging.debug("Eigenvector for #{} eigenvalue ({}): ".format(
index_fnzev, eigenvalues[index_fnzev]), eigenvectors[:, index_fnzev])
partition = [val >= 0 for val in eigenvectors[:, index_fnzev]]
part_1 = [node for (node, nodeCommunity) in enumerate(partition) if nodeCommunity]
part_2 = [node for (node, nodeCommunity) in enumerate(partition) if not nodeCommunity]
# cut_edges = get_cut_edges(edges, part_1, part_2)
# edges = get_min_cuts(edges, cut_edges)
# new_nodes = get_nodes(edges)
logging.info("Nodes in A: {} Nodes in B: {}".format(len(part_1), len(part_2)))
# part_1, part_2, cut_nodes = prune(part_1, part_2, nodes, new_nodes)
# logging.info("Partition computed: nbA={} nbB={} (total {}), {} edges in between, {} cut nodes".format(
# len(part_1),
# len(part_2),
# len(nodes),
# len(cut_edges),
# len(cut_nodes),
# ))
return part_1, part_2, nodes, edges, nodes_data, node_map
def main():
# Configure logging
logging_format = '%(asctime)s.%(msecs)03d %(message)s'
logging.basicConfig(format=logging_format, datefmt='%H:%M:%S')
parser = argparse.ArgumentParser(description="Compute the partition of a "
"graph using the Spectral Partition Algorithm.")
parser.add_argument('--nodes-file', '-f', help='the file containing the nodes',
default='deadline.stg')
parser.add_argument('--output-file', '-o', help='the filename of the'
' communities PNG graph to be written')
args = parser.parse_args()
partition_graph(args.nodes_file)
if __name__ == '__main__':
main()