Skip to content

Commit

Permalink
Move printing Graphviz graphs to separate scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
17451k committed Dec 13, 2022
1 parent 4d945f9 commit 892e69c
Show file tree
Hide file tree
Showing 9 changed files with 302 additions and 99 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,6 @@ prof

# pyright
pyrightconfig.json

# documents
*.pdf
58 changes: 11 additions & 47 deletions clade/extensions/cmd_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from graphviz import Digraph

from clade.extensions.abstract import Extension
from clade.extensions.utils import get_string_hash


class CmdGraph(Extension):
Expand All @@ -44,8 +43,7 @@ def __init__(self, work_dir, conf=None):

self.out_dict = dict()

self.graph_dot = os.path.join(self.work_dir, "cmd_graph.dot")
self.graph_with_files_dot = os.path.join(self.work_dir, "cmd_graph_with_files.dot")
self.pdf_file = os.path.join(self.work_dir, "cmd_graph")

def load_cmd_graph(self):
"""Load command graph."""
Expand Down Expand Up @@ -86,6 +84,15 @@ def load_all_cmds_by_type(self, cmd_type, filter_by_pid=True):

return [cmd for cmd in cmds if cmd["id"] in cmd_graph]

def load_cmd_by_id(self, cmd_id):
    """Load a single command and annotate it with its command type.

    The id -> type mapping (``self.cmd_type``) is loaded through
    ``self.load_cmd_type()`` the first time it is missing or empty. The
    command itself is then loaded by the extension registered for its type
    in ``self.extensions``.

    Args:
        cmd_id: identifier of the command to load.

    Returns:
        The command object returned by the matching extension, with an extra
        ``"type"`` key holding the command type.
    """
    # Populate the mapping lazily: it may be absent or still empty here.
    if not getattr(self, "cmd_type", None):
        self.cmd_type = self.load_cmd_type()

    type_name = self.cmd_type[cmd_id]
    loaded = self.extensions[type_name].load_cmd_by_id(cmd_id)
    loaded["type"] = type_name

    return loaded

@Extension.prepare
def parse(self, cmds_file):
cmds = self.load_all_cmds()
Expand All @@ -100,7 +107,6 @@ def parse(self, cmds_file):
if self.graph:
if self.conf.get("CmdGraph.as_picture"):
self.__print_cmd_graph()
self.__print_cmd_graph_with_files()
else:
self.error("Command graph is empty")
raise RuntimeError
Expand Down Expand Up @@ -143,49 +149,7 @@ def __print_cmd_graph(self):
dot.edge(using_id, cmd_id)

self.debug("Rendering dot file")
dot.render(self.graph_dot)

def __print_cmd_graph_with_files(self):
    """Render a Graphviz graph with files as nodes and commands as edges.

    For every command in ``self.graph`` an edge is drawn from each input
    file to each output file, labelled ``TYPE(ID)``. The result is rendered
    to ``self.graph_with_files_dot``.
    """
    self.debug("Preparing dot file")

    graph = Digraph(graph_attr={"rankdir": "LR"}, node_attr={"shape": "rectangle"})
    seen_files = set()

    def ensure_node(path):
        # Node ids are the hash of the path; each path is declared only once.
        path_hash = get_string_hash(path)
        if path not in seen_files:
            graph.node(path_hash, label=re.escape(path))
            seen_files.add(path)
        return path_hash

    for cmd_id in self.graph:
        cmd_type = self.cmd_type[cmd_id]
        cmd = self.extensions[cmd_type].load_cmd_by_id(cmd_id)

        for index, out_file in enumerate(cmd["out"]):
            out_hash = ensure_node(out_file)

            # Properly print compiler commands with "-c" option:
            # the i-th output is paired with the i-th input only.
            if cmd_type not in ["CC", "CL"]:
                in_files = cmd["in"]
            elif cmd["in"]:
                in_files = [cmd["in"][index]]
            else:
                continue

            for in_file in in_files:
                in_hash = ensure_node(in_file)
                graph.edge(
                    in_hash,
                    out_hash,
                    label="{}({})".format(cmd_type, cmd_id),
                )

    self.debug("Rendering dot file")
    graph.render(self.graph_with_files_dot)
dot.render(self.pdf_file, cleanup=True)

@staticmethod
def __get_new_value():
Expand Down
27 changes: 0 additions & 27 deletions clade/extensions/pid_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import os

from graphviz import Digraph

from clade.cmds import iter_cmds, get_last_id
from clade.extensions.abstract import Extension
Expand All @@ -31,8 +27,6 @@ def __init__(self, work_dir, conf=None):
self.pid_by_id = dict()
self.pid_by_id_file = "pid_by_id.json"

self.graph_dot = os.path.join(self.work_dir, "pid_graph.dot")

@Extension.prepare
def parse(self, cmds_file):
self.log("Parsing {} commands".format(get_last_id(cmds_file, raise_exception=True)))
Expand All @@ -42,29 +36,8 @@ def parse(self, cmds_file):

self.dump_data(self.pid_by_id, self.pid_by_id_file)

if self.pid_by_id and self.conf.get("PidGraph.as_picture"):
self.__print_pid_graph(cmds_file)

self.pid_by_id.clear()

def __print_pid_graph(self, cmds_file):
    """Render the parent/child graph of intercepted commands with Graphviz.

    Every command read from ``cmds_file`` becomes a node labelled
    ``[id] which``; an edge is drawn from each command whose ``id`` equals
    another command's ``pid`` to that command. The graph is rendered to
    ``self.graph_dot``.

    Args:
        cmds_file: path passed to ``iter_cmds`` to read the commands.
    """
    self.debug("Preparing dot file")

    dot = Digraph(graph_attr={'rankdir': 'LR'}, node_attr={'shape': 'rectangle'})

    cmds = list(iter_cmds(cmds_file))

    # Index commands by id once, so that looking up a parent does not require
    # rescanning the whole list for every command (previously O(n^2)).
    # A list per id keeps one edge per matching command, exactly as a full
    # scan would produce.
    ids_by_id = {}

    for cmd in cmds:
        cmd_node = "[{}] {}".format(cmd["id"], cmd["which"])
        dot.node(cmd["id"], label=re.escape(cmd_node))
        ids_by_id.setdefault(cmd["id"], []).append(cmd["id"])

    for cmd in cmds:
        for parent_id in ids_by_id.get(cmd["pid"], ()):
            dot.edge(parent_id, cmd["id"])

    self.debug("Rendering dot file")
    dot.render(self.graph_dot)

def load_pid_graph(self):
pid_by_id = self.load_pid_by_id()
pid_graph = dict()
Expand Down
10 changes: 2 additions & 8 deletions clade/extensions/presets/presets.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
"Info.cif": "cif",
"Info.aspectator": null,
"Info.aspect": "info.aspect",
"PidGraph.as_picture": false,
"PidGraph.filter_cmds_by_pid": true,
"CDB.filter_opts": false,
"AR.which_list": [
Expand Down Expand Up @@ -101,11 +100,6 @@
"cmd\\.exe$"
]
},
"base_print": {
"extends": "base",
"CmdGraph.as_picture": true,
"PidGraph.as_picture": true
},
"linux_kernel": {
"extends": "base",
"CmdGraph.requires": [
Expand Down Expand Up @@ -138,7 +132,7 @@
"Info.aspect": "linux_kernel.aspect"
},
"busybox_linux": {
"extends": "base_print",
"extends": "base",
"Common.exclude_list": [],
"Common.exclude_list_in": [
"\\.tmp$",
Expand All @@ -156,7 +150,7 @@
]
},
"apache_linux": {
"extends": "base_print",
"extends": "base",
"Common.exclude_list": [
"/tmp/.*"
],
Expand Down
204 changes: 204 additions & 0 deletions clade/scripts/file_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# Copyright (c) 2022 Ilya Shchepetkov
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import re
import sys

from graphviz import Digraph

from clade import Clade
from clade.extensions.utils import get_string_hash


class DotWithFiles:
    """Graphviz digraph of files with regex-based include/exclude filtering.

    Nodes are identified by ``get_string_hash(file)`` and labelled with the
    file path passed through ``re.escape``. Files rejected by the filters
    are silently left out, together with every edge touching them.
    """

    def __init__(self, include_files, exclude_files):
        """Create an empty left-to-right graph.

        Args:
            include_files: iterable of regular expressions; when non-empty,
                only files matching at least one of them are kept.
            exclude_files: iterable of regular expressions; files matching
                any of them are dropped.
        """
        self.dot = Digraph(
            graph_attr={"rankdir": "LR"}, node_attr={"shape": "rectangle"}
        )
        self.added_nodes = set()
        # Each attribute is either None (no filter) or a list of compiled
        # patterns, one per user-supplied expression.
        self.include_regex = self._compile_patterns(include_files)
        self.exclude_regex = self._compile_patterns(exclude_files)

    @staticmethod
    def _compile_patterns(patterns):
        """Compile every pattern on its own; return None when there are none.

        Patterns are deliberately NOT merged into a single "(a)|(b)"
        alternation: merging renumbers capture groups (breaking
        backreferences such as ``(x)\\1``) and moves inline global flags like
        ``(?i)`` away from the start of the expression, which Python 3.11+
        rejects.
        """
        if not patterns:
            return None

        return [re.compile(pattern) for pattern in patterns]

    def add_node(self, file):
        """Add a node for ``file`` unless it is filtered out or already present."""
        if self.__filter_file(file):
            return

        if file in self.added_nodes:
            return

        self.dot.node(get_string_hash(file), label=re.escape(file))
        self.added_nodes.add(file)

    def add_edge(self, in_file, out_file, label):
        """Add a labelled edge ``in_file -> out_file`` if both files pass the filters."""
        if self.__filter_file(in_file) or self.__filter_file(out_file):
            return

        in_hash = get_string_hash(in_file)
        out_hash = get_string_hash(out_file)

        self.dot.edge(
            in_hash,
            out_hash,
            label=label,
        )

    def __filter_file(self, file):
        """Return True when ``file`` must be left out of the graph."""
        if self.include_regex and not any(
            regex.search(file) for regex in self.include_regex
        ):
            return True

        if self.exclude_regex and any(
            regex.search(file) for regex in self.exclude_regex
        ):
            return True

        return False


class FileGraph:
    """Build and render a file-level graph from a Clade build base."""

    def __init__(self, args):
        """Open the build base and remember the rendering options.

        Args:
            args: parsed command-line namespace providing ``clade``,
                ``output``, ``exclude_cmd_types``, ``exclude_files``,
                ``include_files`` and ``include_deps``.

        Raises:
            RuntimeError: if ``work_dir_ok()`` reports the build base as
                not valid.
        """
        self.clade = Clade(args.clade)

        if not self.clade.work_dir_ok():
            raise RuntimeError("Specified Clade build base is not valid")

        self.output = args.output
        self.exclude_cmd_types = args.exclude_cmd_types
        self.exclude_files = args.exclude_files
        self.include_files = args.include_files
        self.include_deps = args.include_deps

    def __load_deps(self, cmd):
        """Return the dependencies of ``cmd``, or an empty tuple if unavailable.

        An AttributeError (for instance, an extension without
        ``load_deps_by_id``) means there is nothing to draw for this command.
        """
        try:
            return self.clade.CmdGraph.extensions[
                cmd["type"]
            ].load_deps_by_id(cmd["id"])
        except AttributeError:
            return ()

    def print(self):
        """Render the graph as ``file_graph`` inside ``self.output``.

        Each command of the command graph contributes edges from its input
        files to its output files, labelled ``TYPE (ID)``. With
        ``include_deps`` set, dependencies that are not direct inputs are
        linked to the input file with an ``Include (ID)`` edge.
        """
        cmd_graph = self.clade.cmd_graph

        dot = DotWithFiles(self.include_files, self.exclude_files)

        for cmd_id in cmd_graph:
            cmd = self.clade.CmdGraph.load_cmd_by_id(cmd_id)

            if cmd["type"] in self.exclude_cmd_types:
                continue

            # Dependencies are the same for every input/output pair of a
            # command, so load them lazily and at most once per command.
            deps = None

            for i, cmd_out in enumerate(cmd["out"]):
                dot.add_node(cmd_out)

                # Properly print compiler commands with "-c" option
                if cmd["type"] in ["CC", "CL", "LN"]:
                    # Outputs are paired with inputs by position; an output
                    # without a matching input keeps its node but gets no
                    # edge instead of raising IndexError.
                    if not cmd["in"] or i >= len(cmd["in"]):
                        continue
                    cmd_ins = [cmd["in"][i]]
                else:
                    cmd_ins = cmd["in"]

                for cmd_in in cmd_ins:
                    dot.add_node(cmd_in)
                    dot.add_edge(cmd_in, cmd_out, f'{cmd["type"]} ({cmd_id})')

                    if not self.include_deps:
                        continue

                    if deps is None:
                        deps = self.__load_deps(cmd)

                    for dep in deps:
                        # Direct inputs already have their own edge.
                        if dep in cmd["in"]:
                            continue

                        dot.add_node(dep)
                        dot.add_edge(dep, cmd_in, f"Include ({cmd_id})")

        dot.dot.render("file_graph", directory=self.output, cleanup=True)


def parse_args(args):
    """Parse the command-line arguments of the file graph script.

    Args:
        args: list of argument strings (without the program name).

    Returns:
        argparse.Namespace with ``include_deps``, ``exclude_files``,
        ``include_files``, ``exclude_cmd_types``, ``output`` and ``clade``.
    """
    cli = argparse.ArgumentParser(
        description="Create a file graph based on input and output of intercepted commands."
    )

    # (flags, keyword arguments) for each option, in help-output order.
    option_specs = [
        (
            ("--include-deps",),
            {
                "help": "include header files to the graph",
                "action": "store_true",
                "default": False,
            },
        ),
        (
            ("--exclude-file",),
            {
                "help": "remove nodes from the resulted graph that match this regular expression",
                "metavar": "REGEXP",
                "default": [],
                "action": "append",
                "dest": "exclude_files",
            },
        ),
        (
            ("--include-file",),
            {
                "help": "include only those nodes that match this regular expression",
                "metavar": "REGEXP",
                "default": [],
                "action": "append",
                "dest": "include_files",
            },
        ),
        (
            ("--exclude",),
            {
                "help": "remove nodes from the resulted graph that came from commands of specified type",
                "metavar": "CMD_TYPE",
                "default": [],
                "action": "append",
                "dest": "exclude_cmd_types",
            },
        ),
        (
            ("-o", "--output"),
            {
                "help": "path to the output directory where generated graphs will be saved",
                "metavar": "DIR",
                "default": os.path.curdir,
            },
        ),
        (
            ("clade",),
            {
                "help": "path to the Clade build base",
                "metavar": "DIR",
            },
        ),
    ]

    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    return cli.parse_args(args)


def main(args=None):
    """Entry point: build the file graph described by the command line.

    Args:
        args: list of argument strings; when empty or None, the arguments
            are taken from ``sys.argv[1:]``.
    """
    argv = args if args else sys.argv[1:]

    graph = FileGraph(parse_args(argv))
    graph.print()


if __name__ == "__main__":
    main()
Loading

0 comments on commit 892e69c

Please sign in to comment.