Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
27876b3
adding tools to dividing mock headers
foreseeable Jul 17, 2020
f33ed82
fix spelling
foreseeable Jul 17, 2020
9579352
fix format issues
foreseeable Jul 17, 2020
4106ddc
fix a minor bug in scripts
foreseeable Jul 22, 2020
d13fc8f
adding support to binary size tracking
foreseeable Jul 27, 2020
270933d
refactor profile.py
foreseeable Jul 28, 2020
364d7b2
added type hint and unit test
foreseeable Jul 28, 2020
b812242
update Readme
foreseeable Jul 28, 2020
900eb5e
run formatter
foreseeable Jul 28, 2020
89c47dd
code refactoring
foreseeable Jul 30, 2020
6b94843
adding comments
foreseeable Jul 30, 2020
03f5b7a
run formatter
foreseeable Jul 30, 2020
25ba2d3
add no lint for code corpus to pass ci
foreseeable Jul 30, 2020
e31448a
run formatter
foreseeable Aug 3, 2020
e3d5220
format
foreseeable Aug 3, 2020
688e47c
adding supports to produce changed.txt automatically
foreseeable Aug 3, 2020
810cc3f
run formatter
foreseeable Aug 3, 2020
8e21940
silence clang_tidy on test data
foreseeable Aug 3, 2020
e5dc56a
add comments for get_headers
foreseeable Aug 6, 2020
98d28dc
rename function
foreseeable Aug 6, 2020
29f73b6
adding docs
foreseeable Aug 6, 2020
45c39a0
run formatter
foreseeable Aug 6, 2020
9c6c7dd
use smaller test
foreseeable Aug 6, 2020
a63d79c
run formatter
foreseeable Aug 10, 2020
a1bfc6a
fix typo
foreseeable Aug 18, 2020
1157bcf
use llvm-config instead of hard code path
foreseeable Aug 18, 2020
1ece0c5
remove emptyline
foreseeable Aug 18, 2020
8a77cd4
refactor
foreseeable Aug 18, 2020
7a1d616
fix readme format
foreseeable Aug 20, 2020
59c6e1c
use double quote for consistent
foreseeable Aug 20, 2020
c40bd31
fix typo
foreseeable Aug 20, 2020
c48cd2b
fix typo
foreseeable Aug 20, 2020
2ce96cd
run pylint
foreseeable Aug 20, 2020
78f571f
run fix format
foreseeable Aug 20, 2020
7ab2763
remove profile.py and fix headersplit format
foreseeable Aug 20, 2020
6040230
format fix
foreseeable Aug 20, 2020
4278a4c
integrate unit test under bazel
foreseeable Aug 20, 2020
1bba190
make test run under bazel
foreseeable Aug 24, 2020
0f7ca0f
Merge remote-tracking branch 'upstream/master' into headsplit_tool
foreseeable Aug 24, 2020
891f8b3
integrate ci
foreseeable Aug 24, 2020
66be1ac
fix format
foreseeable Aug 24, 2020
470f3c8
fix column limit
foreseeable Aug 24, 2020
2bc5aa2
format
foreseeable Aug 24, 2020
31e6111
Kick CI
foreseeable Aug 25, 2020
1922301
minor fix
foreseeable Aug 25, 2020
85fc71c
fix BUILD
foreseeable Aug 26, 2020
04da966
fix format
foreseeable Aug 26, 2020
28833ef
add instructions at README
foreseeable Aug 26, 2020
49d6781
idempotent
foreseeable Aug 26, 2020
a7b40e8
add TODO
foreseeable Aug 27, 2020
6c5f650
fix format
foreseeable Aug 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions tools/envoy_headersplit/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Envoy Header Split
Tool for splitting monolithic header files in Envoy to speed up compilation.
Comment thread
foreseeable marked this conversation as resolved.
Outdated


Steps to divide Envoy mock headers:

1. Run `headersplit.py` to divide the monolithic mock header into separate mock class files.
Comment thread
foreseeable marked this conversation as resolved.
Outdated

2. Resolve the Bazel dependencies for the divided classes manually and remove their unused includes. (Running `headersplit.py` produces new mock class files, and each of them needs its Bazel dependencies written by hand.
Because each new mock class file starts with the same `#include`s as the monolithic mock header, the unused ones also need to be cleaned up.)
Comment thread
foreseeable marked this conversation as resolved.
Outdated

Comment thread
foreseeable marked this conversation as resolved.
3. Run `replace_includes.py` to replace superfluous `#include`s in the Envoy directory after dividing. It will also modify the corresponding Bazel `BUILD` file.

4. (Optional) Run `profile.py` to compare performance with the master branch.
303 changes: 303 additions & 0 deletions tools/envoy_headersplit/headersplit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
# !/usr/bin/env python3
# Lint as: python3
"""
This Python script divides monolithic mock headers
into separate mock classes. After running it, we need to
remove the over-included header files from the generated
class code and resolve dependencies in the corresponding
Bazel files manually.
"""

from __future__ import print_function

import argparse
import re
from typing import List, Set, Tuple, Type

import clang.cindex
from clang.cindex import Cursor, CursorKind, Index, TranslationUnit

# Tell the clang Python bindings which directory to load the libclang shared library from.
# NOTE(review): this path is hard-coded; presumably it must match the local LLVM install.
clang.cindex.Config.set_library_path("/opt/llvm/lib")
Comment thread
foreseeable marked this conversation as resolved.
Outdated


def to_filename(classname: str) -> str:
    """
    maps a mock class name (in C++ code) to a file name under the envoy naming convention.
    e.g. map "MockAdminStream" to "admin_stream"

    Only the first occurrence of "Mock" is removed. Every uppercase letter except one in
    the leading position is preceded by an underscore, then the result is lower-cased.

    Args:
        classname: mock class name from source

    Returns:
        corresponding file name (without extension)
    """
    stem = classname.replace('Mock', '', 1)  # Remove only first "Mock"
    # Build the snake_case name in a single join instead of repeated string concatenation.
    return ''.join(
        '_' + char if char.isupper() and index > 0 else char
        for index, char in enumerate(stem)).lower()


def get_headers(translation_unit: TranslationUnit) -> str:
    """
    extracts the leading part of the target code file (translation_unit), i.e. everything
    that precedes the first AST node libclang attributes to that file. For a typical
    header this is the block of include statements.

    for instance:
        foo.h:
        #include "a.h"
        #include "b.h"

        int foo(){

        }
    this function should return
        '#include "a.h"\\n#include "b.h"'

    Args:
        translation_unit: parsing result of target source code by libclang

    Returns:
        A string containing the source text up to the first node located in the file
        itself, or an empty string when no such node exists.
    """

    # clang lib provides an API like translation_unit.get_includes(),
    # but we can't use it since it requires presence of the included files to return the
    # full list.

    root = translation_unit.cursor
    for node in root.walk_preorder():
        node_file = node.location.file
        # Skip nodes without a file and nodes that come from other (included) files.
        if node_file is None or node_file.name != root.displayname:
            continue
        # libclang reports extents as byte offsets, so slice the raw bytes and decode
        # afterwards; slicing decoded text would be off for non-ASCII or CRLF content.
        with open(node_file.name, "rb") as source_file:
            raw = source_file.read()
        return raw[:node.extent.start.offset].decode("utf-8")

    return ""


def class_definitions(cursor: Cursor) -> List[Cursor]:
    """
    collects every class definition found in the file the cursor points to
    (typically mocks.h).

    A node is kept only when it is located in that file, is a class declaration that is
    also a definition, and its semantic parent is a namespace.

    Args:
        cursor: root cursor of the libclang parsing result of the target source code

    Returns:
        a list of cursors, each pointing to a class definition, in traversal order.
    """
    main_file = cursor.displayname

    def is_namespace_level_class(node: Cursor) -> bool:
        # Checks are ordered so that later attributes are only touched when needed.
        source = node.location.file
        return (source is not None and source.name == main_file and
                node.kind == CursorKind.CLASS_DECL and node.is_definition() and
                node.semantic_parent.kind == CursorKind.NAMESPACE)

    return [node for node in cursor.walk_preorder() if is_namespace_level_class(node)]


def class_implementations(cursor: Cursor) -> List[Cursor]:
    """
    collects every class member implementation found in the file the cursor points to
    (typically mocks.cc).

    A node is kept when it is located in that file, is not itself a namespace, and its
    semantic parent is a class declaration.

    Args:
        cursor: root cursor of the libclang parsing result of the target source code

    Returns:
        a list of cursors, each pointing to a class member implementation, in traversal
        order.
    """
    main_file = cursor.displayname
    members = []
    for node in cursor.walk_preorder():
        source = node.location.file
        if source is None or source.name != main_file:
            continue  # no file information, or the node comes from another file
        if node.kind == CursorKind.NAMESPACE:
            continue
        owner = node.semantic_parent
        if owner is not None and owner.kind == CursorKind.CLASS_DECL:
            members.append(node)
    return members


def extract_definition(cursor: Cursor, classnames: List[str]) -> Tuple[str, str, Set[str]]:
    """
    extracts the class definition source code pointed to by the cursor parameter
    and finds dependent mock classes by name lookup.

    Args:
        cursor: libclang cursor pointing to the target mock class definition.
        classnames: all mock class names defined in the definition header that needs to be
            divided, used to detect class dependencies.

    Returns:
        class_name: a string representing the mock class name.
        class_defn: a string containing the whole class definition (terminated by ";"),
            wrapped in its enclosing named namespaces.
        deps: a set of strings containing the names of the other classes from
            `classnames` that appear as whole identifiers in class_defn.

    Note:
        It can not detect and resolve forward declarations and cyclic dependencies. Those
        need to be addressed manually.
    """
    # libclang reports extents as byte offsets, so slice the raw bytes and decode
    # afterwards; slicing decoded text would be off for non-ASCII or CRLF content.
    with open(cursor.location.file.name, "rb") as source_file:
        raw = source_file.read()
    extent = cursor.extent
    class_name = cursor.spelling
    class_defn = raw[extent.start.offset:extent.end.offset].decode("utf-8") + ";"

    # need to know the enclosing semantic parents (namespaces)
    # to generate the corresponding definitions; stop at the first unnamed namespace
    parent_cursor = cursor.semantic_parent
    while parent_cursor.kind == CursorKind.NAMESPACE and parent_cursor.spelling != "":
        class_defn = "namespace {} {{\n".format(parent_cursor.spelling) + class_defn + "\n}\n"
        parent_cursor = parent_cursor.semantic_parent

    # resolve dependencies by simple name lookup. Match whole identifiers only, so that
    # e.g. "MockFoo" is not reported just because "MockFooBar" occurs in the definition.
    deps = {
        name for name in classnames
        if name != class_name and re.search(r"\b{}\b".format(re.escape(name)), class_defn)
    }

    return class_name, class_defn, deps


def extract_implementation(cursor: Cursor) -> Tuple[str, int]:
    """
    reports which class a method implementation belongs to and where it starts.

    Args:
        cursor: libclang cursor pointing to a class member implementation.

    Returns:
        class_name: spelling of the cursor's semantic parent, i.e. the owning class.
        implline: the start line of the cursor's extent minus one, usable as an index
            into the list of lines of the implementation file.

    Note:
        this function returns a line number only, because in certain cases libclang
        fails to parse the method body and returns an empty function body instead. So we
        do not parse the function body; callers work with start and end lines instead.
    """
    owner = cursor.semantic_parent
    first_line_index = cursor.extent.start.line - 1
    return owner.spelling, first_line_index


def _enclosing_namespaces(cursor) -> List[str]:
    """returns the names of the named namespaces enclosing cursor, innermost first."""
    names = []
    parent_cursor = cursor.semantic_parent
    # Stop at the first non-namespace parent or at an unnamed namespace.
    while parent_cursor.kind == CursorKind.NAMESPACE and parent_cursor.spelling != "":
        names.append(parent_cursor.spelling)
        parent_cursor = parent_cursor.semantic_parent
    return names


def main(args):
    """
    divides the monolith mock file into different mock class files.

    For every class definition found in the declaration header, a "<name>.h" and a
    "<name>.cc" file are written to the current working directory and an
    envoy_cc_mock stub is appended to the "BUILD" file there.

    Args:
        args: a mapping with the keys "decl" (path to the monolith header) and "impl"
            (path to the corresponding implementation file).
    """
    decl_filename = args["decl"]
    impl_filename = args["impl"]
    idx = Index.create()

    impl_translation_unit = TranslationUnit.from_source(
        impl_filename, options=TranslationUnit.PARSE_SKIP_FUNCTION_BODIES)

    impl_includes = get_headers(impl_translation_unit)

    decl_translation_unit = idx.parse(decl_filename, ['-x', 'c++'])
    defns = class_definitions(decl_translation_unit.cursor)
    decl_includes = get_headers(decl_translation_unit)

    impls = class_implementations(impl_translation_unit.cursor)

    with open(impl_filename, 'r') as source_file:
        impl_lines = source_file.readlines()

    # Collect the implementation text of every class, keyed by class name.
    classname_to_impl = {}
    for index, cursor in enumerate(impls):
        classname, start = extract_implementation(cursor)
        if index + 1 < len(impls):
            # An implementation runs up to the line where the next one starts.
            _, end = extract_implementation(impls[index + 1])
        else:
            # The last implementation runs up to the first closing "// namespace"
            # comment, or to the end of the file when there is none.
            end = start
            while end < len(impl_lines) and '// namespace' not in impl_lines[end]:
                end += 1
        impl = ''.join(impl_lines[start:end])
        classname_to_impl[classname] = classname_to_impl.get(classname, "") + impl + "\n"

    classnames = [cursor.spelling for cursor in defns]
    for defn in defns:
        class_name, class_defn, deps = extract_definition(defn, classnames)
        file_stem = to_filename(class_name)

        # Sort the dependencies so that the generated include order is deterministic.
        includes = "".join(
            '#include "{}.h"\n'.format(to_filename(name)) for name in sorted(deps))

        if class_name in classname_to_impl:
            # Point the implementation at the new per-class header instead of the monolith.
            impl_include = impl_includes.replace(decl_filename, '{}.h'.format(file_stem))
            namespaces = _enclosing_namespaces(defn)
            # Namespaces are opened outermost first and closed once per opened namespace.
            namespace_prefix = "".join(
                "namespace {} {{\n".format(name) for name in reversed(namespaces))
            namespace_suffix = "\n}\n" * len(namespaces)
            class_impl = (impl_include + namespace_prefix + classname_to_impl[class_name] +
                          namespace_suffix)
        else:
            # No out-of-line implementation was found; an empty .cc file is still written.
            print("Warning: empty class {}".format(class_name))
            class_impl = ""

        with open("{}.h".format(file_stem), "w") as decl_file:
            decl_file.write(decl_includes + includes + class_defn)
        with open("{}.cc".format(file_stem), "w") as impl_file:
            impl_file.write(class_impl)
        # generating bazel build file, need to fill dependency manually
        bazel_text = """
envoy_cc_mock(
name = "{}_mocks",
srcs = ["{}.cc"],
hdrs = ["{}.h"],
deps = [

]
)
""".format(file_stem, file_stem, file_stem)
        with open("BUILD", "a") as bazel_file:
            bazel_file.write(bazel_text)


if __name__ == '__main__':
    # Command line entry point: both paths default to files in the current directory.
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument(
        '-d',
        '--decl',
        help="Path to the monolith header .h file that needs to be splitted",
        default='mocks.h',
    )
    PARSER.add_argument(
        '-i',
        '--impl',
        help="Path to the impl code .cc file that needs to be splitted",
        default='mocks.cc',
    )
    ARGS = PARSER.parse_args()
    # main() expects a plain mapping with the "decl" and "impl" keys.
    main(vars(ARGS))
Loading