Skip to content

Commit e37ebac

Browse files
committed
Add types, collector and pass all checks
1 parent 129dbff commit e37ebac

File tree

6 files changed

+330
-258
lines changed

6 files changed

+330
-258
lines changed

.pylintrc

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.app
1111
[pylint.messages_control]
1212
disable=missing-function-docstring, line-too-long, import-error,
1313
too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods,
14-
too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation, missing-module-docstring, unused-import, missing-class-docstring
14+
too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation, missing-module-docstring, unused-import, missing-class-docstring, no-else-return

codegen/parse.py

+15-224
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
# FIXME: properly make it a package
2323
from utils.clang import get_cpp_include_paths
24+
from utils.collector import EntityCollector
2425
from utils.interface import (
2526
ClassInterface,
2627
ClassType,
@@ -29,6 +30,16 @@
2930
ParamType,
3031
ReturnType,
3132
)
33+
from utils.types import (
34+
check_arg_valid,
35+
check_valid,
36+
cursor_kind_spelling,
37+
from_lean_function,
38+
to_lean_function,
39+
to_lean_method_name,
40+
to_lean_param_name,
41+
to_lean_type_name,
42+
)
3243

3344
cpp_include_paths = get_cpp_include_paths()
3445
print(f"C++ include paths: {cpp_include_paths}")
@@ -43,86 +54,6 @@
4354

4455
ginac_header: Path = dir_build / ginac / "symbol.h"
4556

46-
index_args: List[str] = ["-xc++", "-std=c++11"]
47-
48-
# Add C++ include paths to the index or libclang fail to recognize std::string etc.
49-
index_args.extend(["-I" + include_path for include_path in cpp_include_paths])
50-
51-
# print(index_args)
52-
53-
print(f"Parsing {ginac_header}...")
54-
ginac_ast: TranslationUnit = index.parse(ginac_header, args=index_args)
55-
56-
57-
def to_lean_type_name(type_name_cpp, cpp_type: CppType):
58-
if is_primitive_type(cpp_type.kind):
59-
return map_primitive_type(type_name_cpp, cpp_type)
60-
if "::" in type_name_cpp:
61-
return to_lean_type_name(type_name_cpp.split("::")[-1], cpp_type)
62-
return "".join(word.capitalize() for word in type_name_cpp.split("_"))
63-
64-
65-
class EntityCollector:
66-
recursive: bool = True
67-
visited_headers: Set[str] = set()
68-
data: Dict[str, ClassInterface] = {}
69-
70-
def __init__(self, recursive):
71-
self.recursive = recursive
72-
73-
def in_cpp_include_paths(self, file_name: str) -> bool:
74-
# TODO: the whole parsing should be embeded in this class
75-
# FIXME: cpp_include_paths refering to global variable
76-
for include_path in cpp_include_paths:
77-
if include_path in file_name:
78-
return True
79-
return False
80-
81-
def should_skip(self, file_name: str) -> bool:
82-
# print(f'Checking if {file_name} should be skipped...')
83-
84-
return file_name in self.visited_headers or self.in_cpp_include_paths(file_name)
85-
86-
def visit_file(self, included_filename: str):
87-
# print(f'Visiting {included_filename}...')
88-
self.visited_headers.add(included_filename)
89-
90-
def walk(self, ast: TranslationUnit) -> Iterator[Cursor]:
91-
if self.recursive:
92-
for include in ast.get_includes():
93-
included_filename: str = include.include.name
94-
if not self.should_skip(included_filename):
95-
self.visit_file(included_filename)
96-
# FIXME: index_args referring to global variable
97-
included_ast: TranslationUnit = index.parse(
98-
included_filename, args=index_args
99-
)
100-
for inner_cursor in self.walk(included_ast):
101-
yield inner_cursor
102-
103-
for cursor in ast.cursor.walk_preorder():
104-
yield cursor
105-
106-
def find_or_create_class_interface(
107-
self, type_name_cpp: str, cpp_type: CppType
108-
) -> ClassInterface:
109-
# Caution! It's registered in its lean name
110-
type_name_lean = to_lean_type_name(type_name_cpp, cpp_type)
111-
112-
if type_name_lean in self.data:
113-
return self.data[type_name_lean]
114-
else:
115-
class_interface = ClassInterface(
116-
type_name_cpp, [], ClassType(type_name_lean, type_name_cpp, [])
117-
)
118-
self.data[type_name_lean] = class_interface
119-
return class_interface
120-
121-
def find_or_create_type(self, type_name_cpp: str, cpp_type: CppType) -> ClassType:
122-
class_interface = self.find_or_create_class_interface(type_name_cpp, cpp_type)
123-
return class_interface.type
124-
125-
12657
concerned_cursor_kinds: List[CursorKind] = [
12758
CursorKind.STRUCT_DECL,
12859
CursorKind.TYPEDEF_DECL,
@@ -135,138 +66,10 @@ def find_or_create_type(self, type_name_cpp: str, cpp_type: CppType) -> ClassTyp
13566

13667
collector = EntityCollector(recursive=True)
13768

138-
PATH_ROOT_REGEX = re.compile(".*/ginac-lean/build/ginac-1.8.7/")
139-
140-
141-
def check_valid(cursor: Cursor):
142-
pass
143-
# if cursor.kind.is_invalid():
144-
# print(f'\tINVALID!!! {cursor.spelling}')
145-
146-
147-
# If libclang can't determine a type, it's int! Should be fixed by adding C++ include paths to the index.
148-
# Here we have a hacky way to detect this error.
149-
# TODO: find a better way to detect argument type parsing error
150-
def check_arg_valid(cursor: Cursor):
151-
check_valid(cursor)
152-
token_spellings = " ".join([token.spelling for token in cursor.get_tokens()])
153-
# If the argument type is int, but the token spellings don't contain 'int', it's probably a parsing error
154-
# Can confirm this works by removing C++ include paths from the index, then parse symbol.h which refers to std::string,
155-
# it will print out a lot of warnings like:
156-
# WARN Invalid argument type detected: const int & != const std :: string & initname at ginac-lean/build/ginac-1.8.7/ginac/symbol.h:43:38
157-
if "int" in cursor.type.spelling and "int" not in token_spellings:
158-
if "unsigned int" in cursor.type.spelling:
159-
# Exclude false positives like:
160-
# WARN Invalid argument type detected: unsigned int != unsigned inf at ginac-lean/build/ginac-1.8.7/ginac/symbol.h:48:21
161-
return
162-
# TODO: gather all the warnings and print them at the end, then give hints on how to fix them
163-
print(
164-
f"WARN Invalid argument type detected: {cursor.type.spelling} != {token_spellings} at {cursor.location.file.name}:{cursor.location.line}:{cursor.location.column}",
165-
file=sys.stderr,
166-
)
167-
168-
169-
# TODO: add more primitive types
170-
def is_primitive_type(typekind: TypeKind):
171-
return typekind in [
172-
TypeKind.BOOL,
173-
# TypeKind.CHAR_U,
174-
# TypeKind.UCHAR,
175-
# TypeKind.CHAR16,
176-
# TypeKind.CHAR32,
177-
TypeKind.USHORT,
178-
TypeKind.UINT,
179-
TypeKind.ULONG,
180-
TypeKind.ULONGLONG,
181-
TypeKind.UINT128,
182-
# TypeKind.CHAR_S,
183-
# TypeKind.SCHAR,
184-
# TypeKind.WCHAR,
185-
TypeKind.SHORT,
186-
TypeKind.INT,
187-
TypeKind.LONG,
188-
TypeKind.LONGLONG,
189-
TypeKind.INT128,
190-
TypeKind.FLOAT,
191-
TypeKind.DOUBLE,
192-
TypeKind.LONGDOUBLE,
193-
]
194-
195-
196-
# FIXME maybe all these primitive types are not meant to mapped to Lean built-in types
197-
MAP_PRIMITIVE_TYPE_TO_LEAN_TYPE: Dict[TypeKind, str] = {
198-
TypeKind.BOOL: "Bool",
199-
# TypeKind.CHAR_U: 'UInt8',
200-
# TypeKind.UCHAR: 'UInt8',
201-
# TypeKind.CHAR16: 'UInt16',
202-
# TypeKind.CHAR32: 'UInt32',
203-
TypeKind.USHORT: "UInt16",
204-
TypeKind.UINT: "UInt32",
205-
TypeKind.ULONG: "UInt64",
206-
TypeKind.ULONGLONG: "UInt64",
207-
TypeKind.UINT128: "UInt128",
208-
# TypeKind.CHAR_S: 'Int8',
209-
# TypeKind.SCHAR: 'Int8',
210-
# TypeKind.WCHAR: 'UInt32',
211-
TypeKind.SHORT: "Int16",
212-
TypeKind.INT: "Int32",
213-
TypeKind.LONG: "Int64",
214-
TypeKind.LONGLONG: "Int64",
215-
TypeKind.INT128: "Int128",
216-
TypeKind.FLOAT: "Float",
217-
TypeKind.DOUBLE: "Float",
218-
TypeKind.LONGDOUBLE: "Float",
219-
}
220-
221-
222-
def map_primitive_type(type_name: str, cpp_type: CppType):
223-
if cpp_type.kind in MAP_PRIMITIVE_TYPE_TO_LEAN_TYPE:
224-
return MAP_PRIMITIVE_TYPE_TO_LEAN_TYPE[cpp_type.kind]
225-
else:
226-
return f"LEAN_PRIMITIVE_{type_name}"
227-
228-
229-
def to_lean_method_name(method_name_cpp: str, cursor: Cursor) -> str:
230-
if cursor.kind == CursorKind.CONSTRUCTOR:
231-
return "mk"
232-
# TODO: should keep or change getters/setters?
233-
else:
234-
return method_name_cpp
235-
236-
237-
def from_lean_function(param_type_cpp: str, cursor: Cursor) -> str:
238-
if param_type_cpp in ["const std::string &", "const char *"]:
239-
return "lean_string_cstr(%s)"
240-
else:
241-
print(
242-
f"WARN Unknown from_lean_function for {param_type_cpp} at {cursor.location.file.name}:{cursor.location.line}:{cursor.location.column}",
243-
file=sys.stderr,
244-
)
245-
return "UNKOWN_FROM_LEAN_FUNCTION"
246-
247-
248-
def to_lean_function(param_type_cpp: str, cpp_type: CppType) -> str:
249-
if param_type_cpp in ["const std::string &"]:
250-
return "lean_mk_string(%s.c_str())"
251-
elif param_type_cpp in ["const char *"]:
252-
return "lean_mk_string(%s)"
253-
else:
254-
# print(f'WARN Unknown to_lean_function for {param_type_cpp} at {cursor.location.file.name}:{cursor.location.line}:{cursor.location.column}', file=sys.stderr)
255-
print(
256-
f"INFO Default to_lean_function to as is for {param_type_cpp}"
257-
) # at {cursor.location.file.name}:{cursor.location.line}:{cursor.location.column}')
258-
return "%s"
259-
260-
261-
def cursor_kind_spelling(kind: CursorKind) -> str:
262-
if kind == CursorKind.CONSTRUCTOR:
263-
return "CONSTRUCTOR"
264-
elif kind == CursorKind.CXX_METHOD:
265-
return "CXX_METHOD"
266-
else:
267-
print(f"WARN Unknown cursor_kind_spelling for {kind}", file=sys.stderr)
268-
return "UNKNOWN_CURSOR_KIND"
69+
print(f"Parsing {ginac_header}...")
70+
ginac_ast: TranslationUnit = collector.parse(ginac_header)
26971

72+
PATH_ROOT_REGEX = re.compile(".*/ginac-lean/build/ginac-1.8.7/")
27073

27174
for cursor in collector.walk(ginac_ast):
27275
if cursor.kind in concerned_cursor_kinds:
@@ -319,18 +122,6 @@ def cursor_kind_spelling(kind: CursorKind) -> str:
319122
return_type=return_type,
320123
)
321124

322-
def to_lean_param_name(arg: Cursor) -> str:
323-
if arg.type.is_pod() and is_primitive_type(arg.type.kind):
324-
return map_primitive_type(arg.type.spelling, arg.type)
325-
elif arg.type.spelling == "const std::string &":
326-
return "@&String"
327-
else:
328-
print(
329-
f"WARN Unknown to_lean_param_name for {arg.type.spelling} at {arg.location.file.name}:{arg.location.line}:{arg.location.column}",
330-
file=sys.stderr,
331-
)
332-
return "UNKOWN_LEAN_PARAM_NAME"
333-
334125
method.params = [
335126
MethodParam(
336127
name=arg.spelling,
@@ -380,7 +171,7 @@ def to_lean_param_name(arg: Cursor) -> str:
380171
for class_name, class_interface in collector.data.items():
381172
if class_name == "Symbol":
382173
class_interface.namespace = "Ginac"
383-
with open(dir_data / f"{class_name}.yml", "w", encoding='utf-8') as file:
174+
with open(dir_data / f"{class_name}.yml", "w", encoding="utf-8") as file:
384175
yaml.dump(class_interface.to_dict(), file)
385176

386177
print(conf.lib)

codegen/utils/clang.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1-
import subprocess
1+
import subprocess # nosec
22
import sys
33

44

55
def get_cpp_include_paths():
6-
# run `clang -v -E -x c++ - -v < /dev/null 2>&1` to get the include paths
6+
# run `clang -v -E -x c++ - -v < /dev/null 2>&1` to get the include paths # nosec
77
result = subprocess.run(
88
["clang", "-v", "-E", "-x", "c++", "-", "-v"],
99
input=b"",
1010
stdout=subprocess.PIPE,
1111
stderr=subprocess.PIPE,
12-
)
12+
check=True,
13+
) # nosec
1314
output = result.stderr.decode(sys.getdefaultencoding())
1415
search_list = output.split("#include <...> search starts here:\n")[-1].split(
1516
"End of search list."
@@ -19,4 +20,5 @@ def get_cpp_include_paths():
1920
line = line.strip()
2021
if line != "":
2122
include_paths.append(line)
23+
2224
return include_paths

0 commit comments

Comments
 (0)