21
21
22
22
# FIXME: properly make it a package
23
23
from utils .clang import get_cpp_include_paths
24
+ from utils .collector import EntityCollector
24
25
from utils .interface import (
25
26
ClassInterface ,
26
27
ClassType ,
29
30
ParamType ,
30
31
ReturnType ,
31
32
)
33
+ from utils .types import (
34
+ check_arg_valid ,
35
+ check_valid ,
36
+ cursor_kind_spelling ,
37
+ from_lean_function ,
38
+ to_lean_function ,
39
+ to_lean_method_name ,
40
+ to_lean_param_name ,
41
+ to_lean_type_name ,
42
+ )
32
43
33
44
cpp_include_paths = get_cpp_include_paths ()
34
45
print (f"C++ include paths: { cpp_include_paths } " )
43
54
44
55
ginac_header : Path = dir_build / ginac / "symbol.h"
45
56
46
- index_args : List [str ] = ["-xc++" , "-std=c++11" ]
47
-
48
- # Add C++ include paths to the index or libclang fail to recognize std::string etc.
49
- index_args .extend (["-I" + include_path for include_path in cpp_include_paths ])
50
-
51
- # print(index_args)
52
-
53
- print (f"Parsing { ginac_header } ..." )
54
- ginac_ast : TranslationUnit = index .parse (ginac_header , args = index_args )
55
-
56
-
57
- def to_lean_type_name (type_name_cpp , cpp_type : CppType ):
58
- if is_primitive_type (cpp_type .kind ):
59
- return map_primitive_type (type_name_cpp , cpp_type )
60
- if "::" in type_name_cpp :
61
- return to_lean_type_name (type_name_cpp .split ("::" )[- 1 ], cpp_type )
62
- return "" .join (word .capitalize () for word in type_name_cpp .split ("_" ))
63
-
64
-
65
- class EntityCollector :
66
- recursive : bool = True
67
- visited_headers : Set [str ] = set ()
68
- data : Dict [str , ClassInterface ] = {}
69
-
70
- def __init__ (self , recursive ):
71
- self .recursive = recursive
72
-
73
- def in_cpp_include_paths (self , file_name : str ) -> bool :
74
- # TODO: the whole parsing should be embeded in this class
75
- # FIXME: cpp_include_paths refering to global variable
76
- for include_path in cpp_include_paths :
77
- if include_path in file_name :
78
- return True
79
- return False
80
-
81
- def should_skip (self , file_name : str ) -> bool :
82
- # print(f'Checking if {file_name} should be skipped...')
83
-
84
- return file_name in self .visited_headers or self .in_cpp_include_paths (file_name )
85
-
86
- def visit_file (self , included_filename : str ):
87
- # print(f'Visiting {included_filename}...')
88
- self .visited_headers .add (included_filename )
89
-
90
- def walk (self , ast : TranslationUnit ) -> Iterator [Cursor ]:
91
- if self .recursive :
92
- for include in ast .get_includes ():
93
- included_filename : str = include .include .name
94
- if not self .should_skip (included_filename ):
95
- self .visit_file (included_filename )
96
- # FIXME: index_args referring to global variable
97
- included_ast : TranslationUnit = index .parse (
98
- included_filename , args = index_args
99
- )
100
- for inner_cursor in self .walk (included_ast ):
101
- yield inner_cursor
102
-
103
- for cursor in ast .cursor .walk_preorder ():
104
- yield cursor
105
-
106
- def find_or_create_class_interface (
107
- self , type_name_cpp : str , cpp_type : CppType
108
- ) -> ClassInterface :
109
- # Caution! It's registered in its lean name
110
- type_name_lean = to_lean_type_name (type_name_cpp , cpp_type )
111
-
112
- if type_name_lean in self .data :
113
- return self .data [type_name_lean ]
114
- else :
115
- class_interface = ClassInterface (
116
- type_name_cpp , [], ClassType (type_name_lean , type_name_cpp , [])
117
- )
118
- self .data [type_name_lean ] = class_interface
119
- return class_interface
120
-
121
- def find_or_create_type (self , type_name_cpp : str , cpp_type : CppType ) -> ClassType :
122
- class_interface = self .find_or_create_class_interface (type_name_cpp , cpp_type )
123
- return class_interface .type
124
-
125
-
126
57
concerned_cursor_kinds : List [CursorKind ] = [
127
58
CursorKind .STRUCT_DECL ,
128
59
CursorKind .TYPEDEF_DECL ,
@@ -135,138 +66,10 @@ def find_or_create_type(self, type_name_cpp: str, cpp_type: CppType) -> ClassTyp
135
66
136
67
collector = EntityCollector (recursive = True )
137
68
138
- PATH_ROOT_REGEX = re .compile (".*/ginac-lean/build/ginac-1.8.7/" )
139
-
140
-
141
- def check_valid (cursor : Cursor ):
142
- pass
143
- # if cursor.kind.is_invalid():
144
- # print(f'\tINVALID!!! {cursor.spelling}')
145
-
146
-
147
- # If libclang can't determine a type, it's int! Should be fixed by adding C++ include paths to the index.
148
- # Here we have a hacky way to detect this error.
149
- # TODO: find a better way to detect argument type parsing error
150
- def check_arg_valid (cursor : Cursor ):
151
- check_valid (cursor )
152
- token_spellings = " " .join ([token .spelling for token in cursor .get_tokens ()])
153
- # If the argument type is int, but the token spellings don't contain 'int', it's probably a parsing error
154
- # Can confirm this works by removing C++ include paths from the index, then parse symbol.h which refers to std::string,
155
- # it will print out a lot of warnings like:
156
- # WARN Invalid argument type detected: const int & != const std :: string & initname at ginac-lean/build/ginac-1.8.7/ginac/symbol.h:43:38
157
- if "int" in cursor .type .spelling and "int" not in token_spellings :
158
- if "unsigned int" in cursor .type .spelling :
159
- # Exclude false positives like:
160
- # WARN Invalid argument type detected: unsigned int != unsigned inf at ginac-lean/build/ginac-1.8.7/ginac/symbol.h:48:21
161
- return
162
- # TODO: gather all the warnings and print them at the end, then give hints on how to fix them
163
- print (
164
- f"WARN Invalid argument type detected: { cursor .type .spelling } != { token_spellings } at { cursor .location .file .name } :{ cursor .location .line } :{ cursor .location .column } " ,
165
- file = sys .stderr ,
166
- )
167
-
168
-
169
- # TODO: add more primitive types
170
- def is_primitive_type (typekind : TypeKind ):
171
- return typekind in [
172
- TypeKind .BOOL ,
173
- # TypeKind.CHAR_U,
174
- # TypeKind.UCHAR,
175
- # TypeKind.CHAR16,
176
- # TypeKind.CHAR32,
177
- TypeKind .USHORT ,
178
- TypeKind .UINT ,
179
- TypeKind .ULONG ,
180
- TypeKind .ULONGLONG ,
181
- TypeKind .UINT128 ,
182
- # TypeKind.CHAR_S,
183
- # TypeKind.SCHAR,
184
- # TypeKind.WCHAR,
185
- TypeKind .SHORT ,
186
- TypeKind .INT ,
187
- TypeKind .LONG ,
188
- TypeKind .LONGLONG ,
189
- TypeKind .INT128 ,
190
- TypeKind .FLOAT ,
191
- TypeKind .DOUBLE ,
192
- TypeKind .LONGDOUBLE ,
193
- ]
194
-
195
-
196
- # FIXME maybe all these primitive types are not meant to mapped to Lean built-in types
197
- MAP_PRIMITIVE_TYPE_TO_LEAN_TYPE : Dict [TypeKind , str ] = {
198
- TypeKind .BOOL : "Bool" ,
199
- # TypeKind.CHAR_U: 'UInt8',
200
- # TypeKind.UCHAR: 'UInt8',
201
- # TypeKind.CHAR16: 'UInt16',
202
- # TypeKind.CHAR32: 'UInt32',
203
- TypeKind .USHORT : "UInt16" ,
204
- TypeKind .UINT : "UInt32" ,
205
- TypeKind .ULONG : "UInt64" ,
206
- TypeKind .ULONGLONG : "UInt64" ,
207
- TypeKind .UINT128 : "UInt128" ,
208
- # TypeKind.CHAR_S: 'Int8',
209
- # TypeKind.SCHAR: 'Int8',
210
- # TypeKind.WCHAR: 'UInt32',
211
- TypeKind .SHORT : "Int16" ,
212
- TypeKind .INT : "Int32" ,
213
- TypeKind .LONG : "Int64" ,
214
- TypeKind .LONGLONG : "Int64" ,
215
- TypeKind .INT128 : "Int128" ,
216
- TypeKind .FLOAT : "Float" ,
217
- TypeKind .DOUBLE : "Float" ,
218
- TypeKind .LONGDOUBLE : "Float" ,
219
- }
220
-
221
-
222
- def map_primitive_type (type_name : str , cpp_type : CppType ):
223
- if cpp_type .kind in MAP_PRIMITIVE_TYPE_TO_LEAN_TYPE :
224
- return MAP_PRIMITIVE_TYPE_TO_LEAN_TYPE [cpp_type .kind ]
225
- else :
226
- return f"LEAN_PRIMITIVE_{ type_name } "
227
-
228
-
229
- def to_lean_method_name (method_name_cpp : str , cursor : Cursor ) -> str :
230
- if cursor .kind == CursorKind .CONSTRUCTOR :
231
- return "mk"
232
- # TODO: should keep or change getters/setters?
233
- else :
234
- return method_name_cpp
235
-
236
-
237
- def from_lean_function (param_type_cpp : str , cursor : Cursor ) -> str :
238
- if param_type_cpp in ["const std::string &" , "const char *" ]:
239
- return "lean_string_cstr(%s)"
240
- else :
241
- print (
242
- f"WARN Unknown from_lean_function for { param_type_cpp } at { cursor .location .file .name } :{ cursor .location .line } :{ cursor .location .column } " ,
243
- file = sys .stderr ,
244
- )
245
- return "UNKOWN_FROM_LEAN_FUNCTION"
246
-
247
-
248
- def to_lean_function (param_type_cpp : str , cpp_type : CppType ) -> str :
249
- if param_type_cpp in ["const std::string &" ]:
250
- return "lean_mk_string(%s.c_str())"
251
- elif param_type_cpp in ["const char *" ]:
252
- return "lean_mk_string(%s)"
253
- else :
254
- # print(f'WARN Unknown to_lean_function for {param_type_cpp} at {cursor.location.file.name}:{cursor.location.line}:{cursor.location.column}', file=sys.stderr)
255
- print (
256
- f"INFO Default to_lean_function to as is for { param_type_cpp } "
257
- ) # at {cursor.location.file.name}:{cursor.location.line}:{cursor.location.column}')
258
- return "%s"
259
-
260
-
261
- def cursor_kind_spelling (kind : CursorKind ) -> str :
262
- if kind == CursorKind .CONSTRUCTOR :
263
- return "CONSTRUCTOR"
264
- elif kind == CursorKind .CXX_METHOD :
265
- return "CXX_METHOD"
266
- else :
267
- print (f"WARN Unknown cursor_kind_spelling for { kind } " , file = sys .stderr )
268
- return "UNKNOWN_CURSOR_KIND"
69
+ print (f"Parsing { ginac_header } ..." )
70
+ ginac_ast : TranslationUnit = collector .parse (ginac_header )
269
71
72
+ PATH_ROOT_REGEX = re .compile (".*/ginac-lean/build/ginac-1.8.7/" )
270
73
271
74
for cursor in collector .walk (ginac_ast ):
272
75
if cursor .kind in concerned_cursor_kinds :
@@ -319,18 +122,6 @@ def cursor_kind_spelling(kind: CursorKind) -> str:
319
122
return_type = return_type ,
320
123
)
321
124
322
- def to_lean_param_name (arg : Cursor ) -> str :
323
- if arg .type .is_pod () and is_primitive_type (arg .type .kind ):
324
- return map_primitive_type (arg .type .spelling , arg .type )
325
- elif arg .type .spelling == "const std::string &" :
326
- return "@&String"
327
- else :
328
- print (
329
- f"WARN Unknown to_lean_param_name for { arg .type .spelling } at { arg .location .file .name } :{ arg .location .line } :{ arg .location .column } " ,
330
- file = sys .stderr ,
331
- )
332
- return "UNKOWN_LEAN_PARAM_NAME"
333
-
334
125
method .params = [
335
126
MethodParam (
336
127
name = arg .spelling ,
@@ -380,7 +171,7 @@ def to_lean_param_name(arg: Cursor) -> str:
380
171
for class_name , class_interface in collector .data .items ():
381
172
if class_name == "Symbol" :
382
173
class_interface .namespace = "Ginac"
383
- with open (dir_data / f"{ class_name } .yml" , "w" , encoding = ' utf-8' ) as file :
174
+ with open (dir_data / f"{ class_name } .yml" , "w" , encoding = " utf-8" ) as file :
384
175
yaml .dump (class_interface .to_dict (), file )
385
176
386
177
print (conf .lib )
0 commit comments