-
Notifications
You must be signed in to change notification settings - Fork 92
/
Copy pathdoxygen.py
executable file
·4015 lines (3539 loc) · 188 KB
/
doxygen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
#
# This file is part of m.css.
#
# Copyright © 2017, 2018, 2019, 2020, 2021, 2022
# Vladimír Vondruš <[email protected]>
# Copyright © 2020 Yuri Edward <[email protected]>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
import xml.etree.ElementTree as ET
import argparse
import copy
import enum
import sys
import re
import html
import inspect
import os
import glob
import mimetypes
import shutil
import subprocess
import urllib.parse
import logging
from types import SimpleNamespace as Empty
from typing import Tuple, Dict, Any, List
from importlib.machinery import SourceFileLoader
from jinja2 import Environment, FileSystemLoader
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import TextLexer, BashSessionLexer, get_lexer_by_name, find_lexer_class_for_filename
from _search import CssClass, ResultFlag, ResultMap, Trie, Serializer, serialize_search_data, base85encode_search_data, search_filename, searchdata_filename, searchdata_filename_b85, searchdata_format_version
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../plugins'))
import dot2svg
import latex2svg
import latex2svgextra
import ansilexer
class EntryType(enum.Enum):
    """Kind of a documented entry.

    The members have to stay in the same order as the entries of
    search_type_map. Numbering starts at 1 because the first value is reserved
    for ResultFlag.ALIAS.
    """

    PAGE = 1
    NAMESPACE = 2
    GROUP = 3
    CLASS = 4
    STRUCT = 5
    UNION = 6
    TYPEDEF = 7
    DIR = 8
    FILE = 9
    FUNC = 10
    DEFINE = 11
    ENUM = 12
    ENUM_VALUE = 13
    VAR = 14
# Order must match the EntryType above. Every item is a (CSS class, label)
# pair and there is exactly one item per EntryType member.
# NOTE(review): presumably the item at list index i describes the EntryType
# with value i + 1 (value 0 being the reserved alias slot) -- the consumer of
# this list is outside of this view, confirm there.
search_type_map = [
    (CssClass.SUCCESS, "page"),
    (CssClass.PRIMARY, "namespace"),
    (CssClass.SUCCESS, "group"),
    (CssClass.PRIMARY, "class"),
    (CssClass.PRIMARY, "struct"),
    (CssClass.PRIMARY, "union"),
    (CssClass.PRIMARY, "typedef"),
    (CssClass.WARNING, "dir"),
    (CssClass.WARNING, "file"),
    (CssClass.INFO, "func"),
    (CssClass.INFO, "define"),
    (CssClass.PRIMARY, "enum"),
    (CssClass.DEFAULT, "enum val"),
    (CssClass.DEFAULT, "var")
]
# Built-in default value for every configuration option.
# NOTE(review): presumably user-supplied configuration is layered on top of
# this dictionary -- the merging code is outside of this view, confirm there.
default_config = {
    'DOXYFILE': 'Doxyfile',

    'THEME_COLOR': '#22272e',
    'FAVICON': 'favicon-dark.png',
    # Each navbar entry is a three-item tuple of two strings and a list
    # NOTE(review): looks like (title, link target, sub-entries) -- confirm
    # against the code consuming these
    'LINKS_NAVBAR1': [
        ("Pages", 'pages', []),
        ("Namespaces", 'namespaces', [])
    ],
    'LINKS_NAVBAR2': [
        ("Classes", 'annotated', []),
        ("Files", 'files', [])
    ],

    'STYLESHEETS': [
        'https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600',
        '../css/m-dark+documentation.compiled.css'],
    'HTML_HEADER': None,
    'EXTRA_FILES': [],
    'PAGE_HEADER': None,
    'FINE_PRINT': '[default]',

    'CLASS_INDEX_EXPAND_LEVELS': 1,
    'FILE_INDEX_EXPAND_LEVELS': 1,
    'CLASS_INDEX_EXPAND_INNER': False,

    'M_MATH_CACHE_FILE': 'm.math.cache',
    'M_CODE_FILTERS_PRE': {},
    'M_CODE_FILTERS_POST': {},

    'SEARCH_DISABLED': False,
    'SEARCH_DOWNLOAD_BINARY': False,
    'SEARCH_FILENAME_PREFIX': 'searchdata',
    'SEARCH_RESULT_ID_BYTES': 2,
    'SEARCH_FILE_OFFSET_BYTES': 3,
    'SEARCH_NAME_SIZE_BYTES': 1,
    # HTML snippet; the string content below is kept unindented because every
    # character inside the triple quotes is part of the value
    'SEARCH_HELP':
"""<p class="m-noindent">Search for symbols, directories, files, pages or
modules. You can omit any prefix from the symbol or file path; adding a
<code>:</code> or <code>/</code> suffix lists all members of given symbol or
directory.</p>
<p class="m-noindent">Use <span class="m-label m-dim">↓</span>
/ <span class="m-label m-dim">↑</span> to navigate through the list,
<span class="m-label m-dim">Enter</span> to go.
<span class="m-label m-dim">Tab</span> autocompletes common prefix, you can
copy a link to the result using <span class="m-label m-dim">⌘</span>
<span class="m-label m-dim">L</span> while <span class="m-label m-dim">⌘</span>
<span class="m-label m-dim">M</span> produces a Markdown link.</p>
""",
    'SEARCH_BASE_URL': None,
    'SEARCH_EXTERNAL_URL': None,

    'SHOW_UNDOCUMENTED': False,
    'VERSION_LABELS': False
}
# Matches an ID that ends with `_1` followed by either an underscore plus one
# or more lowercase letters, digits or hyphens, or by a single `@`. Group 1 is
# everything before the `_1`, group 2 is what follows it.
xref_id_rx = re.compile(r'(.*)_1(_[a-z-0-9]+|@)$')

# Used by slugify(): everything that is not a word character, whitespace or a
# hyphen ...
slugify_nonalnum_rx = re.compile(r'[^\w\s-]')
# ... and runs of hyphens and / or whitespace
slugify_hyphens_rx = re.compile(r'[-\s]+')
class StateCompound:
    """Record describing a single compound, stored in State.compounds.

    Most attributes are only declared with their expected type here and have
    to be filled in by whoever creates the instance; reading one of them
    before that raises AttributeError. Only is_final and parent get an actual
    default of None.
    """
    def __init__(self):
        # Declarations only, no value is assigned
        self.id: str
        self.kind: str
        self.name: str
        self.url: str
        self.brief: str
        self.has_details: bool
        self.deprecated: str
        self.children: List[str]

        # Attributes with a default
        self.is_final: bool = None
        self.parent: str = None
class State:
    """State shared by the parsing functions.

    Combines the configuration passed to the constructor, containers that
    get filled while processing (compounds, includes, search data, examples,
    images, Doxyfile options) and a group of ``current_*`` attributes that
    describe the compound being processed at the moment.
    """
    def __init__(self, config):
        # The configuration is stored as passed, without copying
        self.config: Dict[str, Any] = config
        self.doxyfile: Dict[str, Any] = {}
        self.basedir = ''

        # Containers, all empty at first
        self.compounds: Dict[str, StateCompound] = {}
        self.includes: Dict[str, str] = {}
        self.search: List[Any] = []
        self.examples: List[Any] = []
        self.images: List[str] = []

        # File that is being processed right now, used in log messages
        self.current = ''

        # Kind of the compound that is being processed right now. It affects
        # current_include below -- per-entry includes are parsed only for
        # namespaces or modules, because for classes they are consistent and
        # don't need to be repeated.
        self.current_kind = None

        # None (either from the start or set later) means the compound is
        # spread over multiple files and so every entry needs its own
        # specific include definition
        self.current_include = None

        self.current_prefix = []
        self.current_compound_url = None
        self.current_definition_url_base = None
        self.parsing_toplevel_desc = False
def slugify(text: str) -> str:
    """Turn `text` into a lowercase, hyphen-separated slug.

    The text is lowercased, everything matching slugify_nonalnum_rx is
    dropped, surrounding whitespace is stripped and finally every match of
    slugify_hyphens_rx is replaced by a single hyphen.
    """
    # Maybe some Unicode normalization would be nice here?
    lowered = text.lower()
    cleaned = slugify_nonalnum_rx.sub('', lowered).strip()
    return slugify_hyphens_rx.sub('-', cleaned)
def add_wbr(text: str) -> str:
    """Insert `<wbr />` line break opportunities into a long name.

    Only the first applicable rule is used: after every `::` (C++ names),
    else after every `_` (VERY_LONG_UPPER_CASE macro names), else after
    every `/` (URLs). Text that contains `<` is returned untouched.
    """
    # Stuff contains HTML code, do not touch!
    if '<' in text:
        return text

    # C++ scope separators take precedence over underscores
    for separator in ('::', '_'):
        if separator in text:
            return text.replace(separator, separator + '<wbr />')

    # Slashes are quite common, so at least check that there is no space
    # (which may hint that the text is actually some human language)
    if '/' in text and ' ' not in text:
        return text.replace('/', '/<wbr />')

    return text
def parse_ref(state: State, element: ET.Element, add_inline_css_class: str = None) -> str:
    """Render a `<ref>` element as a HTML link.

    The link target is derived from the `refid` attribute: for a `compound`
    reference it's the ID with `.html` appended, for a `member` reference the
    ID is split at the last `_1` into a page and an anchor. If the element
    has an `external` attribute, the URL is prefixed with the base URL of the
    matching `TAGFILES` entry from the Doxyfile.

    The link gets the `m-doc` CSS class (`m-doc-external` for external
    references) unless `add_inline_css_class` is set, in which case that one
    is used instead. The link text is the parsed inline content of the
    element, passed through add_wbr().

    Raises AssertionError for an unknown `kindref` or for a tagfile that is
    not listed in the Doxyfile.
    """
    # URLs are always joined with forward slashes. os.path.join() would use
    # a backslash on Windows for base URLs without a trailing slash.
    import posixpath

    refid = element.attrib['refid']
    kindref = element.attrib['kindref']
    if kindref == 'compound':
        url = refid + '.html'
    elif kindref == 'member':
        split_at = refid.rindex('_1')
        url = refid[:split_at] + '.html' + '#' + refid[split_at+2:]
    else: # pragma: no cover
        logging.critical("{}: unknown <ref> kind {}".format(state.current, kindref))
        # Raised explicitly so the check survives running with -O
        raise AssertionError("unknown <ref> kind {}".format(kindref))

    if 'external' in element.attrib:
        for tagfile in state.doxyfile['TAGFILES']:
            # Entries have the form `file=baseurl`
            name, _, baseurl = tagfile.partition('=')
            if os.path.basename(name) == os.path.basename(element.attrib['external']):
                url = posixpath.join(baseurl, url)
                break
        else: # pragma: no cover
            logging.critical("{}: tagfile {} not specified in Doxyfile".format(state.current, element.attrib['external']))
            raise AssertionError("tagfile {} not specified in Doxyfile".format(element.attrib['external']))
        class_ = 'm-doc-external'
    else:
        class_ = 'm-doc'

    if add_inline_css_class: # Overrides the default set above
        class_ = add_inline_css_class

    return '<a href="{}" class="{}">{}</a>'.format(url, class_, add_wbr(parse_inline_desc(state, element).strip()))
def make_include(state: State, file) -> Tuple[str, str]:
    """Return a `(label, url)` pair for including `file`, or None.

    The label is the HTML-escaped `<file>` form, the URL is taken from the
    compound registered for that file in state.includes. None is returned if
    the file is not known or if its compound has no detailed documentation.
    """
    if file not in state.includes:
        return None

    compound = state.compounds[state.includes[file]]
    if not compound.has_details:
        return None

    return (html.escape('<{}>'.format(file)), compound.url)
def parse_id_and_include(state: State, element: ET.Element) -> Tuple[str, str, str, Tuple[str, str], bool]:
    """Split the ID of `element` and figure out its include.

    Returns a tuple of

    -   the URL base (usually saved to state.current_definition_url_base and
        used by extract_id_hash() later),
    -   the base URL (with file extension),
    -   the actual ID, i.e. the part after the last `_1`,
    -   the include as returned by make_include(), or None,
    -   whether the include alone is a reason to enable has_details.

    As a side effect, state.current_include can get updated while the
    members of a namespace or a module are processed.
    """
    def declaration_file():
        # The `declfile` attribute of <location> is used if present, `file`
        # otherwise
        location = element.find('location').attrib
        if 'declfile' in location:
            return location['declfile']
        return location['file']

    full_id = element.attrib['id']
    split_at = full_id.rindex('_1')
    url_base = full_id[:split_at]
    anchor = full_id[split_at+2:]

    include = None
    has_details = False

    # Extract the corresponding include, if the current compound is a
    # namespace or a module
    if state.current_kind in ('namespace', 'group'):
        file = declaration_file()
        include = make_include(state, file)

        if state.current_include is None:
            # Already signalled to be non-unique, nothing to update
            pass

        # The include for current namespace is not set yet (empty string),
        # set it to this value. It has to be done this way instead of using
        # the location information from the compound, because namespace
        # location is sometimes pointed to a *.cpp file, which Doxygen sees
        # before *.h.
        elif not state.current_include:
            assert state.current_kind == 'namespace'
            state.current_include = file
            # parse_xml() fills compound.include from this later

        # The include differs from the one of the current compound, reset it
        # to None to signal that the compound doesn't have one unique include
        # file. This gets later picked up by parse_xml() which either adds
        # has_details to all compounds or wipes the compound-specific
        # includes.
        elif state.current_include != file:
            state.current_include = None

    # Extract corresponding include also for class/struct/union "relateds",
    # if it's different from what the class has. This also forcibly enables
    # has_details (unlike the case above, where has_details will be enabled
    # only if all members don't have the same include) -- however if
    # SHOW_INCLUDE_FILES isn't enabled or the file is not documented, this
    # would generate useless empty detailed sections so in that case it's
    # not set.
    elif state.current_kind in ('class', 'struct', 'union'):
        file = declaration_file()
        if state.current_include != file:
            include = make_include(state, file)
            has_details = include and state.doxyfile['SHOW_INCLUDE_FILES']

    return url_base, url_base + '.html', anchor, include, has_details
def extract_id_hash(state: State, element: ET.Element) -> str:
    """Return the anchor part of the ID of `element`.

    The ID is expected to start with state.current_definition_url_base; the
    result is what follows that prefix and the two-character `_1` separator.
    """
    # Can't use parse_id() here as sections with _1 in it have it verbatim
    # unescaped and mess up with rindex(). OTOH, can't use this approach in
    # parse_id() because for example enums can be present in both file and
    # namespace documentation, having the base_url either the file one or
    # the namespace one, depending on what's documented better. Ugh. See the
    # contents_section_underscore_one test for a verification.
    #
    # Can't use current compound URL base here either, as definitions can
    # have different URL base (again an enum being present in both file and
    # namespace documentation). The state.current_definition_url_base
    # usually comes from parse_id()[0]. See the
    # contents_anchor_in_both_group_and_namespace test for a verification.
    full_id = element.attrib['id']
    url_base = state.current_definition_url_base
    assert full_id.startswith(url_base), "ID `%s` does not start with `%s`" % (full_id, url_base)

    # Skip the URL base together with the `_1` after it
    return full_id[len(url_base) + 2:]
# `a&&b` with no whitespace around gets turned into `a && b`
and_re_src = re.compile(r'([^\s])&&([^\s])')
and_re_dst = r'\1 && \2'
# The same for text that already went through html.escape(), where `&` is
# written as `&amp;`
_and_escaped_re_src = re.compile(r'([^\s])&amp;&amp;([^\s])')
_and_escaped_re_dst = r'\1 &amp;&amp; \2'

def fix_type_spacing(type: str) -> str:
    """Normalize spacing in a type string.

    Removes the space inside of `<` / `>` and the space before `&` and `*`,
    and puts spaces around a `&&` that is squeezed between two non-whitespace
    characters.

    Both raw and HTML-escaped input is handled. parse_type() runs
    html.escape() on the text before calling this function, so `<`, `>` and
    `&` arrive here as `&lt;`, `&gt;` and `&amp;` and the raw patterns alone
    would never match them.
    """
    out = (type
        .replace('< ', '<')
        .replace('&lt; ', '&lt;')
        .replace(' >', '>')
        .replace(' &gt;', '&gt;')
        # Covers a raw ` &` as well as an escaped ` &amp;`
        .replace(' &', '&')
        .replace(' *', '*'))
    out = and_re_src.sub(and_re_dst, out)
    return _and_escaped_re_src.sub(_and_escaped_re_dst, out)
def parse_type(state: State, type: ET.Element) -> str:
    """Convert a `<type>` element to a HTML string.

    Text is HTML-escaped, `<ref>` children are turned into links using
    parse_ref() and `<anchor>` children into empty named anchors. Any other
    child is ignored with a warning, only the text following it is kept. The
    result goes through fix_type_spacing(). Returns None if `type` is None.
    """
    # Constructors and typeless enums might not have it
    if type is None:
        return None

    pieces = [html.escape(type.text)] if type.text else []
    for child in type:
        if child.tag == 'ref':
            pieces.append(parse_ref(state, child))
        elif child.tag == 'anchor':
            # Anchor, used by <= 1.8.14 for deprecated/todo lists. Its
            # base_url is always equal to base_url of the page. In 1.8.15 the
            # anchor is in the description, making the anchor look extra
            # awful: https://github.com/doxygen/doxygen/pull/6587
            # TODO: this should get reverted and fixed properly so the
            # one-on-one case works as it should
            pieces.append('<a name="{}"></a>'.format(extract_id_hash(state, child)))
        else: # pragma: no cover
            logging.warning("{}: ignoring {} in <type>".format(state.current, child.tag))

        # Text after the child is kept no matter what the child was
        if child.tail:
            pieces.append(html.escape(child.tail))

    # Remove spacing inside <> and before & and *
    return fix_type_spacing(''.join(pieces))
def parse_desc_internal(state: State, element: ET.Element, immediate_parent: ET.Element = None, trim = True, add_css_class = None):
out = Empty()
out.section = None
out.templates = {}
out.params = {}
out.return_value = None
out.return_values = []
out.exceptions = []
out.add_css_class = None
out.footer_navigation = False
out.example_navigation = None
out.search_keywords = []
out.search_enum_values_as_keywords = False
out.deprecated = None
out.since = None
# DOXYGEN <PARA> PATCHING 1/4
#
# In the optimistic case, when parsing the <para> element, the parsed
# content is treated as single reasonable paragraph and the caller is told
# to write both <p> and </p> enclosing tag.
#
# Unfortunately Doxygen puts some *block* elements inside a <para> element
# instead of closing it before and opening it again after. That is making
# me raging mad. Nested paragraphs are no way valid HTML and they are ugly
# and problematic in all ways you can imagine, so it's needed to be
# patched. See the long ranty comments below for more parts of the story.
out.write_paragraph_start_tag = element.tag == 'para'
out.write_paragraph_close_tag = element.tag == 'para'
out.is_reasonable_paragraph = element.tag == 'para'
out.parsed: str = ''
if element.text:
out.parsed = html.escape(element.text.strip() if trim else element.text)
# There's some inline text at the start, *do not* add any CSS class to
# the first child element
add_css_class = None
# Needed later for deciding whether we can strip the surrounding <p> from
# the content
paragraph_count = 0
has_block_elements = False
# So we are able to merge content of adjacent sections. Tuple of (tag,
# kind), set only if there is no i.tail, reset in the next iteration.
previous_section = None
# So we can peek what the previous element was. Needed by Doxygen 1.9
# code-after-blockquote discovery.
previous_element = None
# A CSS class to be added inline (not propagated outside of the paragraph)
add_inline_css_class = None
# Also, to make things even funnier, parameter and return value description
# come from inside of some paragraph and can be nested also inside lists
# and whatnot. This bubbles them up. Unfortunately they can be scattered
# around, so also merging them together.
def merge_parsed_subsections(parsed):
    # Folds the out-of-flow pieces of a nested parse result (`parsed`) into
    # `out` from the enclosing scope, so they bubble up instead of getting
    # lost inside a nested paragraph, list item or section.

    # Template and parameter descriptions are merged via update()
    for mapping_name in ('templates', 'params'):
        nested_mapping = getattr(parsed, mapping_name)
        if nested_mapping:
            getattr(out, mapping_name).update(nested_mapping)

    # Only one return value description is kept -- the first one wins, any
    # further one is reported and dropped. The warning quotes the text of
    # `i`, which is looked up in the enclosing scope at call time.
    nested_return_value = parsed.return_value
    if nested_return_value and out.return_value:
        logging.warning("{}: superfluous @return section found, ignoring: {}".format(state.current, ''.join(i.itertext()).rstrip()))
    elif nested_return_value:
        out.return_value = nested_return_value

    # Return value and exception lists are appended in the order found
    for list_name in ('return_values', 'exceptions'):
        nested_list = getattr(parsed, list_name)
        if nested_list:
            getattr(out, list_name).extend(nested_list)

    # A nested since value replaces whatever was set before
    if parsed.since:
        out.since = parsed.since

    # A nested deprecation is expected only when no since value is pending
    nested_deprecated = parsed.deprecated
    if nested_deprecated:
        assert not out.since
        out.deprecated = nested_deprecated
i: ET.Element
# The index gets only used in <programlisting> code vs inline detection, to
# check if there are any elements in the block element after it. All uses
# of it need to take into account the <zwj/> skipping in Doxygen 1.9
# blockquotes below.
for index, i in enumerate(element):
# As of 1.9.3 and https://github.com/doxygen/doxygen/pull/7422, a
# stupid &zwj; is added at the front of every Markdown blockquote for
# some silly reason, and then the Markdown is processed as a HTML,
# resulting in <blockquote><para><zwj/>. Drop the <zwj/> from there, as
# it's useless and messes up with our <para> patching logic.
if index == 0 and i.tag == 'zwj' and element.tag == 'para' and immediate_parent and immediate_parent.tag == 'blockquote':
if i.tail:
tail: str = html.escape(i.tail)
if trim:
tail = tail.strip()
out.parsed += tail
continue
# State used later
code_block = None
formula_block = None
# A section was left open, but there's nothing to continue it, close
# it. Expect that there was nothing after that would mess with us.
# Don't reset it back to None just yet, as inline/block code
# autodetection needs it.
if previous_section and (i.tag != 'simplesect' or i.attrib['kind'] == 'return'):
assert not out.write_paragraph_close_tag
out.parsed = out.parsed.rstrip() + '</aside>'
# Decide if a formula / code snippet is a block or not
# <formula> can be both, depending on what's inside
if i.tag == 'formula':
if i.text.startswith('$') and i.text.endswith('$'):
formula_block = False
else:
formula_block = True
# <programlisting> is autodetected to be either block or inline
elif i.tag == 'programlisting':
# In a blockquote we need to not count the initial <zwj/> added by
# Doxygen 1.9. Otherwise all code blocks alone in a blockquote
# would be treated as inline.
if element.tag == 'para' and immediate_parent and immediate_parent.tag == 'blockquote':
element_children_count = 0
for listing_index, listing in enumerate(element):
if listing_index == 0 and listing.tag == 'zwj': continue
element_children_count += 1
else:
element_children_count = len([listing for listing in element])
# If it seems to be a standalone code paragraph, don't wrap it
# in <p> and use <pre>:
if (
# It's either alone in the paragraph, with no text or other
# elements around, or
((not element.text or not element.text.strip()) and (not i.tail or not i.tail.strip()) and element_children_count == 1) or
# is a code snippet, i.e. filename instead of just .ext
# (Doxygen unfortunately doesn't put @snippet in its own
# paragraph even if it's separated by blank lines. It does
# so for @include and related, though.)
('filename' in i.attrib and not i.attrib['filename'].startswith('.')) or
# or is
# - code right after a note/attention/... section
# - or in Doxygen 1.9 code right after a blockquote, which is
# no longer wrapped into its own <para>,
# there's no text after and it's the last thing in the
# paragraph (Doxygen ALSO doesn't separate end of a section
# and begin of a code block by a paragraph even if there is
# a blank line. But it does so for xrefitems such as @todo.
# I don't even.)
((previous_section or (previous_element and previous_element.tag == 'blockquote')) and (not i.tail or not i.tail.strip()) and index + 1 == element_children_count)
):
code_block = True
# Looks like inline code, but has multiple code lines, so it's
# suspicious. Use code block, but warn.
elif len([codeline for codeline in i]) > 1:
code_block = True
logging.warning("{}: inline code has multiple lines, fallback to a code block".format(state.current))
# Otherwise wrap it in <p> and use <code>
else:
code_block = False
# DOXYGEN <PARA> PATCHING 2/4
#
# Upon encountering a block element nested in <para>, we need to act.
# If there was any content before, we close the paragraph. If there
# wasn't, we tell the caller to not even open the paragraph. After
# processing the following tag, there probably won't be any paragraph
# open, so we also tell the caller that there's no need to close
# anything (but it's not that simple, see for more patching at the end
# of the cycle iteration).
#
# Those elements are:
# - <heading>
# - <blockquote>
# - <hruler>
# - <simplesect> (if not describing return type) and <xrefsect>
# - <verbatim>, <preformatted> (those are the same thing!)
# - <parblock> (a weird grouping thing that we abuse for <div>s)
# - <variablelist>, <itemizedlist>, <orderedlist>
# - <image>, <dot>, <dotfile>, <table>
# - <mcss:div>
# - <formula> (if block)
# - <programlisting> (if block)
# - <htmlonly> (if block, which is ATM always)
#
# <parameterlist> and <simplesect kind="return"> are extracted out of
# the text flow, so these are removed from this check.
#
# In addition, there's special handling to achieve things like this:
# <ul>
# <li>A paragraph
# <ul>
# <li>A nested list item</li>
# </ul>
# </li>
# I.e., not wrapping "A paragraph" in a <p>, but only if it's
# immediately followed by another and it's the first paragraph in a
# list item. We check that using the immediate_parent variable.
if element.tag == 'para':
end_previous_paragraph = False
# Straightforward elements
if i.tag in ['heading', 'blockquote', 'hruler', 'xrefsect', 'variablelist', 'verbatim', 'parblock', 'preformatted', 'itemizedlist', 'orderedlist', 'image', 'dot', 'dotfile', 'table', '{http://mcss.mosra.cz/doxygen/}div', 'htmlonly']:
end_previous_paragraph = True
# <simplesect> describing return type is cut out of text flow, so
# it doesn't contribute
elif i.tag == 'simplesect' and i.attrib['kind'] != 'return':
end_previous_paragraph = True
# <formula> can be both, depending on what's inside
elif i.tag == 'formula':
assert formula_block is not None
end_previous_paragraph = formula_block
# <programlisting> is autodetected to be either block or inline
elif i.tag == 'programlisting':
assert code_block is not None
end_previous_paragraph = code_block
if end_previous_paragraph:
out.is_reasonable_paragraph = False
out.parsed = out.parsed.rstrip()
if not out.parsed:
out.write_paragraph_start_tag = False
elif immediate_parent and immediate_parent.tag == 'listitem' and i.tag in ['itemizedlist', 'orderedlist']:
out.write_paragraph_start_tag = False
elif out.write_paragraph_close_tag:
out.parsed += '</p>'
out.write_paragraph_close_tag = False
# There might be *inline* elements that need to start a *new*
# paragraph, on the other hand. OF COURSE DOXYGEN DOESN'T DO THAT
# EITHER. There's a similar block of code that handles case with
# non-empty i.tail() at the end of the loop iteration.
if not out.write_paragraph_close_tag and (i.tag in ['linebreak', 'anchor', 'computeroutput', 'emphasis', 'bold', 'ref', 'ulink'] or (i.tag == 'formula' and not formula_block) or (i.tag == 'programlisting' and not code_block)):
# Assume sanity -- we are *either* closing a paragraph because
# a new block element appeared after inline stuff *or* opening
# a paragraph because there's inline text after a block
# element and that is mutually exclusive.
assert not end_previous_paragraph
out.parsed += '<p>'
out.write_paragraph_close_tag = True
# Block elements
if i.tag in ['sect1', 'sect2', 'sect3', 'sect4']:
assert element.tag != 'para' # should be top-level block element
has_block_elements = True
parsed = parse_desc_internal(state, i)
# Render as <section> in toplevel desc
if state.parsing_toplevel_desc:
assert parsed.section
if parsed.templates or parsed.params or parsed.return_value or parsed.return_values or parsed.exceptions:
logging.warning("{}: unexpected @tparam / @param / @return / @retval / @exception found inside a @section, ignoring".format(state.current))
# Top-level section has no ID or title
if not out.section: out.section = ('', '', [])
out.section = (out.section[0], out.section[1], out.section[2] + [parsed.section])
out.parsed += '<section id="{}">{}</section>'.format(extract_id_hash(state, i), parsed.parsed)
# Render directly the contents otherwise, propagate parsed stuff up
else:
merge_parsed_subsections(parsed)
out.parsed += parsed.parsed
if parsed.search_keywords:
out.search_keywords += parsed.search_keywords
elif i.tag == 'title':
assert element.tag != 'para' # should be top-level block element
has_block_elements = True
# Top-level description
if state.parsing_toplevel_desc:
if element.tag == 'sect1':
tag = 'h2'
elif element.tag == 'sect2':
tag = 'h3'
elif element.tag == 'sect3':
tag = 'h4'
elif element.tag == 'sect4':
tag = 'h5'
elif not element.tag == 'simplesect': # pragma: no cover
assert False
# Function/enum/... descriptions are inside <h3> for function
# header, which is inside <h2> for detailed definition section, so
# it needs to be <h4> and below
else:
if element.tag == 'sect1':
tag = 'h4'
elif element.tag == 'sect2':
tag = 'h5'
elif element.tag == 'sect3':
tag = 'h6'
elif element.tag == 'sect4':
tag = 'h6'
logging.warning("{}: more than three levels of sections in member descriptions are not supported, stopping at <h6>".format(state.current))
elif not element.tag == 'simplesect': # pragma: no cover
assert False
# simplesect titles are handled directly inside simplesect
if not element.tag == 'simplesect':
id = extract_id_hash(state, element)
title = html.escape(i.text)
# Populate section info for top-level desc
if state.parsing_toplevel_desc:
assert not out.section
out.section = (id, title, [])
out.parsed += '<{0}><a href="#{1}">{2}</a></{0}>'.format(tag, id, title)
# Otherwise add the ID directly to the heading
else:
out.parsed += '<{0} id="{1}">{2}</{0}>'.format(tag, id, title)
# Apparently, in 1.8.18, <heading> is used for Markdown headers only if
# we run out of sect1-4 tags. Eh, what the hell.
elif i.tag == 'heading':
assert element.tag == 'para' # is inside a paragraph :/
has_block_elements = True
# Do not print anything if there are no contents
if not i.text:
logging.warning("{}: a Markdown heading underline was apparently misparsed by Doxygen, prefix the headings with # instead".format(state.current))
else:
h_tag_level = int(i.attrib['level'])
assert h_tag_level > 0
# Top-level description can have 5 levels (first level used for
# page title), so it needs to be <h2> and below
if state.parsing_toplevel_desc:
h_tag_level += 1
if h_tag_level > 6:
h_tag_level = 6
logging.warning("{}: more than five levels of Markdown headings for top-level docs are not supported, stopping at <h6>".format(state.current))
# Function/enum/... descriptions are inside <h3> for function
# header, which is inside <h2> for detailed definition section,
# so it needs to be <h4> and below
else:
h_tag_level += 3
if h_tag_level > 6:
h_tag_level = 6
logging.warning("{}: more than three levels of Markdown headings in member descriptions are not supported, stopping at <h6>".format(state.current))
out.parsed += '<h{0}>{1}</h{0}>'.format(h_tag_level, html.escape(i.text))
elif i.tag == 'parblock':
assert element.tag in ['para', '{http://mcss.mosra.cz/doxygen/}div']
has_block_elements = True
out.parsed += '<div{}>{}</div>'.format(
' class="{}"'.format(add_css_class) if add_css_class else '',
parse_desc(state, i))
elif i.tag == 'para':
assert element.tag != 'para' # should be top-level block element
paragraph_count += 1
# DOXYGEN <PARA> PATCHING 3/4
#
# Parse contents of the paragraph, don't trim whitespace around
# nested elements but trim it at the begin and end of the paragraph
# itself. Also, some paragraphs are actually block content and we
# might not want to write the start/closing tag.
#
# There's also the patching of nested lists that results in the
# immediate_parent variable in the section 2/4 -- we pass the
# parent only if this is the first paragraph inside it.
parsed = parse_desc_internal(state, i,
immediate_parent=element if paragraph_count == 1 and not has_block_elements else None,
trim=False,
add_css_class=add_css_class)
parsed.parsed = parsed.parsed.strip()
if not parsed.is_reasonable_paragraph:
has_block_elements = True
if parsed.parsed:
if parsed.write_paragraph_start_tag:
# If there is some inline content at the beginning, assume
# the CSS class was meant to be added to the paragraph
# itself, not into a nested (block) element.
out.parsed += '<p{}>'.format(' class="{}"'.format(add_css_class) if add_css_class else '')
out.parsed += parsed.parsed
if parsed.write_paragraph_close_tag: out.parsed += '</p>'
# Paragraphs can have nested parameter / return value / ...
# descriptions, merge them to current state
merge_parsed_subsections(parsed)
# The same is (of course) with bubbling up the <mcss:class>
# element. Reset the current value with the value coming from
# inside -- it's either reset back to None or scheduled to be used
# in the next iteration. In order to make this work, the resetting
# code at the end of the loop iteration resets it to None only if
# this is not a paragraph or the <mcss:class> element -- so we are
# resetting here explicitly.
add_css_class = parsed.add_css_class
# Bubble up also footer / example navigation, search keywords,
# deprecation flag, since badges
if parsed.footer_navigation: out.footer_navigation = True
if parsed.example_navigation: out.example_navigation = parsed.example_navigation
out.search_keywords += parsed.search_keywords
if parsed.search_enum_values_as_keywords: out.search_enum_values_as_keywords = True
# Assert we didn't miss anything important
assert not parsed.section
elif i.tag == 'blockquote':
assert element.tag in ['para', '{http://mcss.mosra.cz/doxygen/}div']
has_block_elements = True
out.parsed += '<blockquote>{}</blockquote>'.format(parse_desc(state, i))
elif i.tag in ['itemizedlist', 'orderedlist']:
assert element.tag in ['para', '{http://mcss.mosra.cz/doxygen/}div']
has_block_elements = True
tag = 'ul' if i.tag == 'itemizedlist' else 'ol'
out.parsed += '<{}{}>'.format(tag,
' class="{}"'.format(add_css_class) if add_css_class else '')
for li in i:
assert li.tag == 'listitem'
parsed = parse_desc_internal(state, li)
out.parsed += '<li>{}</li>'.format(parsed.parsed)
# Lists can have nested parameter / return value / ...
# descriptions, bubble them up. THIS IS FUCKEN UNBELIEVABLE.
merge_parsed_subsections(parsed)
# Bubble up keywords as well
if parsed.search_keywords:
out.search_keywords += parsed.search_keywords
out.parsed += '</{}>'.format(tag)
elif i.tag == 'table':
assert element.tag in ['para', '{http://mcss.mosra.cz/doxygen/}div']
has_block_elements = True
out.parsed += '<table class="m-table{}">'.format(
' ' + add_css_class if add_css_class else '')
thead_written = False
inside_tbody = False
row: ET.Element
for row in i:
if row.tag == 'caption':
out.parsed += '<caption>{}</caption>'.format(parse_inline_desc(state, row))
if row.tag == 'row':
is_header_row = True
row_data = ''
for entry in row:
assert entry.tag == 'entry'
is_header = entry.attrib['thead'] == 'yes'
is_header_row = is_header_row and is_header
rowspan = ' rowspan="{}"'.format(entry.attrib['rowspan']) if 'rowspan' in entry.attrib else ''
colspan = ' colspan="{}"'.format(entry.attrib['colspan']) if 'colspan' in entry.attrib else ''
classes = ' class="{}"'.format(entry.attrib['class']) if 'class' in entry.attrib else ''
row_data += '<{0}{2}{3}{4}>{1}</{0}>'.format(
'th' if is_header else 'td',
parse_desc(state, entry),
rowspan, colspan, classes)
# Table head is opened upon encountering first header row
# and closed upon encountering first body row (in case it was
# ever opened). Encountering header row inside body again will
# not do anything special.
if is_header_row:
if not thead_written:
out.parsed += '<thead>'
thead_written = True
else:
if thead_written and not inside_tbody:
out.parsed += '</thead>'
if not inside_tbody:
out.parsed += '<tbody>'
inside_tbody = True
out.parsed += '<tr>{}</tr>'.format(row_data)
if inside_tbody: out.parsed += '</tbody>'
out.parsed += '</table>'
elif i.tag == 'simplesect':
assert element.tag == 'para' # is inside a paragraph :/
# Return value is separated from the text flow
if i.attrib['kind'] == 'return':
if out.return_value:
logging.warning("{}: superfluous @return section found, ignoring: {}".format(state.current, ''.join(i.itertext()).rstrip()))
else:
out.return_value = parse_desc(state, i)
# Content of @since tags is put as-is into entry description /
# details, if enabled.
elif i.attrib['kind'] == 'since' and state.config['VERSION_LABELS']:
since = parse_inline_desc(state, i).strip()
assert since.startswith('<p>') and since.endswith('</p>')
out.since = since[3:-4]
else:
has_block_elements = True
# There was a section open, but it differs from this one, close
# it
if previous_section and ((i.attrib['kind'] != 'par' and previous_section != i.attrib['kind']) or (i.attrib['kind'] == 'par' and i.find('title').text)):
out.parsed = out.parsed.rstrip() + '</aside>'
# Not continuing with a section from before, put a header in
if not previous_section or (i.attrib['kind'] != 'par' and previous_section != i.attrib['kind']) or (i.attrib['kind'] == 'par' and i.find('title').text):
# TODO: make it possible to override the class using @m_class,
# document this and document behavior of @par
if i.attrib['kind'] == 'see':
title = 'See also'
css_class = 'm-default'
elif i.attrib['kind'] == 'note':
title = 'Note'
css_class = 'm-info'
elif i.attrib['kind'] == 'attention':
title = 'Attention'
css_class = 'm-warning'
elif i.attrib['kind'] == 'warning':
title = 'Warning'
css_class = 'm-danger'
elif i.attrib['kind'] == 'author':
title = 'Author'
css_class = 'm-default'
elif i.attrib['kind'] == 'authors':
title = 'Authors'
css_class = 'm-default'
elif i.attrib['kind'] == 'copyright':
title = 'Copyright'
css_class = 'm-default'
elif i.attrib['kind'] == 'version':
title = 'Version'
css_class = 'm-default'
elif i.attrib['kind'] == 'since':
title = 'Since'
css_class = 'm-default'
elif i.attrib['kind'] == 'date':
title = 'Date'
css_class = 'm-default'
elif i.attrib['kind'] == 'pre':
title = 'Precondition'
css_class = 'm-success'
elif i.attrib['kind'] == 'post':
title = 'Postcondition'
css_class = 'm-success'
elif i.attrib['kind'] == 'invariant':
title = 'Invariant'
css_class = 'm-success'
elif i.attrib['kind'] == 'remark':
title = 'Remark'
css_class = 'm-default'
elif i.attrib['kind'] == 'par':
title = html.escape(i.findtext('title', ''))
css_class = 'm-default'
elif i.attrib['kind'] == 'rcs':
title = html.escape(i.findtext('title', ''))
css_class = 'm-default'
else: # pragma: no cover
title = ''
css_class = ''
logging.warning("{}: ignoring {} kind of <simplesect>".format(state.current, i.attrib['kind']))
if add_css_class:
css_class = add_css_class
heading = 'h3'
else:
css_class = 'm-note ' + css_class
heading = 'h4'
if title:
out.parsed += '<aside class="{css_class}"><{heading}>{title}</{heading}>'.format(
css_class=css_class,
heading=heading,
title=title)
else:
out.parsed += '<aside class="{}">'.format(css_class)
# Parse the section contents and bubble important stuff up
parsed, search_keywords, search_enum_values_as_keywords = parse_desc_keywords(state, i)
out.parsed += parsed
if search_keywords:
out.search_keywords += search_keywords
if search_enum_values_as_keywords:
out.search_enum_values_as_keywords = True
# There's something after, close it
if i.tail and i.tail.strip():
out.parsed += '</aside>'
previous_section = None
# Otherwise put the responsibility on the next iteration, maybe
# there are more paragraphs that should be merged
else:
previous_section = i.attrib['kind']
elif i.tag == 'xrefsect':
assert element.tag == 'para' # is inside a paragraph :/
has_block_elements = True
# Not merging these, as each one usually has a different ID. (And
# apparently Doxygen is able to merge them *but only if* they
# describe some symbol, not on a page.)
id = i.attrib['id']
match = xref_id_rx.match(id)
file = match.group(1)
title = i.find('xreftitle').text
if add_css_class:
css_class = add_css_class
heading = 'h3'
else:
heading = 'h4'
css_class = 'm-note '
# If we have version info from a previous Since badge, use it
# instead of the title
if file.startswith('deprecated'):
css_class += 'm-danger'
if out.since:
out.deprecated = out.since
title = out.since.capitalize()
out.since = None
else:
out.deprecated = 'deprecated'
title = 'Deprecated'
elif file.startswith('bug'):
css_class += 'm-danger'
elif file.startswith('todo'):
css_class += 'm-dim'
else:
css_class += 'm-default'
out.parsed += '<aside class="{css_class}"><{heading}><a href="{file}.html#{anchor}" class="m-doc">{title}</a></{heading}>{description}</aside>'.format(
css_class=css_class,
heading=heading,
file=file,
anchor=match.group(2),
title=title,
description=parse_desc(state, i.find('xrefdescription')))
elif i.tag == 'parameterlist':