Skip to content

Commit 3bf0b52

Browse files
authored
Add a new configuration option to control table header row inference (#161)
Add option to infer first table row as table header (defaults to false)
1 parent 1783995 commit 3bf0b52

File tree

4 files changed

+81
-12
lines changed

4 files changed

+81
-12
lines changed

Diff for: README.rst

+5
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,11 @@ keep_inline_images_in
139139
that should be allowed to contain inline images, for example ``['td']``.
140140
Defaults to an empty list.
141141

142+
table_infer_header
143+
Controls handling of tables with no header row (as indicated by ``<thead>``
144+
or ``<th>``). When set to ``True``, the first body row is used as the header row.
145+
Defaults to ``False``, which leaves the header row empty.
146+
142147
wrap, wrap_width
143148
If ``wrap`` is set to ``True``, all text paragraphs are wrapped at
144149
``wrap_width`` characters. Defaults to ``False`` and ``80``.

Diff for: markdownify/__init__.py

+24-9
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class DefaultOptions:
102102
strong_em_symbol = ASTERISK
103103
sub_symbol = ''
104104
sup_symbol = ''
105+
table_infer_header = False
105106
wrap = False
106107
wrap_width = 80
107108

@@ -518,27 +519,41 @@ def convert_tr(self, el, text, convert_as_inline):
518519
cells = el.find_all(['td', 'th'])
519520
is_headrow = (
520521
all([cell.name == 'th' for cell in cells])
521-
or (not el.previous_sibling and not el.parent.name == 'tbody')
522+
or (el.parent.name == 'thead'
523+
# avoid multiple tr in thead
524+
and len(el.parent.find_all('tr')) == 1)
525+
)
526+
is_head_row_missing = (
527+
(not el.previous_sibling and not el.parent.name == 'tbody')
522528
or (not el.previous_sibling and el.parent.name == 'tbody' and len(el.parent.parent.find_all(['thead'])) < 1)
523529
)
524530
overline = ''
525531
underline = ''
526-
if is_headrow and not el.previous_sibling:
527-
# first row and is headline: print headline underline
532+
if ((is_headrow
533+
or (is_head_row_missing
534+
and self.options['table_infer_header']))
535+
and not el.previous_sibling):
536+
# first row and:
537+
# - is headline or
538+
# - headline is missing and header inference is enabled
539+
# print headline underline
528540
full_colspan = 0
529541
for cell in cells:
530542
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
531543
full_colspan += int(cell["colspan"])
532544
else:
533545
full_colspan += 1
534546
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
535-
elif (not el.previous_sibling
536-
and (el.parent.name == 'table'
537-
or (el.parent.name == 'tbody'
538-
and not el.parent.previous_sibling))):
547+
elif ((is_head_row_missing
548+
and not self.options['table_infer_header'])
549+
or (not el.previous_sibling
550+
and (el.parent.name == 'table'
551+
or (el.parent.name == 'tbody'
552+
and not el.parent.previous_sibling)))):
553+
# headline is missing and header inference is disabled or:
539554
# first row, not headline, and:
540-
# - the parent is table or
541-
# - the parent is tbody at the beginning of a table.
555+
# - the parent is table or
556+
# - the parent is tbody at the beginning of a table.
542557
# print empty headline above this row
543558
overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n'
544559
overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'

Diff for: markdownify/main.py

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ def main(argv=sys.argv[1:]):
6161
"should be converted to markdown images instead, this option can "
6262
"be set to a list of parent tags that should be allowed to "
6363
"contain inline images.")
64+
parser.add_argument('--table-infer-header', dest='table_infer_header',
65+
action='store_true',
66+
help="When a table has no header row (as indicated by '<thead>' "
67+
"or '<th>'), use the first body row as the header row.")
6468
parser.add_argument('-w', '--wrap', action='store_true',
6569
help="Wrap all text paragraphs at --wrap-width characters.")
6670
parser.add_argument('--wrap-width', type=int, default=80)

Diff for: tests/test_tables.py

+48-3
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,33 @@
141141
</tbody>
142142
</table>"""
143143

144+
table_head_body_multiple_head = """<table>
145+
<thead>
146+
<tr>
147+
<td>Creator</td>
148+
<td>Editor</td>
149+
<td>Server</td>
150+
</tr>
151+
<tr>
152+
<td>Operator</td>
153+
<td>Manager</td>
154+
<td>Engineer</td>
155+
</tr>
156+
</thead>
157+
<tbody>
158+
<tr>
159+
<td>Bob</td>
160+
<td>Oliver</td>
161+
<td>Tom</td>
162+
</tr>
163+
<tr>
164+
<td>Thomas</td>
165+
<td>Lucas</td>
166+
<td>Ethan</td>
167+
</tr>
168+
</tbody>
169+
</table>"""
170+
144171
table_missing_text = """<table>
145172
<thead>
146173
<tr>
@@ -245,10 +272,28 @@ def test_table():
245272
assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
246273
assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
247274
assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
275+
assert md(table_head_body_multiple_head) == '\n\n| | | |\n| --- | --- | --- |\n| Creator | Editor | Server |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
248276
assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
249277
assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
250-
assert md(table_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
251-
assert md(table_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
252-
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
278+
assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
279+
assert md(table_body) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
280+
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
253281
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
254282
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
283+
284+
285+
def test_table_infer_header():
286+
assert md(table, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
287+
assert md(table_with_html_content, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
288+
assert md(table_with_paragraphs, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
289+
assert md(table_with_linebreaks, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
290+
assert md(table_with_header_column, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
291+
assert md(table_head_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
292+
assert md(table_head_body_multiple_head, table_infer_header=True) == '\n\n| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
293+
assert md(table_head_body_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
294+
assert md(table_missing_text, table_infer_header=True) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
295+
assert md(table_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
296+
assert md(table_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
297+
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
298+
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
299+
assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'

0 commit comments

Comments
 (0)