Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP | Fix handling for table_style to prevent repeated re-splitting #1

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions html4docx/h4d.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,28 @@ def __init__(self):
]
self.table_style = DEFAULT_TABLE_STYLE

def set_table_style(self, new_table_style: str) -> None:
    """Set the table style applied to all tables in the final DOCX file.

    The full list of style options can be found at
    https://python-docx.readthedocs.io/en/latest/user/styles-understanding.html#table-styles-in-default-template

    A name that already contains spaces (e.g. "Colorful Grid Accent 2") is stored unchanged.
    A name without spaces (e.g. "TableGrid") is treated as a style id and converted to its
    display form by inserting a space before each capital letter ("Table Grid"), because
    looking styles up by style id is deprecated.

    Args:
        new_table_style (str): DOCX-supported table style string, i.e. "Colorful Grid Accent 2".

    Raises:
        ValueError: Thrown if the input contains no spaces and cannot be split losslessly by
            capitals (for example "tablegrid" or "myTableGrid"). The previously configured
            style is left untouched in that case.
    """
    # Names with spaces are already in display form; save without reformatting.
    if " " in new_table_style:
        self.table_style = new_table_style
        return

    # Fixed 'style lookup by style_id is deprecated.'
    # https://stackoverflow.com/a/29567907/17274446
    words = re.findall(r'[A-Z][^A-Z]*', new_table_style)

    # The regex only captures runs that start with a capital letter, so anything before the
    # first capital (or an entirely lowercase name) would be dropped silently. Reject such
    # input instead of storing a truncated or empty style name. The empty string reassembles
    # to itself and is therefore still accepted.
    if ''.join(words) != new_table_style:
        raise ValueError(f"Unable to apply style {new_table_style}.")

    self.table_style = ' '.join(words)

def set_initial_attrs(self, document=None):
self.tags = {
'span': [],
Expand Down Expand Up @@ -330,12 +352,7 @@ def handle_table(self, current_attrs):
self.table = self.doc.add_table(rows, cols)

if self.table_style:
try:
# Fixed 'style lookup by style_id is deprecated.'
# https://stackoverflow.com/a/29567907/17274446
self.table_style = ' '.join(re.findall(r'[A-Z][^A-Z]*', self.table_style))
except KeyError as e:
raise ValueError(f"Unable to apply style {self.table_style}.") from e
self.table.style = self.table_style

rows = self.get_table_rows(table_soup)
cell_row = 0
Expand Down
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,9 @@
install_requires = [
'python-docx>=1.1.0',
'beautifulsoup4>=4.12.2'
],
tests_require = [
'pytest>=8.1.1',
'pytest-cov>=4.1.0'
]
)
Empty file added tests/unittests/__init__.py
Empty file.
44 changes: 44 additions & 0 deletions tests/unittests/test_h4d.py
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello @kdipippo! Thanks for contributing to this project! Could you explain why you created a separate unit test file instead of adding this test to tests/test.py? There you don't need to set up and clean up, because there are already methods that do this for you.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sweet! Thanks for the comment! I'll rework to use test.py. My bad, I missed that this repo has unit testing set up.

Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Unit test suite that covers html4docx/h4d.py."""

import os
import pathlib
from html4docx.h4d import HtmlToDocx
from docx import Document


def test_h4d_set_table_style() -> None:
    """Test that covers set_table_style() in h4d.py.

    Three scenarios are run against the ``heyo.html`` fixture that sits next to this file:
    the parser's default table style, a style id without spaces ("TableGrid"), and a display
    name with spaces ("Colorful Grid Accent 2"). Each scenario saves the resulting document
    to a scratch DOCX file, which is removed afterwards even if an assertion fails.
    """
    fixture_path = pathlib.Path(__file__).parent.resolve() / "heyo.html"
    test_html_string = fixture_path.read_text(encoding="utf-8")

    test_docx_path = pathlib.Path("test_h4d_set_table_style.docx")

    try:
        # Test where table_style and table.style are both None.
        parser = HtmlToDocx()
        document = Document()
        parser.add_html_to_document(test_html_string, document)
        document.save(str(test_docx_path))
        assert not parser.table_style
        assert hasattr(parser, "table")
        assert not parser.table

        # Test where table_style is set to valid table Style without spaces but parsed to correct syntax.
        parser = HtmlToDocx()
        parser.set_table_style("TableGrid")
        assert parser.table_style == "Table Grid"
        document = Document()
        parser.add_html_to_document(test_html_string, document)
        document.save(str(test_docx_path))
        # Converting the document must not re-split the already formatted style name.
        assert parser.table_style == "Table Grid"

        # Test where table_style is set to valid table Style with spaces.
        parser = HtmlToDocx()
        parser.set_table_style("Colorful Grid Accent 2")
        assert parser.table_style == "Colorful Grid Accent 2"
        document = Document()
        parser.add_html_to_document(test_html_string, document)
        document.save(str(test_docx_path))
        assert parser.table_style == "Colorful Grid Accent 2"
    finally:
        # Cleanup test file. missing_ok covers a failure that happens before the first save.
        test_docx_path.unlink(missing_ok=True)
Loading