diff --git a/.gitignore b/.gitignore index 159d1d7..850a9e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ __pycache__/ .DS_store +.eggs/ +.idea/ .ipynb_checkpoints/ dataframe_image.egg-info/ dist/ @@ -13,4 +15,5 @@ __init__.pyc /tests/**/*.png /tests/**/*.md /tests/**/*.gif +/tests/**/*.html tests/notebooks/Short_dataframe_image.ipynb diff --git a/dataframe_image/_matplotlib_table.py b/dataframe_image/_matplotlib_table.py index e65ba50..6323b52 100644 --- a/dataframe_image/_matplotlib_table.py +++ b/dataframe_image/_matplotlib_table.py @@ -2,6 +2,7 @@ import io import textwrap +import cssutils import numpy as np import pandas as pd from bs4 import BeautifulSoup @@ -52,7 +53,7 @@ def parse_html(self, html): cur_col_loc = 0 for _ in range(val[-1]): cur_col_loc += 1 - new_row.append(val[:3]) + new_row.append(val[:5]) if val[-2] == 1: del rowspan[col_loc] col_loc += cur_col_loc @@ -62,7 +63,7 @@ def parse_html(self, html): rowspan[col_loc] = val col_loc += val[-1] # usually 1 for _ in range(val[-1]): - new_row.append(val[:3]) + new_row.append(val[:5]) j += 1 new_rows.append(new_row) return new_rows, num_header_rows @@ -77,6 +78,14 @@ def get_text_align(self, element): return val def parse_into_rows(self, html): + + def get_property(class_name, property_name): + for rule in sheet: + selectors = rule.selectorText.replace(" ", "").split(",") + if class_name in selectors: + for style_property in rule.style: + if style_property.name == property_name: + return style_property.value def parse_row(row): values = [] rowspan_dict = {} @@ -88,10 +97,14 @@ def parse_row(row): rowspan = int(el.attrs.get("rowspan", 1)) text_align = self.get_text_align(el) or row_align text = el.get_text() - values.append([text, bold, text_align, rowspan, colspan]) + if "id" in el.attrs: + values.append([text, bold, text_align, get_property("#" + el.attrs["id"], "background-color"), get_property("#" + el.attrs["id"], "color"), rowspan, colspan]) + else: + values.append([text, bold, text_align, "#ffffff", "#000000", rowspan, colspan]) return values soup = BeautifulSoup(html, features="lxml") + sheet = cssutils.parseString(soup.find('style').text) # get number of columns from first row # num_cols = sum(int(el.get('colspan', 1)) for el in soup.find('tr').find_all(['td', 'th'])) thead = soup.find("thead") @@ -215,6 +228,8 @@ def print_table(self): text = val[0] weight = "bold" if val[1] else None ha = val[2] or header_text_align[j] or "right" + fg = val[4] if val[4] else "#000000" + bg = val[3] if val[3] else "#ffffff" if ha == "right": x += xd @@ -228,6 +243,8 @@ def print_table(self): ha=ha, va="center", weight=weight, + color=fg, + backgroundcolor=bg ) if ha == "left": x += xd diff --git a/setup.py b/setup.py index 9e34b0e..8d6abbd 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ "pillow", "packaging", "mistune", + "lxml", "beautifulsoup4", "cssutils", ], diff --git a/tests/test_df_image.py b/tests/test_df_image.py index 0c07f57..ad4e3bb 100644 --- a/tests/test_df_image.py +++ b/tests/test_df_image.py @@ -8,8 +8,9 @@ "tests/notebooks/data/covid19.csv", parse_dates=["date"], index_col="date" ) -test_dpi_values = [50, 200, 400] -converters = ["chrome", "selenium", "matplotlib", "html2image"] +test_dpi_values = [50, 100, 200, 400] +converters = ["chrome", "selenium", "matplotlib"] + class TestImage: @@ -22,6 +23,7 @@ def test_df(self, converter, dpi): dpi=dpi, ) + @pytest.mark.parametrize("dpi", test_dpi_values) @pytest.mark.parametrize("converter", converters) def test_styled(self, converter, dpi):