diff --git a/.github/workflows/Healthcheck tests.yml b/.github/workflows/Healthcheck tests.yml index 025fbca2..908e4f4e 100644 --- a/.github/workflows/Healthcheck tests.yml +++ b/.github/workflows/Healthcheck tests.yml @@ -31,15 +31,13 @@ jobs: run: | pip install --upgrade pip pip install -r requirements.txt - pip install pytest + pip install -r tests/test-requirements.txt pip install papermill - pip install XlsxWriter - pip install ipython - pip install ipykernel pip install itables pip install jinja2 ipython kernel install --name "python3" --user sudo apt-get install -y pandoc + - name: Install pdstools run: pip install . diff --git a/.github/workflows/Python tests.yml b/.github/workflows/Python tests.yml index 81eac327..4cc3ce1d 100644 --- a/.github/workflows/Python tests.yml +++ b/.github/workflows/Python tests.yml @@ -64,12 +64,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r python/requirements.txt - pip install pytest-cov - pip install scipy - pip install testbook - pip install XlsxWriter - pip install ipython - pip install ipykernel + pip install -r python/tests/test-requirements.txt ipython kernel install --name "python3" --user - name: Run tests diff --git a/python/pdstools/README.md b/python/pdstools/README.md deleted file mode 100644 index 2ca814bf..00000000 --- a/python/pdstools/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Python pdstools - -This top-level folder contains all of the subdirectories containing the library. 
\ No newline at end of file diff --git a/python/pdstools/adm/ADMDatamart.py b/python/pdstools/adm/ADMDatamart.py index d8300d60..487906e6 100644 --- a/python/pdstools/adm/ADMDatamart.py +++ b/python/pdstools/adm/ADMDatamart.py @@ -117,7 +117,7 @@ def __init__( drop_cols: Optional[list] = None, include_cols: Optional[list] = None, context_keys: list = ["Channel", "Direction", "Issue", "Group"], - extract_keys: bool = False, # TODO: should be True by default, extract should be efficiently using Configuration + extract_keys: bool = False, # TODO: should be True by default, extract should be efficiently using Configuration predictorCategorization: pl.Expr = cdh_utils.defaultPredictorCategorization, plotting_engine: Union[str, Any] = "plotly", verbose: bool = False, @@ -397,13 +397,13 @@ def _import_utils( try: df = self._apply_query(df, self.query) if self.verbose: - print(f"Query succesful for {name}.") + print(f"Query successful for {name}.") except pl.ColumnNotFoundError: if self.verbose: print( - f"""Query unsuccesful for {name}. + f"""Query unsuccessful for {name}. Maybe the filter you selected only applies to either model data or predictor data - and thus can't be succesful for the other one. That should be fine + and thus can't be successful for the other one. 
That should be fine as the other table is likely queried correctly.""" ) return df, cols, missing @@ -1421,7 +1421,9 @@ def run_bash_command(bashCommand, working_dir, **kwargs): yaml.dump(params, f) bashCommand = f"quarto render {healthcheck_file} --to {output_type} --output {output_filename} --execute-params params.yaml" + run_bash_command(bashCommand, working_dir, **kwargs) + check_output_file(working_dir, output_filename, verbose, delete_temp_files) if delete_temp_files: _delete_temp_files(working_dir, files) diff --git a/python/pdstools/adm/ADMTrees.py b/python/pdstools/adm/ADMTrees.py index c0fef647..f0cce425 100644 --- a/python/pdstools/adm/ADMTrees.py +++ b/python/pdstools/adm/ADMTrees.py @@ -127,27 +127,27 @@ def __init__(self, file: str, **kwargs): logging.info("Reading model...") self._read_model(file, **kwargs) if self.trees is None: - raise ValueError("Import unsuccesful.") + raise ValueError("Import unsuccessful.") def _read_model(self, file, **kwargs): def _import(file): logging.info("Trying regular import.") with open(file) as f: file = json.load(f) - logging.info("Regular import succesful.") + logging.info("Regular import successful.") return file def read_url(file): logging.info("Trying to read from URL.") file = urllib.request.urlopen(file).read() - logging.info("Import from URL succesful.") + logging.info("Import from URL successful.") return file def decode_string(file): logging.info("Trying to decompress the string.") file = zlib.decompress(base64.b64decode(file)) - logging.info("Decompressing string succesful.") + logging.info("Decompressing string successful.") return file decode = kwargs.pop("decode", False) diff --git a/python/pdstools/adm/README.md b/python/pdstools/adm/README.md deleted file mode 100644 index 50c44255..00000000 --- a/python/pdstools/adm/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# ADM - -This subdirectory contains all analyses using the ADM Datamart. Currently, that is the ADMDatamart class and the ADMTrees. 
- -The ADMDatamart class in particular also uses the plots in the `plots` folder. - -Both use utilities in the `utils` folder. \ No newline at end of file diff --git a/python/pdstools/adm/Tables.py b/python/pdstools/adm/Tables.py index f9349417..a59cfde2 100644 --- a/python/pdstools/adm/Tables.py +++ b/python/pdstools/adm/Tables.py @@ -2,25 +2,7 @@ from pdstools.utils.cdh_utils import weighted_average_polars from functools import cached_property -# TODO: this whole class can go away - -standardNBADNames = [ - "Assisted_Click_Through_Rate", - "CallCenter_Click_Through_Rate", - "CallCenterAcceptRateOutbound", - "Default_Inbound_Model", - "Default_Outbound_Model", - "Email_Click_Through_Rate", - "Mobile_Click_Through_Rate", - "OmniAdaptiveModel", - "Other_Inbound_Click_Through_Rate", - "Push_Click_Through_Rate", - "Retail_Click_Through_Rate", - "Retail_Click_Through_Rate_Outbound", - "SMS_Click_Through_Rate", - "Web_Click_Through_Rate", -] - +# TODO: reconsider this whole class class Tables: @cached_property @@ -46,7 +28,7 @@ def AvailableTables(self): df = pl.DataFrame( { "modeldata_last_snapshot": [1, 0], - "predictor_last_snapshot": [1, 1], + "predictor_last_snapshot": [1, 1], # TODO how is this used? "predictorbinning": [1, 1], } ) @@ -76,6 +58,8 @@ def modeldata_last_snapshot(self): return modeldata_last_snapshot + # TODO does this summarization really belong here? + @cached_property def predictor_last_snapshot(self): model_identifiers = ["Configuration"] + self.context_keys diff --git a/python/pdstools/plots/README.md b/python/pdstools/plots/README.md deleted file mode 100644 index d807cfe0..00000000 --- a/python/pdstools/plots/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Plots - -This directory contains all methods used to create the visualisations, mainly used by the `ADMDatamart` class. 
- -It contains of three files: -- `plot_base.py` takes care of preprocessing -- `plot_plotly` takes the preprocessed data and creates Plotly charts -- `plot_mpl.py` takes the preprocessed data and creates matplotlib/seaborn plots \ No newline at end of file diff --git a/python/requirements.txt b/python/requirements.txt index b66d9250..f02a6b80 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,4 +1,5 @@ pandas +tabulate plotly>=5.5.0 requests pydot diff --git a/python/tests/test-requirements.txt b/python/tests/test-requirements.txt new file mode 100644 index 00000000..4c0faa94 --- /dev/null +++ b/python/tests/test-requirements.txt @@ -0,0 +1,8 @@ +pytest +pytest-cov +scipy +testbook +XlsxWriter +openpyxl +ipython +ipykernel diff --git a/python/tests/test_healthcheck.py b/python/tests/test_healthcheck.py index 8292d294..632946c4 100644 --- a/python/tests/test_healthcheck.py +++ b/python/tests/test_healthcheck.py @@ -1,7 +1,7 @@ import sys import os - import pathlib +from pandas import read_excel, ExcelFile basePath = pathlib.Path(__file__).parent.parent.parent sys.path.append(f"{str(basePath)}/python") @@ -14,15 +14,6 @@ def sample(): return datasets.CDHSample() -def testHealthCheckRunsWithoutErrors(sample): - sample.generateReport(verbose=True) - - -def testAdditionalTables(sample): - sample.exportTables(predictorBinning=True) - os.remove("Tables.xlsx") - - @pytest.fixture def sample_without_predictorbinning(): return ADMDatamart( @@ -32,20 +23,79 @@ def sample_without_predictorbinning(): ) -def testHealthCheckModelRunsWithoutErrors(sample_without_predictorbinning): - sample_without_predictorbinning.generateReport(verbose=True, modelData_only=True) +def test_GenerateHealthCheck(sample): + hc = sample.generateReport(verbose=True) + assert hc == "./HealthCheck.html" + assert pathlib.Path(hc).exists() + os.remove("HealthCheck.html") + assert not pathlib.Path(hc).exists() + +def test_ExportTables(sample): + excel = 
sample.exportTables(predictorBinning=True) + assert excel == "Tables.xlsx" + assert pathlib.Path(excel).exists() + spreadsheet = ExcelFile(excel) + assert list(spreadsheet.sheet_names) == [ + "modeldata_last_snapshot", + "predictor_last_snapshot", + "predictorbinning", + ] + # TODO we could go further and check the size of the sheets + # spreadsheet = read_excel(excel, sheet_name=None) + os.remove("Tables.xlsx") + assert not pathlib.Path(excel).exists() + +def test_ExportTables_NoBinning(sample): + excel = sample.exportTables(predictorBinning=False) + assert excel == "Tables.xlsx" + assert pathlib.Path(excel).exists() + spreadsheet = ExcelFile(excel) + assert list(spreadsheet.sheet_names) == [ + "modeldata_last_snapshot", + "predictor_last_snapshot", + ] + # TODO we could go further and check the size of the sheets + # spreadsheet = read_excel(excel, sheet_name=None) + os.remove("Tables.xlsx") + assert not pathlib.Path(excel).exists() + +def test_GenerateHealthCheck_ModelDataOnly(sample_without_predictorbinning): + hc = sample_without_predictorbinning.generateReport( + name="MyOrg", verbose=True, modelData_only=True + ) + assert hc == "./HealthCheck_MyOrg.html" + assert pathlib.Path(hc).exists() + os.remove("HealthCheck_MyOrg.html") + assert not pathlib.Path(hc).exists() -def testAdditionalTablesModel(sample_without_predictorbinning): - sample_without_predictorbinning.exportTables( +def test_ExportTables_ModelDataOnly(sample_without_predictorbinning): + excel = sample_without_predictorbinning.exportTables( file="ModelTables.xlsx", predictorBinning=True ) + assert excel == "ModelTables.xlsx" + assert pathlib.Path(excel).exists() + spreadsheet = ExcelFile(excel) + assert list(spreadsheet.sheet_names) == [ + "modeldata_last_snapshot", + ] + # TODO we could go further and check the size of the sheets + # spreadsheet = read_excel(excel, sheet_name=None) os.remove("ModelTables.xlsx") -def modelReportRuns(sample): - 
sample.generateReport(modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0") - - -def remove_healthCheck(): - os.remove("ADM_HealthCheck.html") +def test_GenerateModelReport(sample): + report = sample.generateReport( + name="MyOrg", modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0" + ) + assert report == "./ModelReport_MyOrg_bd70a915-697a-5d43-ab2c-53b0557c85a0.html" + assert pathlib.Path(report).exists() + os.remove(report) + assert not pathlib.Path(report).exists() + +def test_GenerateModelReport_Failing(sample_without_predictorbinning): + with pytest.raises(Exception) as e_info: + sample_without_predictorbinning.generateReport( + name="MyOrg", modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0" + ) + assert "Error when generating healthcheck" in str(e_info) diff --git a/python/tests/test_show_versions.py b/python/tests/test_show_versions.py index 91848fa3..f17f98b1 100644 --- a/python/tests/test_show_versions.py +++ b/python/tests/test_show_versions.py @@ -2,17 +2,19 @@ Testing the functionality of utils/show_versions functions """ -import datetime import pathlib import sys -import numpy as np -import polars as pl -import pytest -from pytz import timezone - basePath = pathlib.Path(__file__).parent.parent.parent sys.path.append(f"{str(basePath)}/python") -from pdstools import cdh_utils, datasets +import pdstools +def test_show_versions(capsys): + pdstools.show_versions() + captured = capsys.readouterr() + assert "pdstools" in captured.out + assert "---Version info---" in captured.out + assert "---Dependencies---" in captured.out + assert "---Streamlit app dependencies---" in captured.out +