pegasystems · operdeck · Jan 13, 2024 · Jan 13, 2024 · Jan 13, 2024 · Jan 13, 2024
diff --git a/.github/workflows/Healthcheck tests.yml b/.github/workflows/Healthcheck tests.yml
@@ -31,15 +31,13 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install -r requirements.txt
-          pip install pytest
+          pip install -r tests/test-requirements.txt
           pip install papermill
-          pip install XlsxWriter
-          pip install ipython 
-          pip install ipykernel
           pip install itables
           pip install jinja2
           ipython kernel install --name "python3" --user
           sudo apt-get install -y pandoc
+
       - name: Install pdstools
         run: pip install .
 

diff --git a/.github/workflows/Python tests.yml b/.github/workflows/Python tests.yml
@@ -64,12 +64,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r python/requirements.txt
-          pip install pytest-cov
-          pip install scipy
-          pip install testbook
-          pip install XlsxWriter
-          pip install ipython 
-          pip install ipykernel
+          pip install -r python/tests/test-requirements.txt
           ipython kernel install --name "python3" --user
 
       - name: Run tests

diff --git a/python/pdstools/README.md b/python/pdstools/README.md
diff --git a/python/pdstools/adm/ADMDatamart.py b/python/pdstools/adm/ADMDatamart.py
@@ -117,7 +117,7 @@ def __init__(
         drop_cols: Optional[list] = None,
         include_cols: Optional[list] = None,
         context_keys: list = ["Channel", "Direction", "Issue", "Group"],
-        extract_keys: bool = False, # TODO: should be True by default, extract should be efficiently using Configuration
+        extract_keys: bool = False,  # TODO: should be True by default, extract should be efficiently using Configuration
         predictorCategorization: pl.Expr = cdh_utils.defaultPredictorCategorization,
         plotting_engine: Union[str, Any] = "plotly",
         verbose: bool = False,
@@ -397,13 +397,13 @@ def _import_utils(
             try:
                 df = self._apply_query(df, self.query)
                 if self.verbose:
-                    print(f"Query succesful for {name}.")
+                    print(f"Query successful for {name}.")
             except pl.ColumnNotFoundError:
                 if self.verbose:
                     print(
-                        f"""Query unsuccesful for {name}.
+                        f"""Query unsuccessful for {name}.
                 Maybe the filter you selected only applies to either model data or predictor data
-                and thus can't be succesful for the other one. That should be fine
+                and thus can't be successful for the other one. That should be fine
                 as the other table is likely queried correctly."""
                     )
         return df, cols, missing
@@ -1421,7 +1421,9 @@ def run_bash_command(bashCommand, working_dir, **kwargs):
             yaml.dump(params, f)
 
         bashCommand = f"quarto render {healthcheck_file} --to {output_type} --output {output_filename} --execute-params params.yaml"
+
         run_bash_command(bashCommand, working_dir, **kwargs)
+
         check_output_file(working_dir, output_filename, verbose, delete_temp_files)
         if delete_temp_files:
             _delete_temp_files(working_dir, files)

diff --git a/python/pdstools/adm/ADMTrees.py b/python/pdstools/adm/ADMTrees.py
@@ -127,27 +127,27 @@
         logging.info("Reading model...")
         self._read_model(file, **kwargs)
         if self.trees is None:
-            raise ValueError("Import unsuccesful.")
+            raise ValueError("Import unsuccessful.")
 
     def _read_model(self, file, **kwargs):
         def _import(file):
             logging.info("Trying regular import.")
             with open(file) as f:
                 file = json.load(f)
-            logging.info("Regular import succesful.")
+            logging.info("Regular import successful.")
 
             return file
 
         def read_url(file):
             logging.info("Trying to read from URL.")
             file = urllib.request.urlopen(file).read()
-            logging.info("Import from URL succesful.")
+            logging.info("Import from URL successful.")
             return file
 
         def decode_string(file):
             logging.info("Trying to decompress the string.")
             file = zlib.decompress(base64.b64decode(file))
-            logging.info("Decompressing string succesful.")
+            logging.info("Decompressing string successful.")
             return file
 
         decode = kwargs.pop("decode", False)

diff --git a/python/pdstools/adm/README.md b/python/pdstools/adm/README.md
diff --git a/python/pdstools/adm/Tables.py b/python/pdstools/adm/Tables.py
@@ -2,25 +2,7 @@
 from pdstools.utils.cdh_utils import weighted_average_polars
 from functools import cached_property
 
-# TODO: this whole class can go away
-
-standardNBADNames = [
-    "Assisted_Click_Through_Rate",
-    "CallCenter_Click_Through_Rate",
-    "CallCenterAcceptRateOutbound",
-    "Default_Inbound_Model",
-    "Default_Outbound_Model",
-    "Email_Click_Through_Rate",
-    "Mobile_Click_Through_Rate",
-    "OmniAdaptiveModel",
-    "Other_Inbound_Click_Through_Rate",
-    "Push_Click_Through_Rate",
-    "Retail_Click_Through_Rate",
-    "Retail_Click_Through_Rate_Outbound",
-    "SMS_Click_Through_Rate",
-    "Web_Click_Through_Rate",
-]
-
+# TODO: reconsier this whole class
 
 class Tables:
     @cached_property
@@ -46,7 +28,7 @@ def AvailableTables(self):
         df = pl.DataFrame(
             {
                 "modeldata_last_snapshot": [1, 0],
-                "predictor_last_snapshot": [1, 1],
+                "predictor_last_snapshot": [1, 1], # TODO how is this used?
                 "predictorbinning": [1, 1],
             }
         )
@@ -76,6 +58,8 @@ def modeldata_last_snapshot(self):
 
         return modeldata_last_snapshot
 
+    # TODO does this summarization really belong here?
+
     @cached_property
     def predictor_last_snapshot(self):
         model_identifiers = ["Configuration"] + self.context_keys

diff --git a/python/pdstools/plots/README.md b/python/pdstools/plots/README.md
diff --git a/python/requirements.txt b/python/requirements.txt
@@ -1,4 +1,5 @@
 pandas
+tabulate
 plotly>=5.5.0
 requests
 pydot

diff --git a/python/tests/test-requirements.txt b/python/tests/test-requirements.txt
@@ -0,0 +1,8 @@
+pytest
+pytest-cov
+scipy
+testbook
+XlsxWriter
+openpyxl
+ipython 
+ipykernel
diff --git a/python/tests/test_healthcheck.py b/python/tests/test_healthcheck.py
@@ -1,7 +1,7 @@
 import sys
 import os
-
 import pathlib
+from pandas import read_excel, ExcelFile
 
 basePath = pathlib.Path(__file__).parent.parent.parent
 sys.path.append(f"{str(basePath)}/python")
@@ -14,15 +14,6 @@ def sample():
     return datasets.CDHSample()
 
 
-def testHealthCheckRunsWithoutErrors(sample):
-    sample.generateReport(verbose=True)
-
-
-def testAdditionalTables(sample):
-    sample.exportTables(predictorBinning=True)
-    os.remove("Tables.xlsx")
-
-
 @pytest.fixture
 def sample_without_predictorbinning():
     return ADMDatamart(
@@ -32,20 +23,79 @@ def sample_without_predictorbinning():
     )
 
 
-def testHealthCheckModelRunsWithoutErrors(sample_without_predictorbinning):
-    sample_without_predictorbinning.generateReport(verbose=True, modelData_only=True)
+def test_GenerateHealthCheck(sample):
+    hc = sample.generateReport(verbose=True)
+    assert hc == "./HealthCheck.html"
+    assert pathlib.Path(hc).exists()
+    os.remove("HealthCheck.html")
+    assert not pathlib.Path(hc).exists()
+
+def test_ExportTables(sample):
+    excel = sample.exportTables(predictorBinning=True)
+    assert excel == "Tables.xlsx"
+    assert pathlib.Path(excel).exists()
+    spreadsheet = ExcelFile(excel)
+    assert list(spreadsheet.sheet_names) == [
+        "modeldata_last_snapshot",
+        "predictor_last_snapshot",
+        "predictorbinning",
+    ]
+    # TODO we could go further and check the size of the sheets
+    # spreadsheet = read_excel(excel, sheet_name=None)
+    os.remove("Tables.xlsx")
+    assert not pathlib.Path(excel).exists()
+
+def test_ExportTables_NoBinning(sample):
+    excel = sample.exportTables(predictorBinning=False)
+    assert excel == "Tables.xlsx"
+    assert pathlib.Path(excel).exists()
+    spreadsheet = ExcelFile(excel)
+    assert list(spreadsheet.sheet_names) == [
+        "modeldata_last_snapshot",
+        "predictor_last_snapshot",
+    ]
+    # TODO we could go further and check the size of the sheets
+    # spreadsheet = read_excel(excel, sheet_name=None)
+    os.remove("Tables.xlsx")
+    assert not pathlib.Path(excel).exists()
+
+def test_GenerateHealthCheck_ModelDataOnly(sample_without_predictorbinning):
+    hc = sample_without_predictorbinning.generateReport(
+        name="MyOrg", verbose=True, modelData_only=True
+    )
+    assert hc == "./HealthCheck_MyOrg.html"
+    assert pathlib.Path(hc).exists()
+    os.remove("HealthCheck_MyOrg.html")
+    assert not pathlib.Path(hc).exists()
 
 
-def testAdditionalTablesModel(sample_without_predictorbinning):
-    sample_without_predictorbinning.exportTables(
+def test_ExportTables_ModelDataOnly(sample_without_predictorbinning):
+    excel = sample_without_predictorbinning.exportTables(
         file="ModelTables.xlsx", predictorBinning=True
     )
+    assert excel == "ModelTables.xlsx"
+    assert pathlib.Path(excel).exists()
+    spreadsheet = ExcelFile(excel)
+    assert list(spreadsheet.sheet_names) == [
+        "modeldata_last_snapshot",
+    ]
+    # TODO we could go further and check the size of the sheets
+    # spreadsheet = read_excel(excel, sheet_name=None)
     os.remove("ModelTables.xlsx")
 
 
-def modelReportRuns(sample):
-    sample.generateReport(modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0")
-
-
-def remove_healthCheck():
-    os.remove("ADM_HealthCheck.html")
+def test_GenerateModelReport(sample):
+    report = sample.generateReport(
+        name="MyOrg", modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0"
+    )
+    assert report == "./ModelReport_MyOrg_bd70a915-697a-5d43-ab2c-53b0557c85a0.html"
+    assert pathlib.Path(report).exists()
+    os.remove(report)
+    assert not pathlib.Path(report).exists()
+
+def test_GenerateModelReport_Failing(sample_without_predictorbinning):
+    with pytest.raises(Exception) as e_info:
+        sample_without_predictorbinning.generateReport(
+            name="MyOrg", modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0"
+        )
+    assert "Error when generating healthcheck" in str(e_info)
diff --git a/python/tests/test_show_versions.py b/python/tests/test_show_versions.py
@@ -2,17 +2,19 @@
 Testing the functionality of utils/show_versions functions
 """
 
-import datetime
 import pathlib
 import sys
 
-import numpy as np
-import polars as pl
-import pytest
-from pytz import timezone
-
 basePath = pathlib.Path(__file__).parent.parent.parent
 sys.path.append(f"{str(basePath)}/python")
-from pdstools import cdh_utils, datasets
+import pdstools
 
 
+def test_show_versions(capsys):
+    pdstools.show_versions()
+    captured = capsys.readouterr()
+    assert "pdstools" in captured.out
+    assert "---Version info---" in captured.out
+    assert "---Dependencies---" in captured.out
+    assert "---Streamlit app dependencies---" in captured.out
+