Skip to content

Commit

Permalink
Merge pull request #189 from pegasystems/ISSUE_119
Browse files Browse the repository at this point in the history
Improved coverage
  • Loading branch information
operdeck authored Jan 13, 2024
2 parents 06b70be + e0923a5 commit 1554cf6
Show file tree
Hide file tree
Showing 12 changed files with 105 additions and 83 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/Healthcheck tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,13 @@ jobs:
run: |
pip install --upgrade pip
pip install -r requirements.txt
pip install pytest
pip install -r tests/test-requirements.txt
pip install papermill
pip install XlsxWriter
pip install ipython
pip install ipykernel
pip install itables
pip install jinja2
ipython kernel install --name "python3" --user
sudo apt-get install -y pandoc
- name: Install pdstools
run: pip install .

Expand Down
7 changes: 1 addition & 6 deletions .github/workflows/Python tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r python/requirements.txt
pip install pytest-cov
pip install scipy
pip install testbook
pip install XlsxWriter
pip install ipython
pip install ipykernel
pip install -r python/tests/test-requirements.txt
ipython kernel install --name "python3" --user
- name: Run tests
Expand Down
3 changes: 0 additions & 3 deletions python/pdstools/README.md

This file was deleted.

10 changes: 6 additions & 4 deletions python/pdstools/adm/ADMDatamart.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def __init__(
drop_cols: Optional[list] = None,
include_cols: Optional[list] = None,
context_keys: list = ["Channel", "Direction", "Issue", "Group"],
extract_keys: bool = False, # TODO: should be True by default, extract should be efficiently using Configuration
extract_keys: bool = False, # TODO: should be True by default, extract should be efficiently using Configuration
predictorCategorization: pl.Expr = cdh_utils.defaultPredictorCategorization,
plotting_engine: Union[str, Any] = "plotly",
verbose: bool = False,
Expand Down Expand Up @@ -397,13 +397,13 @@ def _import_utils(
try:
df = self._apply_query(df, self.query)
if self.verbose:
print(f"Query succesful for {name}.")
print(f"Query successful for {name}.")
except pl.ColumnNotFoundError:
if self.verbose:
print(
f"""Query unsuccesful for {name}.
f"""Query unsuccessful for {name}.
Maybe the filter you selected only applies to either model data or predictor data
and thus can't be succesful for the other one. That should be fine
and thus can't be successful for the other one. That should be fine
as the other table is likely queried correctly."""
)
return df, cols, missing
Expand Down Expand Up @@ -1421,7 +1421,9 @@ def run_bash_command(bashCommand, working_dir, **kwargs):
yaml.dump(params, f)

bashCommand = f"quarto render {healthcheck_file} --to {output_type} --output {output_filename} --execute-params params.yaml"

run_bash_command(bashCommand, working_dir, **kwargs)

check_output_file(working_dir, output_filename, verbose, delete_temp_files)
if delete_temp_files:
_delete_temp_files(working_dir, files)
Expand Down
8 changes: 4 additions & 4 deletions python/pdstools/adm/ADMTrees.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,27 +127,27 @@ def __init__(self, file: str, **kwargs):
logging.info("Reading model...")
self._read_model(file, **kwargs)
if self.trees is None:
raise ValueError("Import unsuccesful.")
raise ValueError("Import unsuccessful.")

def _read_model(self, file, **kwargs):
def _import(file):
logging.info("Trying regular import.")
with open(file) as f:
file = json.load(f)
logging.info("Regular import succesful.")
logging.info("Regular import successful.")

return file

def read_url(file):
logging.info("Trying to read from URL.")
file = urllib.request.urlopen(file).read()
logging.info("Import from URL succesful.")
logging.info("Import from URL successful.")
return file

def decode_string(file):
logging.info("Trying to decompress the string.")
file = zlib.decompress(base64.b64decode(file))
logging.info("Decompressing string succesful.")
logging.info("Decompressing string successful.")
return file

decode = kwargs.pop("decode", False)
Expand Down
7 changes: 0 additions & 7 deletions python/pdstools/adm/README.md

This file was deleted.

24 changes: 4 additions & 20 deletions python/pdstools/adm/Tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,7 @@
from pdstools.utils.cdh_utils import weighted_average_polars
from functools import cached_property

# TODO: this whole class can go away

standardNBADNames = [
"Assisted_Click_Through_Rate",
"CallCenter_Click_Through_Rate",
"CallCenterAcceptRateOutbound",
"Default_Inbound_Model",
"Default_Outbound_Model",
"Email_Click_Through_Rate",
"Mobile_Click_Through_Rate",
"OmniAdaptiveModel",
"Other_Inbound_Click_Through_Rate",
"Push_Click_Through_Rate",
"Retail_Click_Through_Rate",
"Retail_Click_Through_Rate_Outbound",
"SMS_Click_Through_Rate",
"Web_Click_Through_Rate",
]

# TODO: reconsider this whole class

class Tables:
@cached_property
Expand All @@ -46,7 +28,7 @@ def AvailableTables(self):
df = pl.DataFrame(
{
"modeldata_last_snapshot": [1, 0],
"predictor_last_snapshot": [1, 1],
"predictor_last_snapshot": [1, 1], # TODO how is this used?
"predictorbinning": [1, 1],
}
)
Expand Down Expand Up @@ -76,6 +58,8 @@ def modeldata_last_snapshot(self):

return modeldata_last_snapshot

# TODO does this summarization really belong here?

@cached_property
def predictor_last_snapshot(self):
model_identifiers = ["Configuration"] + self.context_keys
Expand Down
8 changes: 0 additions & 8 deletions python/pdstools/plots/README.md

This file was deleted.

1 change: 1 addition & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pandas
tabulate
plotly>=5.5.0
requests
pydot
Expand Down
8 changes: 8 additions & 0 deletions python/tests/test-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pytest
pytest-cov
scipy
testbook
XlsxWriter
openpyxl
ipython
ipykernel
90 changes: 70 additions & 20 deletions python/tests/test_healthcheck.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sys
import os

import pathlib
from pandas import read_excel, ExcelFile

basePath = pathlib.Path(__file__).parent.parent.parent
sys.path.append(f"{str(basePath)}/python")
Expand All @@ -14,15 +14,6 @@ def sample():
return datasets.CDHSample()


def testHealthCheckRunsWithoutErrors(sample):
sample.generateReport(verbose=True)


def testAdditionalTables(sample):
sample.exportTables(predictorBinning=True)
os.remove("Tables.xlsx")


@pytest.fixture
def sample_without_predictorbinning():
return ADMDatamart(
Expand All @@ -32,20 +23,79 @@ def sample_without_predictorbinning():
)


def testHealthCheckModelRunsWithoutErrors(sample_without_predictorbinning):
sample_without_predictorbinning.generateReport(verbose=True, modelData_only=True)
def test_GenerateHealthCheck(sample):
    """Generating the default HealthCheck report yields ./HealthCheck.html on disk."""
    report_path = sample.generateReport(verbose=True)
    assert report_path == "./HealthCheck.html"
    generated = pathlib.Path(report_path)
    assert generated.exists()
    os.remove("HealthCheck.html")
    assert not generated.exists()

def test_ExportTables(sample):
    """Exporting with predictor binning writes Tables.xlsx containing all three sheets."""
    workbook = sample.exportTables(predictorBinning=True)
    assert workbook == "Tables.xlsx"
    assert pathlib.Path(workbook).exists()
    expected_sheets = [
        "modeldata_last_snapshot",
        "predictor_last_snapshot",
        "predictorbinning",
    ]
    assert list(ExcelFile(workbook).sheet_names) == expected_sheets
    # TODO we could go further and check the size of the sheets
    # spreadsheet = read_excel(excel, sheet_name=None)
    os.remove("Tables.xlsx")
    assert not pathlib.Path(workbook).exists()

def test_ExportTables_NoBinning(sample):
    """Exporting without predictor binning omits the predictorbinning sheet."""
    workbook = sample.exportTables(predictorBinning=False)
    assert workbook == "Tables.xlsx"
    assert pathlib.Path(workbook).exists()
    expected_sheets = [
        "modeldata_last_snapshot",
        "predictor_last_snapshot",
    ]
    assert list(ExcelFile(workbook).sheet_names) == expected_sheets
    # TODO we could go further and check the size of the sheets
    # spreadsheet = read_excel(excel, sheet_name=None)
    os.remove("Tables.xlsx")
    assert not pathlib.Path(workbook).exists()

def test_GenerateHealthCheck_ModelDataOnly(sample_without_predictorbinning):
    """A named, model-data-only report is written as HealthCheck_MyOrg.html."""
    report_path = sample_without_predictorbinning.generateReport(
        name="MyOrg", verbose=True, modelData_only=True
    )
    assert report_path == "./HealthCheck_MyOrg.html"
    generated = pathlib.Path(report_path)
    assert generated.exists()
    os.remove("HealthCheck_MyOrg.html")
    assert not generated.exists()


def testAdditionalTablesModel(sample_without_predictorbinning):
sample_without_predictorbinning.exportTables(
def test_ExportTables_ModelDataOnly(sample_without_predictorbinning):
    """Exporting when only model data is available writes just the model sheet.

    The datamart fixture has no predictor data, so even with
    predictorBinning=True only ``modeldata_last_snapshot`` should appear.
    """
    excel = sample_without_predictorbinning.exportTables(
        file="ModelTables.xlsx", predictorBinning=True
    )
    assert excel == "ModelTables.xlsx"
    assert pathlib.Path(excel).exists()
    spreadsheet = ExcelFile(excel)
    assert list(spreadsheet.sheet_names) == [
        "modeldata_last_snapshot",
    ]
    # TODO we could go further and check the size of the sheets
    # spreadsheet = read_excel(excel, sheet_name=None)
    os.remove("ModelTables.xlsx")
    # Consistency with the other export/report tests: verify cleanup succeeded.
    assert not pathlib.Path(excel).exists()


def modelReportRuns(sample):
    """Smoke-check that a single-model report generates without raising."""
    # NOTE(review): the name lacks the "test_" prefix, so pytest's default
    # discovery will not collect this function — presumably dead/leftover
    # code; verify before relying on it.
    sample.generateReport(modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0")


def remove_healthCheck():
os.remove("ADM_HealthCheck.html")
def test_GenerateModelReport(sample):
    """A per-model report embeds both the org name and the model id in its filename."""
    report = sample.generateReport(
        name="MyOrg", modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0"
    )
    assert report == "./ModelReport_MyOrg_bd70a915-697a-5d43-ab2c-53b0557c85a0.html"
    generated = pathlib.Path(report)
    assert generated.exists()
    os.remove(report)
    assert not generated.exists()

def test_GenerateModelReport_Failing(sample_without_predictorbinning):
    """A per-model report must raise when the datamart has no predictor data."""
    with pytest.raises(Exception) as e_info:
        sample_without_predictorbinning.generateReport(
            name="MyOrg", modelid="bd70a915-697a-5d43-ab2c-53b0557c85a0"
        )
    # str(e_info) stringifies the ExceptionInfo wrapper (repr of the traceback
    # entry), not the raised exception; pytest documents str(e_info.value)
    # as the way to inspect the actual exception message.
    assert "Error when generating healthcheck" in str(e_info.value)
16 changes: 9 additions & 7 deletions python/tests/test_show_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@
Testing the functionality of utils/show_versions functions
"""

import datetime
import pathlib
import sys

import numpy as np
import polars as pl
import pytest
from pytz import timezone

basePath = pathlib.Path(__file__).parent.parent.parent
sys.path.append(f"{str(basePath)}/python")
from pdstools import cdh_utils, datasets
import pdstools


def test_show_versions(capsys):
    """show_versions prints the expected section headers to stdout."""
    pdstools.show_versions()
    printed = capsys.readouterr().out
    for expected in (
        "pdstools",
        "---Version info---",
        "---Dependencies---",
        "---Streamlit app dependencies---",
    ):
        assert expected in printed

0 comments on commit 1554cf6

Please sign in to comment.