Merge pull request #213 from pepkit/dev

v0.12.0 Release
pepkit · Jan 16, 2025 · 9aa242c · 9aa242c
2 parents c47035d + 53437f8
commit 9aa242c
Show file tree

Hide file tree

Showing 9 changed files with 135 additions and 12 deletions.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -2,6 +2,13 @@
 
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
 
+## [0.12.0] - 2025-01-16
+### Fixed
+- portable report messaging now includes the proper `.zip` file extension.
+- add exception to pipestat summarize if there are no results to report [#210](https://github.com/pepkit/pipestat/issues/210)
+- fix spaces in html files [#211](https://github.com/pepkit/pipestat/issues/211)
+- add output_dir parameter to psm.table
+
 ## [0.11.0] - 2024-10-02
 ### Fixed
 - for output schema, make samples an array type and nest under items [#204](https://github.com/pepkit/pipestat/issues/204)

diff --git a/pipestat/_version.py b/pipestat/_version.py
@@ -1 +1 @@
-__version__ = "0.11.0"
+__version__ = "0.12.0"
diff --git a/pipestat/backends/db_backend/dbbackend.py b/pipestat/backends/db_backend/dbbackend.py
@@ -445,7 +445,13 @@ def select_records(
         ORM = self.get_model(table_name=self.table_name)
 
         with self.session as s:
-            total_count = len(s.exec(sql_select(ORM)).all())
+
+            try:
+                total_count = len(s.exec(sql_select(ORM)).all())
+            except Exception as e:
+                raise PipestatDatabaseError(
+                    msg=f"Could not get total_count. Is the database empty? Original Error Message: {e}"
+                ) from e
 
             if columns is not None:
                 columns = copy.deepcopy(columns)

diff --git a/pipestat/exceptions.py b/pipestat/exceptions.py
@@ -22,6 +22,7 @@
     "ColumnNotFoundError",
     "SchemaValidationErrorDuringReport",
     "PipestatPEPHubError",
+    "PipestatSummarizeError",
 ]
 
 
@@ -104,6 +105,13 @@ def __init__(self, msg):
         super(PipestatDataError, self).__init__(msg)
 
 
+class PipestatSummarizeError(PipestatError):
+    """Raised when pipestat summarize cannot proceed, e.g. no results are found."""
+
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 class InvalidTimeFormatError(PipestatError):
     """Data error for local data associated with file backend"""
 

diff --git a/pipestat/helpers.py b/pipestat/helpers.py
@@ -184,7 +184,7 @@ def get_all_result_files(results_file_path: str) -> List:
     return files
 
 
-def zip_report(report_dir_name: str):
+def zip_report(report_dir_name: str) -> Union[str, None]:
     """
 
     Walks through files and attempts to zip them into a Zip object using default compression.
@@ -202,6 +202,8 @@ def zip_report(report_dir_name: str):
         _LOGGER.warning("Report zip file not created! \n {e}")
 
     if os.path.exists(zip_file_name + ".zip"):
-        _LOGGER.info(f"Report zip file successfully created: {zip_file_name}")
+        _LOGGER.info(f"Report zip file successfully created: {zip_file_name}.zip")
+        return f"{zip_file_name}.zip"
     else:
         _LOGGER.warning("Report zip file not created.")
+        return None
diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py
@@ -50,6 +50,7 @@
     PipestatDependencyError,
     RecordNotFoundError,
     SchemaNotFoundError,
+    PipestatSummarizeError,
 )
 from .helpers import default_formatter, make_subdirectories, validate_type, zip_report
 from .reports import HTMLReportBuilder, _create_stats_objs_summaries
@@ -937,6 +938,14 @@ def summarize(
 
         """
 
+        # Fail early if the backend holds no results; only the backend query itself is wrapped
+        try:
+            current_records = self.select_records()["records"]
+        except Exception as e:
+            raise PipestatSummarizeError(f"PipestatSummarizeError due to exception: {e}") from e
+        if not current_records:
+            raise PipestatSummarizeError("No results found at specified backend")
+
         if output_dir:
             self.cfg[OUTPUT_DIR] = output_dir
 
@@ -955,7 +964,7 @@ def summarize(
         )
 
         if portable is True:
-            zip_report(report_dir_name=os.path.dirname(report_path))
+            report_path = zip_report(report_dir_name=os.path.dirname(report_path))
 
         return report_path
 
@@ -975,12 +984,16 @@ def check_multi_results(self):
     @require_backend
     def table(
         self,
+        output_dir: Optional[str] = None,
     ) -> List[str]:
         """
         Generates stats (.tsv) and object (.yaml) files.
+        :param str output_dir: overrides output_dir set during pipestatManager creation.
         :return list[str] table_path_list: list containing output file paths of stats and objects
 
         """
+        if output_dir:
+            self.cfg[OUTPUT_DIR] = output_dir
 
         self.check_multi_results()
         pipeline_name = self.cfg[PIPELINE_NAME]

diff --git a/pipestat/reports.py b/pipestat/reports.py
@@ -173,7 +173,9 @@ def create_object_parent_html(self, navbar, footer):
                 else ""
             )
             labels.append(f"<b>{key.replace('_', ' ')}</b>: {desc}")
-            page_path = os.path.join(self.pipeline_reports, f"{key}.html".lower())
+            page_path = os.path.join(
+                self.pipeline_reports, f"{key}.html".replace(" ", "_").lower()
+            )
             pages.append(os.path.relpath(page_path, self.pipeline_reports))
 
         template_vars = dict(
@@ -364,7 +366,9 @@ def create_object_htmls(self, navbar, footer):
             os.makedirs(self.pipeline_reports)
         for file_result in file_results:
             links = []
-            html_page_path = os.path.join(self.pipeline_reports, f"{file_result}.html".lower())
+            html_page_path = os.path.join(
+                self.pipeline_reports, f"{file_result}.html".replace(" ", "_").lower()
+            )
 
             pipeline_types = ["sample", "project"]
 
@@ -511,8 +515,9 @@ def create_sample_html(self, sample_stats, navbar, footer, sample_name):
         """
         if not os.path.exists(self.pipeline_reports):
             os.makedirs(self.pipeline_reports)
-        html_page = os.path.join(self.pipeline_reports, f"{sample_name}.html".lower())
-
+        html_page = os.path.join(
+            self.pipeline_reports, f"{sample_name}.html".replace(" ", "_").lower()
+        )
         if self.prj.cfg["multi_result_files"] is True:
             self.prj.cfg["record_identifier"] = sample_name
             temp_result_file_path = mkabs(
@@ -978,7 +983,7 @@ def _get_navbar_dropdown_data_objects(self, objs, wd, context):
         for obj_id in objs:
             displayable_ids.append(obj_id.replace("_", " "))
             page_name = os.path.join(
-                self.pipeline_reports, (obj_id + ".html").replace(" ", "%20").lower()
+                self.pipeline_reports, (obj_id + ".html").replace(" ", "_").lower()
             )
             relpaths.append(_make_relpath(page_name, wd, context))
         return relpaths, displayable_ids
@@ -997,7 +1002,7 @@ def _get_navbar_dropdown_data_samples(self, wd, context):
                 sample_name = sample["record_identifier"]
                 page_name = os.path.join(
                     self.pipeline_reports,
-                    f"{sample_name}.html".replace(" ", "%20").lower(),
+                    f"{sample_name}.html".replace(" ", "_").lower(),
                 )
                 relpaths.append(_make_relpath(page_name, wd, context))
                 sample_names.append(sample_name)

diff --git a/tests/data/output_schema_html_report.yaml b/tests/data/output_schema_html_report.yaml
@@ -18,6 +18,9 @@ properties:
       switch_value:
         type: boolean
         description: "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras pharetra."
+      output file with spaces:
+        $ref: "#/$defs/file"
+        description: "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce nec cursus nulla."
   samples:
     type: array
     items:

diff --git a/tests/test_pipestat.py b/tests/test_pipestat.py
@@ -13,6 +13,7 @@
 from pipestat.cli import main
 from pipestat.const import *
 from pipestat.exceptions import *
+from pipestat.exceptions import PipestatSummarizeError
 from pipestat.helpers import default_formatter, markdown_formatter
 from pipestat.parsed_schema import ParsedSchema
 
@@ -1212,6 +1213,29 @@ def test_basics_samples_html_report(
             htmlreportpath = psm.summarize(amendment="")
             assert htmlreportpath is not None
 
+    @pytest.mark.parametrize("backend", ["file", "db"])
+    def test_exception_samples_html_report(
+        self,
+        config_file_path,
+        output_schema_html_report,
+        results_file_path,
+        backend,
+        values_sample,
+    ):
+        """Summarizing a backend with no reported results raises PipestatSummarizeError."""
+        with NamedTemporaryFile() as tmp, ContextManagerDBTesting(DB_URL):
+            if backend == "db":
+                backend_kwargs = {"config_file": config_file_path}
+            else:
+                backend_kwargs = {"results_file_path": tmp.name}
+            manager = SamplePipestatManager(
+                schema_path=output_schema_html_report, database_only=False, **backend_kwargs
+            )
+
+            # Nothing was reported above, so summarize has nothing to work with.
+            with pytest.raises(PipestatSummarizeError):
+                manager.summarize(amendment="")
+
     @pytest.mark.parametrize("backend", ["file", "db"])
     def test_basics_project_html_report(
         self,
@@ -1311,10 +1335,65 @@ def test_zip_html_report_portable(
             htmlreportpath = psm.summarize(amendment="", portable=True)
 
             directory = os.path.dirname(htmlreportpath)
-            zip_files = glob.glob(directory + "*.zip")
+            zip_files = glob.glob(os.path.join(directory, "*.zip"))
 
             assert len(zip_files) > 0
 
+    @pytest.mark.parametrize("backend", ["file", "db"])
+    def test_report_spaces_in_record_identifiers(
+        self,
+        config_file_path,
+        output_schema_html_report,
+        results_file_path,
+        backend,
+        values_project,
+    ):
+        """Report HTML file names use underscores in place of spaces."""
+        with NamedTemporaryFile() as tmp, ContextManagerDBTesting(DB_URL):
+            if backend == "db":
+                backend_kwargs = {"config_file": config_file_path}
+            else:
+                backend_kwargs = {"results_file_path": tmp.name}
+            # project level
+            manager = ProjectPipestatManager(
+                schema_path=output_schema_html_report,
+                database_only=False,
+                **backend_kwargs,
+            )
+
+            for record_map in values_project:
+                for rec_id, reported_values in record_map.items():
+                    manager.report(
+                        record_identifier=rec_id,
+                        values=reported_values,
+                        force_overwrite=True,
+                    )
+                    manager.set_status(record_identifier=rec_id, status_identifier="running")
+
+            # Add a record with spaces in its name
+            spaced_record = "SAMPLE Three WITH SPACES"
+            manager.report(
+                record_identifier=spaced_record,
+                values={"name_of_something": "name of something string"},
+                force_overwrite=True,
+            )
+            manager.set_status(record_identifier=spaced_record, status_identifier="completed")
+
+            # Add a second record whose reported result name also contains spaces
+            file_result = {"path": "here is path", "title": "here is a title"}
+            manager.report(
+                record_identifier="SAMPLE FOUR WITH Spaces",
+                values={"output file with spaces": file_result},
+                force_overwrite=True,
+            )
+
+            # Build the report, then list what was written next to it
+            report_dir = os.path.dirname(manager.summarize(amendment=""))
+            generated_files = os.listdir(report_dir)
+
+            assert "sample_three_with_spaces.html" in generated_files
+            assert "output_file_with_spaces.html" in generated_files
+
 
 @pytest.mark.skipif(not DB_DEPENDENCIES, reason="Requires dependencies")
 @pytest.mark.skipif(SERVICE_UNAVAILABLE, reason="requires service X to be available")