Merge pull request #98 from NREL/local-scratch

daniel-thom · web-flow · commit ffc0dc0a3276 · 2024-05-15T09:17:00.000-06:00
Remove use of LOCAL_SCRATCH
diff --git a/jade/__init__.py b/jade/__init__.py
@@ -2,6 +2,6 @@
 
 import logging
 
-__version__ = "0.10.1"
+__version__ = "0.10.2"
 
 logging.getLogger(__name__).addHandler(logging.NullHandler())
diff --git a/jade/hpc/slurm_manager.py b/jade/hpc/slurm_manager.py
@@ -254,7 +254,7 @@ def get_job_stats(self, job_id):
         return stats
 
     def get_local_scratch(self):
-        for key in ("LOCAL_SCRATCH", "TMPDIR"):
+        for key in ("TMPDIR",):
             if key in os.environ:
                 return os.environ[key]
         return tempfile.gettempdir()
diff --git a/jade/utils/dataframe_utils.py b/jade/utils/dataframe_utils.py
@@ -17,7 +17,7 @@
 
 @timed_debug
 def read_dataframe(filename, index_col=None, columns=None, parse_dates=False, **kwargs):
-    """Convert filename to a dataframe. Supports .csv, .json, .feather, .h5.
+    """Convert filename to a dataframe. Supports .csv, .json, .feather.
     Handles compressed files.
 
     Parameters
@@ -64,11 +64,13 @@ def read_dataframe(filename, index_col=None, columns=None, parse_dates=False, **
         needs_new_index = True
         with open_func(filename, "rb") as f_in:
             df = pd.read_feather(f_in, **kwargs)
-    elif ext == ".h5":
-        # This assumes that the file has a single dataframe, and so the
-        # key name is not relevant.
-        df = pd.read_hdf(filename, **kwargs)
-        needs_new_index = True
+    # HDF5 (.h5) support requires the pytables library, which is painful to install on many platforms.
+    # It is disabled for now because the functionality isn't worth the installation burden.
+    # elif ext == ".h5":
+    #    # This assumes that the file has a single dataframe, and so the
+    #    # key name is not relevant.
+    #    df = pd.read_hdf(filename, **kwargs)
+    #    needs_new_index = True
     else:
         raise InvalidParameter(f"unsupported file extension {ext}")
 
@@ -184,13 +186,10 @@ def read_dataframes_by_substrings(
 def write_dataframe(df, file_path, compress=False, keep_original=False, **kwargs):
     """Write the dataframe to a file with in a format matching the extension.
 
-    Note that the feather and h5 formats do not support row indices.
+    Note that the feather format does not support row indices.
     Index columns will be lost for those formats. If the dataframe has an index
     then it should be converted to a column before calling this function.
 
-    This function only supports storing a single dataframe inside an HDF5 file.
-    It always uses the key 'data'.
-
     Parameters
     ----------
     df : pd.DataFrame
@@ -216,15 +215,16 @@ def write_dataframe(df, file_path, compress=False, keep_original=False, **kwargs
         df.to_csv(file_path, **kwargs)
     elif ext == ".feather":
         df.to_feather(file_path, **kwargs)
-    elif ext == ".h5":
-        # HDF5 supports built-in compression, levels 1-9
-        if "complevel" in kwargs:
-            complevel = kwargs["complevel"]
-        elif compress:
-            complevel = 9
-        else:
-            complevel = 0
-        df.to_hdf(file_path, "data", mode="w", complevel=complevel, **kwargs)
+    # HDF5 (.h5) support is disabled because it requires the hard-to-install pytables library.
+    # elif ext == ".h5":
+    #    # HDF5 supports built-in compression, levels 1-9
+    #    if "complevel" in kwargs:
+    #        complevel = kwargs["complevel"]
+    #    elif compress:
+    #        complevel = 9
+    #    else:
+    #        complevel = 0
+    #    df.to_hdf(file_path, "data", mode="w", complevel=complevel, **kwargs)
     elif ext == ".json":
         df.to_json(file_path, **kwargs)
     else:
diff --git a/pyproject.toml b/pyproject.toml
@@ -62,7 +62,6 @@ dev = [
     "sphinx>=2.0",
     "sphinxcontrib-plantuml",
     "statsmodels",
-    "tables",
     "tox",
 ]
 
diff --git a/tests/unit/utils/test_dataframe_utils.py b/tests/unit/utils/test_dataframe_utils.py
@@ -65,6 +65,7 @@ def test_read_dataframe__feather():
         assert_frame_equal(df1, df2)
 
 
+@mark.skip
 def test_read_dataframe__h5():
     """Should create identical dataframe on reading HDF5 file
 
@@ -155,6 +156,7 @@ def test_write_dataframe__feather(compress):
         assert_frame_equal(df1, df2)
 
 
+@mark.skip
 @mark.parametrize("compress", [False, True])
 def test_write_dataframe__h5(compress):
     """Should write dataframe into a file with matching extension"""

Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,6 @@ dev = [`
`62`	`62`	`"sphinx>=2.0",`
`63`	`63`	`"sphinxcontrib-plantuml",`
`64`	`64`	`"statsmodels",`
`65`		`- "tables",`
`66`	`65`	`"tox",`
`67`	`66`	`]`
`68`	`67`