holoviz · jbednar · May 26, 2023 · May 28, 2021 · May 28, 2021 · May 28, 2021
diff --git a/param/__init__.py b/param/__init__.py
@@ -46,6 +46,12 @@
     __version__ = "0.0.0+unknown"
 
 
+try:
+    FileNotFoundError  # probe: raises NameError when the builtin is not defined
+except NameError:
+    FileNotFoundError = IOError  # name missing: alias it to IOError instead
+
+
 dt_types = (dt.datetime, dt.date)
 
 try:
@@ -282,6 +288,27 @@ def _get_min_max_value(min, max, value=None, step=None):
     return min, max, value
 
 
+def _deserialize_from_path(ext_to_routine, path, type_name):
+    """Load ``path`` with the routine ``ext_to_routine`` maps its extension to."""
+    if not os.path.isfile(path):
+        raise FileNotFoundError("'{}' does not exist or is not a file".format(path))
+    root, ext = os.path.splitext(path)
+    if ext in {'.gz', '.bz2', '.xz', '.zip'}:
+        # Compressed file: dispatch on the inner extension and assume the
+        # routine decompresses such files transparently.
+        ext = os.path.splitext(root)[1]
+    try:
+        if ext in ext_to_routine:
+            return ext_to_routine[ext](path)
+    except Exception:
+        # The extension matched, so this was most likely the intended routine.
+        # Log the failure (with traceback) before raising ValueError below.
+        get_logger().warning(
+            "Could not parse file '{}' as {}".format(path, type_name), exc_info=True)
+    raise ValueError(
+        "No deserialization routine for files with '{}' extension".format(ext))
+
+
 class Infinity(object):
     """
     An instance of this class represents an infinite value. Unlike
@@ -1461,8 +1488,15 @@ def serialize(cls, value):
 
     @classmethod
     def deserialize(cls, value):
-        from numpy import asarray
-        return asarray(value)
+        """Load a .npy/.txt file if value is a path, else convert value to an array."""
+        import numpy
+        try:
+            return _deserialize_from_path(
+                {'.npy': numpy.load, '.txt': lambda x: numpy.loadtxt(str(x))},
+                value, 'Array')
+        except Exception:
+            pass  # not a loadable file path: fall back to treating value as data
+        return numpy.asarray(value)
 
 
 class DataFrame(ClassSelector):
@@ -1544,8 +1578,26 @@ def serialize(cls, value):
 
     @classmethod
     def deserialize(cls, value):
-        from pandas import DataFrame as pdDFrame
-        return pdDFrame(value)
+        """Read a DataFrame from a file path when possible, else build it from value."""
+        import pandas
+        try:
+            # FIXME(sdrobert): no HDF support yet; pandas.read_hdf requires pytables,
+            # which installs no prob with conda but needs hdf5 headers for pip.
+            return _deserialize_from_path({
+                '.csv': pandas.read_csv,
+                '.dta': pandas.read_stata,
+                '.feather': pandas.read_feather,
+                '.json': pandas.read_json,
+                '.ods': pandas.read_excel,
+                '.parquet': pandas.read_parquet,
+                '.pkl': pandas.read_pickle,  # NOTE: unpickling can run arbitrary code
+                '.tsv': lambda x: pandas.read_csv(x, sep='\t'),
+                '.xlsm': pandas.read_excel,
+                '.xlsx': pandas.read_excel,
+            }, value, 'DataFrame')
+        except Exception:
+            pass  # not a loadable file path: fall back to treating value as data
+        return pandas.DataFrame(value)
 
 
 class Series(ClassSelector):

diff --git a/param/parameterized.py b/param/parameterized.py
@@ -748,7 +748,7 @@ def __init__(self,default=None, doc=None, label=None, precedence=None,  # pylint
         False, so that a user can choose to change the value at the
         Parameterized instance level (affecting only that instance) or
         at the Parameterized class or superclass level (affecting all
-        existing and future instances of that class or superclass). For 
+        existing and future instances of that class or superclass). For
         a mutable Parameter value, the default of False is also appropriate
         if you want all instances to share the same value state, e.g. if
         they are each simply referring to a single global object like

diff --git a/tests/API1/testfiledeserialization.py b/tests/API1/testfiledeserialization.py
@@ -0,0 +1,200 @@
+"""
+Test deserialization routines that read from file
+"""
+
+import logging
+from unittest.case import skip
+import param
+import sys
+
+from . import API1TestCase
+from unittest import skipIf
+from tempfile import mkdtemp
+from shutil import rmtree
+from param.parameterized import get_logger
+
+
+try:
+    import numpy as np
+    ndarray = np.array([[1, 2, 3], [4, 5, 6]])
+except Exception:
+    np = ndarray = None
+
+try:
+    import pandas as pd
+    pd_ver = tuple(int(part) for part in pd.__version__.split('.')[:2])
+    df = pd.DataFrame({'A': [1, 2, 3], 'B': [1.1, 2.2, 3.3]})
+    modern_pd = pd if pd_ver >= (1, 2) else None  # tuple compare: 2.0 is modern too
+except Exception:
+    pd = df = modern_pd = None
+
+# The writer could be xlsxwriter, but the sufficient condition is the presence of
+# openpyxl
+try:
+    import openpyxl as xlsxm
+except Exception:
+    xlsxm = None
+
+try:
+    import odf as ods
+except Exception:
+    ods = None
+
+# prior to pandas version 1.2, xlrd was always the default excel reader (though it
+# had to be of a version before xlrd's 2.0).
+xls = None
+try:
+    import xlrd as xls
+    if int(xls.__version__.split('.')[0]) >= 2:
+        raise Exception()  # xlrd 2.0+ counts as unusable here; handled below
+except Exception:
+    if modern_pd is None:
+        xlsxm = None  # old pandas without a usable xlrd: disable the xlsx/xlsm tests
+
+try:
+    import feather
+except Exception:
+    feather = None
+
+try:
+    import fastparquet as parquet
+except Exception:
+    parquet = None
+try:
+    import pyarrow as parquet
+except Exception:
+    pass  # keep whatever the fastparquet attempt left in ``parquet``
+
+
+np_skip = skipIf(np is None, "NumPy is not available")
+pd_skip = skipIf(pd is None, "pandas is not available")
+modern_pd_skip = skipIf(modern_pd is None, "pandas is too old")
+xlsxm_skip = skipIf(xlsxm is None, "openpyxl is not available")
+ods_skip = skipIf(ods is None, "odfpy is not available")
+xls_skip = skipIf(xls is None, "xlrd is not available")
+feather_skip = skipIf(feather is None, "feather-format is not available")
+parquet_skip = skipIf(parquet is None, "fastparquet and pyarrow are not available")
+
+
+class TestSet(param.Parameterized):
+    array = None if np is None else param.Array(default=ndarray)  # None without NumPy
+    data_frame = None if pd is None else param.DataFrame(default=df)  # None without pandas
+
+
+class TestFileDeserialization(API1TestCase):
+
+    def run(self, result):
+        self.temp_dir = mkdtemp().replace('\\', '/')  # temp dir path with '/' separators
+        try:
+            return super(TestFileDeserialization, self).run(result=result)
+        finally:
+            rmtree(self.temp_dir, ignore_errors=True)  # always remove the temp dir
+
+    @np_skip
+    def _test_deserialize_array(self, obj, path, pname, check=True):
+        # Precondition: the parameter value has already been serialized to path.
+        deserialized = obj.param.deserialize_value(
+            pname, '"{}"'.format(path), mode='json')  # path is passed as a JSON string
+        if check:
+            self.assertTrue(np.array_equal(deserialized, getattr(obj, pname)))
+
+    @np_skip
+    def test_array_npy(self):
+        path = '{}/val.npy'.format(self.temp_dir)
+        np.save(path, TestSet.array)
+        self._test_deserialize_array(TestSet, path, 'array')
+
+    @np_skip
+    @skipIf(sys.version_info[0] < 3, "assertLogs not in py2k")
+    def test_bad_deserialization_warns(self):
+        path = '{}/val.npy'.format(self.temp_dir)
+        with open(path, 'w'):  # create an empty file carrying the .npy extension
+            pass
+        with self.assertLogs(get_logger(), level=logging.WARN) as cm:
+            # Deserialization still succeeds (the value parses as a string array),
+            # but that is probably not what the user wanted, so it should warn.
+            self._test_deserialize_array(TestSet, path, 'array', False)
+        self.assertRegex(cm.output[0], "Could not parse")
+
+    @np_skip
+    def test_array_txt(self):
+        path = '{}/val.txt'.format(self.temp_dir)
+        np.savetxt(path, TestSet.array)
+        self._test_deserialize_array(TestSet, path, 'array')
+
+    @np_skip
+    def test_array_txt_gz(self):
+        path = '{}/val.txt.gz'.format(self.temp_dir)
+        np.savetxt(path, TestSet.array)
+        self._test_deserialize_array(TestSet, path, 'array')
+
+    @pd_skip
+    def test_data_frame_pkl(self):
+        path = '{}/val.pkl.zip'.format(self.temp_dir)
+        TestSet.data_frame.to_pickle(path)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    def test_data_frame_csv(self):
+        path = '{}/val.csv.bz2'.format(self.temp_dir)
+        TestSet.data_frame.to_csv(path, index=False)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    def test_data_frame_tsv(self):
+        path = '{}/val.tsv'.format(self.temp_dir)
+        TestSet.data_frame.to_csv(path, index=False, sep='\t')
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    def test_data_frame_json(self):
+        path = '{}/val.json'.format(self.temp_dir)
+        TestSet.data_frame.to_json(path)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    # FIXME(sdrobert): no test for '.xls' (old-style excel) files. There are two
+    # distinct engines for reading and writing these, and the writer engine is
+    # deprecated by pandas. We could store a serialized file as a byte array, but
+    # that would break if the default data_frame value were ever changed.
+
+    @pd_skip
+    @xlsxm_skip
+    def test_data_frame_xlsm(self):
+        path = '{}/val.xlsm'.format(self.temp_dir)
+        TestSet.data_frame.to_excel(path, index=False)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    @xlsxm_skip
+    def test_data_frame_xlsx(self):
+        path = '{}/val.xlsx'.format(self.temp_dir)
+        TestSet.data_frame.to_excel(path, index=False)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    @ods_skip
+    @skipIf(sys.version_info[0] < 3, "py2k pandas does not support 'ods'")
+    def test_data_frame_ods(self):
+        path = '{}/val.ods'.format(self.temp_dir)
+        TestSet.data_frame.to_excel(path, index=False)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    @feather_skip
+    def test_data_frame_feather(self):
+        path = '{}/val.feather'.format(self.temp_dir)
+        TestSet.data_frame.to_feather(path)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    @parquet_skip
+    def test_data_frame_parquet(self):
+        path = '{}/val.parquet'.format(self.temp_dir)
+        TestSet.data_frame.to_parquet(path)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
+
+    @pd_skip
+    def test_data_frame_stata(self):
+        path = '{}/val.dta'.format(self.temp_dir)
+        TestSet.data_frame.to_stata(path, write_index=False)
+        self._test_deserialize_array(TestSet, path, 'data_frame')
diff --git a/tox.ini b/tox.ini
@@ -41,17 +41,55 @@ deps = {[testenv]deps}
        gmpy
 setenv = PARAM_TEST_GMPY = 1
 
+# xlrd is the reader for xls files in pandas. xlrd is also the reader
+# for xlsx and xlsm files for pandas<1.2, but this is only possible using
+# xlrd<2. If pandas>=1.2 is guaranteed, you can remove the version spec
+[testenv:with_xlrd]
+deps = {[testenv]deps}
+       xlrd<2
+setenv = PARAM_TEST_XLRD = 1
+
+[testenv:with_openpyxl]
+deps = {[testenv]deps}
+       openpyxl
+setenv = PARAM_TEST_OPENPYXL = 1
+
+[testenv:with_odfpy]
+deps = {[testenv]deps}
+       odfpy
+setenv = PARAM_TEST_ODFPY = 1
+
+[testenv:with_feather_format]
+deps = {[testenv]deps}
+       feather-format
+setenv = PARAM_TEST_FEATHER_FORMAT = 1
+
+[testenv:with_pyarrow]
+deps = {[testenv]deps}
+       pyarrow
+setenv = PARAM_TEST_PYARROW = 1
+
 [testenv:with_all]
 deps = {[testenv:with_numpy]deps}
        {[testenv:with_pandas]deps}
        {[testenv:with_ipython]deps}
        {[testenv:with_jsonschema]deps}
        {[testenv:with_gmpy]deps}
+       {[testenv:with_openpyxl]deps}
+       {[testenv:with_odfpy]deps}
+       {[testenv:with_feather_format]deps}
+       {[testenv:with_pyarrow]deps}
+       {[testenv:with_xlrd]deps}
 setenv = {[testenv:with_numpy]setenv}
          {[testenv:with_pandas]setenv}
          {[testenv:with_ipython]setenv}
          {[testenv:with_jsonschema]setenv}
          {[testenv:with_gmpy]setenv}
+         {[testenv:with_openpyxl]setenv}
+         {[testenv:with_odfpy]setenv}
+         {[testenv:with_feather_format]setenv}
+         {[testenv:with_pyarrow]setenv}
+         {[testenv:with_xlrd]setenv}
 
 [testenv:coverage]
 # remove develop install if https://github.com/ioam/param/issues/219