-
Notifications
You must be signed in to change notification settings - Fork 298
Open
Labels
Description
🐛 Bug Report
When cubes have string content (normally with dtype 'S1' and a string dimension), cube comparison fails.
How To Reproduce
>>> import numpy as np
>>> from iris.cube import Cube
>>> cube1 = Cube(np.array([list('abc'), list('def')], dtype='S1'))
>>> print(cube1)
unknown / (unknown) (-- : 2; -- : 3)
>>> cube1.data
array([[b'a', b'b', b'c'],
[b'd', b'e', b'f']], dtype='|S1')
>>>
>>> cube2 = cube1.copy()
>>> cube1 == cube2
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/h05/itpp/git/iris/iris_main/lib/iris/cube.py", line 3672, in __eq__
).compute()
^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/base.py", line 314, in compute
(result,) = compute(self, traverse=False, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/base.py", line 599, in compute
results = schedule(dsk, keys, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/threaded.py", line 89, in get
results = get_async(
^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/local.py", line 511, in get_async
raise_exception(exc, tb)
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/local.py", line 319, in reraise
raise exc
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/local.py", line 224, in execute_task
result = _execute_task(task, data)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 119, in <genexpr>
return func(*(_execute_task(a, cache) for a in args))
^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 113, in _execute_task
return [_execute_task(a, cache) for a in arg]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 113, in <listcomp>
return [_execute_task(a, cache) for a in arg]
^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 113, in _execute_task
return [_execute_task(a, cache) for a in arg]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 113, in <listcomp>
return [_execute_task(a, cache) for a in arg]
^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/optimization.py", line 990, in __call__
return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 149, in get
result = _execute_task(task, cache)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 119, in <genexpr>
return func(*(_execute_task(a, cache) for a in args))
^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 113, in _execute_task
return [_execute_task(a, cache) for a in arg]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 113, in <listcomp>
return [_execute_task(a, cache) for a in arg]
^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/utils.py", line 73, in apply
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/dask/array/core.py", line 4919, in _enforce_dtype
result = function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "<__array_function__ internals>", line 200, in isclose
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/numpy/core/numeric.py", line 2374, in isclose
dt = multiarray.result_type(y, 1.)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<__array_function__ internals>", line 200, in result_type
TypeError: The DType <class 'numpy._FloatAbstractDType'> could not be promoted by <class 'numpy.dtype[bytes_]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[bytes_]'>, <class 'numpy._FloatAbstractDType'>)
>>>
Expected behaviour
Clearly, this should succeed and return True.
Key info
Although it appears to be a failure of dask.array.allclose, I think this is really a numpy problem:
>>> np.all(cube1.data == cube2.data)
True
>>> np.allclose(cube1.data, cube2.data)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<__array_function__ internals>", line 200, in allclose
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/numpy/core/numeric.py", line 2270, in allclose
res = all(isclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<__array_function__ internals>", line 200, in isclose
File "/tmp/persistent/newconda-envs/ncdata/lib/python3.11/site-packages/numpy/core/numeric.py", line 2374, in isclose
dt = multiarray.result_type(y, 1.)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<__array_function__ internals>", line 200, in result_type
TypeError: The DType <class 'numpy._FloatAbstractDType'> could not be promoted by <class 'numpy.dtype[bytes_]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[bytes_]'>, <class 'numpy._FloatAbstractDType'>)
>>>
So, perhaps we need to special-case character data so it doesn't use 'allclose' for comparison.
Environment
Latest 'main' branch of Iris
Dask version '2023.5.0'
Numpy version '1.24.2'