diff --git a/.gitignore b/.gitignore index 76328ee9fd..3937eab487 100644 --- a/.gitignore +++ b/.gitignore @@ -143,4 +143,4 @@ docs/static/arrow/ .perspectiverc python/perspective/perspective/tests/table/psp_test -python/perspective/perspective/node/assets/zmq.node +python/perspective/perspective/node/assets/* diff --git a/cpp/perspective/src/include/perspective/val.h b/cpp/perspective/src/include/perspective/val.h index 947457cbfc..f907f357c2 100644 --- a/cpp/perspective/src/include/perspective/val.h +++ b/cpp/perspective/src/include/perspective/val.h @@ -8,7 +8,7 @@ */ #pragma once -#ifdef(PSP_ENABLE_PYTHON) +#ifdef PSP_ENABLE_PYTHON #include diff --git a/examples/simple/test.html b/examples/simple/test.html deleted file mode 100644 index c38abc6fe8..0000000000 --- a/examples/simple/test.html +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/packages/perspective/test/js/updates.js b/packages/perspective/test/js/updates.js index 895f34ba49..56766895fa 100644 --- a/packages/perspective/test/js/updates.js +++ b/packages/perspective/test/js/updates.js @@ -1054,7 +1054,7 @@ module.exports = perspective => { table.delete(); }); - it("should apply mulitple sequential updates using '__INDEX__' on a table with explicit index set", async function() { + it("should apply multiple sequential updates using '__INDEX__' on a table with explicit index set", async function() { let table = perspective.table(data, {index: "x"}); table.update([ { diff --git a/python/perspective/README.md b/python/perspective/README.md index faca22e165..3aa6a25ee7 100644 --- a/python/perspective/README.md +++ b/python/perspective/README.md @@ -69,14 +69,14 @@ To build `perspective-python:table` from source, you'll need the following C++ d - Python 3.7 - CMake -- Boost - PyBind11 +- numpy - tbb On MacOS, you should be able to install Boost, PyBind11, and tbb from brew: ```shell -brew install boost pybind11 tbb +brew install pybind11 tbb ``` And then install Python dependencies using pip: diff --git a/python/perspective/perspective/include/perspective/python/base.h b/python/perspective/perspective/include/perspective/python/base.h index 4b97d061e7..c57ee9e412 100644 --- a/python/perspective/perspective/include/perspective/python/base.h +++ b/python/perspective/perspective/include/perspective/python/base.h @@ -19,6 +19,11 @@ #include #include +/****************************************************************************** + * + * Numpy includes + */ +#include /****************************************************************************** * diff --git a/python/perspective/perspective/include/perspective/python/utils.h b/python/perspective/perspective/include/perspective/python/utils.h index e794d5ff7f..1d9ee65f61 100644 --- a/python/perspective/perspective/include/perspective/python/utils.h +++ b/python/perspective/perspective/include/perspective/python/utils.h @@ -37,8 +37,6 @@ static auto IS_BYTES = [](auto type_instance) { return type_instance.is(py::modu * * Date Parsing */ -t_date pythondate_to_t_date(t_val date); -std::int64_t pythondatetime_to_ms(t_val datetime); t_dtype type_string_to_t_dtype(std::string type, std::string name = ""); t_dtype type_string_to_t_dtype(py::str type, py::str name = ""); diff --git a/python/perspective/perspective/node/assets/zmq.node b/python/perspective/perspective/node/assets/zmq.node deleted file mode 100755 index b8b56aa337..0000000000 Binary files a/python/perspective/perspective/node/assets/zmq.node and /dev/null differ diff --git a/python/perspective/perspective/src/fill.cpp b/python/perspective/perspective/src/fill.cpp index 312f0623cd..a534508d33 100644 --- a/python/perspective/perspective/src/fill.cpp +++ b/python/perspective/perspective/src/fill.cpp @@ -62,7 +62,7 @@ _fill_col_time(t_data_accessor accessor, std::shared_ptr col, std::str continue; } - col->set_nth(i, pythondatetime_to_ms(item)); + col->set_nth(i, item.cast()); } } } @@ -107,7 +107,10 @@ _fill_col_date(t_data_accessor accessor, std::shared_ptr col, std::str continue; } - col->set_nth(i, pythondate_to_t_date(item)); + + auto date_components = item.cast>(); + t_date dt = t_date(date_components["year"], date_components["month"], date_components["day"]); + col->set_nth(i, dt); } } } @@ -229,6 +232,7 @@ _fill_col_string(t_data_accessor accessor, std::shared_ptr col, std::s continue; } + // convert to a python string first std::wstring welem = item.cast(); std::wstring_convert converter; std::string elem = converter.to_bytes(welem); @@ -267,7 +271,7 @@ _fill_col_int64(t_data_accessor accessor, t_data_table& tbl, std::shared_ptr(); if (isnan(fval)) { - WARN("Promoting to string"); + WARN("Promoting %s to string from int64", name); tbl.promote_column(name, DTYPE_STR, i, false); col = tbl.get_column(name); _fill_col_string( @@ -327,7 +331,10 @@ set_column_nth(std::shared_ptr col, t_uindex idx, t_val value) { break; } case DTYPE_DATE: { - col->set_nth(idx, pythondate_to_t_date(value), STATUS_VALID); + t_date dt = t_date(value.attr("year").cast(), + value.attr("month").cast(), + value.attr("day").cast()); + col->set_nth(idx, dt, STATUS_VALID); break; } case DTYPE_TIME: { @@ -404,13 +411,13 @@ _fill_col_numeric(t_data_accessor accessor, t_data_table& tbl, // inference checked the entire column/we could reset parsing. double fval = item.cast(); if (fval > 2147483647 || fval < -2147483648) { - WARN("Promoting to float"); + WARN("Promoting %s to float from int32", name); tbl.promote_column(name, DTYPE_FLOAT64, i, true); col = tbl.get_column(name); type = DTYPE_FLOAT64; col->set_nth(i, fval); } else if (isnan(fval)) { - WARN("Promoting to string"); + WARN("Promoting column %s to string from int32", name); tbl.promote_column(name, DTYPE_STR, i, false); col = tbl.get_column(name); _fill_col_string( @@ -424,6 +431,16 @@ _fill_col_numeric(t_data_accessor accessor, t_data_table& tbl, col->set_nth(i, item.cast()); } break; case DTYPE_FLOAT64: { + bool is_float = py::isinstance(item); + bool is_numpy_nan = is_float && npy_isnan(item.cast()); + if (!is_float || is_numpy_nan) { + WARN("Promoting column %s to string from float64", name); + tbl.promote_column(name, DTYPE_STR, i, false); + col = tbl.get_column(name); + _fill_col_string( + accessor, col, name, cidx, DTYPE_STR, is_arrow, is_update); + return; + } col->set_nth(i, item.cast()); } break; default: diff --git a/python/perspective/perspective/src/table.cpp b/python/perspective/perspective/src/table.cpp index 718648b27d..6f4dacb194 100644 --- a/python/perspective/perspective/src/table.cpp +++ b/python/perspective/perspective/src/table.cpp @@ -69,7 +69,7 @@ std::shared_ptr make_table_py(t_val table, t_data_accessor accessor, t_va auto current_schema = current_data_table->get_schema(); for (auto idx = 0; idx < current_schema.m_types.size(); ++idx) { if (data_types[idx] == DTYPE_INT64) { - WARN("Promoting int64 '" + column_names[idx] + "'"); + WARN("Promoting %s to int64", column_names[idx]); current_gnode->promote_column(column_names[idx], DTYPE_INT64); } } diff --git a/python/perspective/perspective/src/utils.cpp b/python/perspective/perspective/src/utils.cpp index c3c5e13484..d0ff76fce0 100644 --- a/python/perspective/perspective/src/utils.cpp +++ b/python/perspective/perspective/src/utils.cpp @@ -16,27 +16,6 @@ namespace perspective { namespace binding { -/****************************************************************************** - * - * Date Parsing - */ -t_date -pythondate_to_t_date(t_val date) { - return t_date(date.attr("year").cast(), - date.attr("month").cast(), - date.attr("day").cast()); -} - -std::int64_t -pythondatetime_to_ms(t_val datetime) { - /** - * Rounding the python timestamp to an int causes microsecond-level precision issues. This can be exposed by - * passing a datetime with the `microsecond` field set to a roundable value, i.e. 5500. On conversion, the - * microsecond value becomes 6000 due to the rounding error. - */ - return static_cast(datetime.attr("timestamp")().cast() * 1000); -} - t_dtype type_string_to_t_dtype(std::string value, std::string name){ auto type = t_dtype::DTYPE_STR; diff --git a/python/perspective/perspective/src/view.cpp b/python/perspective/perspective/src/view.cpp index 89b43747b9..9f78b9de76 100644 --- a/python/perspective/perspective/src/view.cpp +++ b/python/perspective/perspective/src/view.cpp @@ -76,18 +76,26 @@ make_filter_term(t_dtype column_type, t_val date_parser, const std::string& colu case DTYPE_DATE: { if (py::isinstance(filter_term)) { t_val parsed_date = date_parser.attr("parse")(filter_term); - terms.push_back(mktscalar(pythondate_to_t_date(parsed_date))); + auto date_components = + date_parser.attr("to_date_components")(parsed_date).cast>(); + t_date dt = t_date(date_components["year"], date_components["month"], date_components["day"]); + terms.push_back(mktscalar(dt)); } else { - terms.push_back(mktscalar(pythondate_to_t_date(filter_term))); + auto date_components = + date_parser.attr("to_date_components")(filter_term).cast>(); + t_date dt = t_date(date_components["year"], date_components["month"], date_components["day"]); + terms.push_back(mktscalar(dt)); } } break; case DTYPE_TIME: { if (py::isinstance(filter_term)) { t_val parsed_date = date_parser.attr("parse")(filter_term); - t_tscalar timestamp = mktscalar(t_time(pythondatetime_to_ms(parsed_date))); + std::int64_t ts = date_parser.attr("to_timestamp")(parsed_date).cast(); + t_tscalar timestamp = mktscalar(t_time(ts)); terms.push_back(timestamp); } else { - t_tscalar timestamp = mktscalar(t_time(pythondatetime_to_ms(filter_term))); + t_tscalar timestamp = mktscalar( + t_time(date_parser.attr("to_timestamp")(filter_term).cast())); terms.push_back(timestamp); } } break; diff --git a/python/perspective/perspective/table/_accessor.py b/python/perspective/perspective/table/_accessor.py index 58a89428f4..53789558f2 100644 --- a/python/perspective/perspective/table/_accessor.py +++ b/python/perspective/perspective/table/_accessor.py @@ -5,12 +5,10 @@ # This file is part of the Perspective library, distributed under the terms of # the Apache License 2.0. The full license can be found in the LICENSE file. # +from math import isnan from ._date_validator import _PerspectiveDateValidator from perspective.table.libbinding import t_dtype -try: - import pandas -except (ImportError, ModuleNotFoundError): - pandas = None +import pandas def _type_to_format(data_or_schema): @@ -42,7 +40,7 @@ def _type_to_format(data_or_schema): # Can't process raise NotImplementedError("Dict values must be list or type!") else: - if pandas is None or not (isinstance(data_or_schema, pandas.DataFrame) or isinstance(data_or_schema, pandas.Series)): + if not (isinstance(data_or_schema, pandas.DataFrame) or isinstance(data_or_schema, pandas.Series)): # if pandas not installed or is not a dataframe or series raise NotImplementedError("Must be dict or list!") else: @@ -133,12 +131,40 @@ def marshal(self, cidx, ridx, type): column_name = self._names[cidx] val = self.get(column_name, ridx) - # parse string dates/datetimes into objects - if isinstance(val, str) and type in (t_dtype.DTYPE_DATE, t_dtype.DTYPE_TIME): - val = self._date_validator.parse(val) + if val is None: + return val + + # first, check for numpy nans without using numpy.isnan as it tries to cast values + if isinstance(val, float) and isnan(val): + val = None elif isinstance(val, list) and len(val) == 1: - # implicit index: strip out + # strip out values encased lists val = val[0] + elif type == t_dtype.DTYPE_INT32 or type == t_dtype.DTYPE_INT64: + if not isinstance(val, bool) and isinstance(val, float): + # should be able to update int columns with either ints or floats + val = int(val) + elif type == t_dtype.DTYPE_FLOAT32 or type == t_dtype.DTYPE_FLOAT64: + if not isinstance(val, bool) and isinstance(val, int): + # should be able to update float columns with either ints or floats + val = float(val) + elif type == t_dtype.DTYPE_DATE: + # return datetime.date + if isinstance(val, str): + parsed = self._date_validator.parse(val) + val = self._date_validator.to_date_components(parsed) + else: + val = self._date_validator.to_date_components(val) + elif type == t_dtype.DTYPE_TIME: + # return unix timestamps for time + if isinstance(val, str): + parsed = self._date_validator.parse(val) + val = self._date_validator.to_timestamp(parsed) + else: + val = self._date_validator.to_timestamp(val) + elif type == t_dtype.DTYPE_STR: + val = str(val) + return val def has_column(self, ridx, name): diff --git a/python/perspective/perspective/table/_date_validator.py b/python/perspective/perspective/table/_date_validator.py index 3c6d951dd8..4ba61384d4 100644 --- a/python/perspective/perspective/table/_date_validator.py +++ b/python/perspective/perspective/table/_date_validator.py @@ -5,6 +5,9 @@ # This file is part of the Perspective library, distributed under the terms of # the Apache License 2.0. The full license can be found in the LICENSE file. # +import time +import numpy +from datetime import datetime from dateutil.parser import parse from perspective.table.libbinding import t_dtype @@ -21,12 +24,60 @@ def parse(self, str): Params: str (str) : the datestring to parse + + Returns: + A datetime.date or datetime.datetime object if parse is successful, None otherwise ''' try: return parse(str) except (ValueError, OverflowError): return None + def to_date_components(self, d): + '''Return a dictionary of string keys and integer values for `year`, `month`, and `day`. + + This method converts both datetime.date and numpy.datetime64 objects that contain datetime.date. + ''' + if d is None: + return d + + if isinstance(d, numpy.datetime64): + if str(d) == "NaT": + return None + dt = d.astype(datetime) + return { + "year": dt.year, + "month": dt.month, + "day": dt.day + } + + return { + "year": d.year, + "month": d.month, + "day": d.day + } + + def to_timestamp(self, d): + '''Return an integer that corresponds to the Unix timestamp, i.e. number of milliseconds since epoch. + + This method converts both datetime.datetime and numpy.datetime64 objects. + ''' + if d is None: + return d + + if isinstance(d, numpy.datetime64): + if str(d) == "NaT": + return None + + d = d.astype(datetime) + + if isinstance(d, int): + # sometimes `astype(datetime)` returns an int timestamp in nanoseconds - parse this. + return round(d / 1000000) + + # Convert `datetime.datetime` and `pandas.Timestamp` to millisecond timestamps + return int((time.mktime(d.timetuple()) + d.microsecond / 1000000.0) * 1000) + def format(self, str): '''Return either t_dtype.DTYPE_DATE or t_dtype.DTYPE_TIME depending on the format of the parsed date. diff --git a/python/perspective/perspective/table/table.py b/python/perspective/perspective/table/table.py index ef6bc97b62..7090410d69 100644 --- a/python/perspective/perspective/table/table.py +++ b/python/perspective/perspective/table/table.py @@ -145,7 +145,6 @@ def update(self, data): index_pos = self._accessor._names.index(self._index) index_dtype = self._accessor._types[index_pos] self._accessor._types.append(index_dtype) - print(index_pos, index_dtype) else: self._accessor._types.append(t_dtype.DTYPE_INT32) diff --git a/python/perspective/perspective/table/view.py b/python/perspective/perspective/table/view.py index 3f3d6f485f..bdb9b87396 100644 --- a/python/perspective/perspective/table/view.py +++ b/python/perspective/perspective/table/view.py @@ -5,6 +5,7 @@ # This file is part of the Perspective library, distributed under the terms of # the Apache License 2.0. The full license can be found in the LICENSE file. # +import pandas from functools import wraps from random import random from perspective.table.libbinding import make_view_zero, make_view_one, make_view_two @@ -246,7 +247,6 @@ def to_df(self, options=None): Returns: pandas.DataFrame : a pandas dataframe containing the serialized data. ''' - import pandas cols = self.to_numpy(options=options) return pandas.DataFrame(cols) diff --git a/python/perspective/perspective/tests/node/test_node.py b/python/perspective/perspective/tests/node/test_node.py index a52120f160..c7e4eb7f20 100644 --- a/python/perspective/perspective/tests/node/test_node.py +++ b/python/perspective/perspective/tests/node/test_node.py @@ -5,10 +5,8 @@ # This file is part of the Perspective library, distributed under the terms of # the Apache License 2.0. The full license can be found in the LICENSE file. # - from perspective.node import Perspective - class TestNode(object): def test_table(self): psp = Perspective() diff --git a/python/perspective/perspective/tests/table/test_table_pandas.py b/python/perspective/perspective/tests/table/test_table_pandas.py index f83cb609c8..823b2d723d 100644 --- a/python/perspective/perspective/tests/table/test_table_pandas.py +++ b/python/perspective/perspective/tests/table/test_table_pandas.py @@ -5,7 +5,8 @@ # This file is part of the Perspective library, distributed under the terms of # the Apache License 2.0. The full license can be found in the LICENSE file. # - +from io import StringIO +from datetime import date, datetime import numpy as np from perspective.table import Table from random import random, randint, choice @@ -44,18 +45,150 @@ def superstore(count=10): data.append(dat) return pd.DataFrame(data) - class TestTableNumpy(object): def test_empty_table(self): tbl = Table([]) assert tbl.size() == 0 def test_table_dataframe(self): - import pandas as pd data = pd.DataFrame([{"a": 1, "b": 2}, {"a": 3, "b": 4}]) tbl = Table(data) assert tbl.size() == 2 + def test_table_read_nan_int_col(self): + data = pd.DataFrame({"str": ["abc", float("nan"), "def"], "int": [np.nan, 1, 2]}) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "int": float # np.nan is float type - ints convert to floats when filled in + } + assert tbl.size() == 3 + assert tbl.view().to_dict() == { + "str": ["abc", None, "def"], + "int": [None, 1.0, 2.0] + } + + def test_table_read_nan_float_col(self): + data = pd.DataFrame({"str": [float("nan"), "abc", float("nan")], "float": [np.nan, 1.5, 2.5]}) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "float": float # can only promote to string or float + } + assert tbl.size() == 3 + assert tbl.view().to_dict() == { + "str": [None, "abc", None], + "float": [None, 1.5, 2.5] + } + + def test_table_read_nan_bool_col(self): + data = pd.DataFrame({"bool": [float("nan"), True, float("nan")], "bool2": [False, float("nan"), True]}) + tbl = Table(data) + # if np.nan begins a column, it is inferred as float and then can be promoted. if np.nan is in the values (but not at start), the column type is whatever is inferred. + assert tbl.schema() == { + "bool": str, + "bool2": bool + } + assert tbl.size() == 3 + # np.nans are always serialized as None + assert tbl.view().to_dict() == { + "bool": [None, "True", None], + "bool2": [False, None, True] + } + + def test_table_read_nan_date_col(self): + data = pd.DataFrame({"str": ["abc", "def"], "date": [float("nan"), date(2019, 7, 11)]}) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "date": str # can only promote to string or float + } + assert tbl.size() == 2 + assert tbl.view().to_dict() == { + "str": ["abc", "def"], + "date": [None, '2019-07-11'] + } + + def test_table_read_nan_datetime_col(self): + data = pd.DataFrame({"str": ["abc", "def"], "datetime": [float("nan"), datetime(2019, 7, 11, 11, 0)]}) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "datetime": datetime # can only promote to string or float + } + assert tbl.size() == 2 + assert tbl.view().to_dict() == { + "str": ["abc", "def"], + "datetime": [None, datetime(2019, 7, 11, 11, 0)] + } + + def test_table_read_nan_datetime_as_date_col(self): + data = pd.DataFrame({"str": ["abc", "def"], "datetime": [float("nan"), datetime(2019, 7, 11)]}) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "datetime": datetime # can only promote to string or float + } + assert tbl.size() == 2 + assert tbl.view().to_dict() == { + "str": ["abc", "def"], + "datetime": [None, datetime(2019, 7, 11)] + } + + def test_table_read_nan_datetime_no_seconds(self): + data = pd.DataFrame({"str": ["abc", "def"], "datetime": [float("nan"), datetime(2019, 7, 11, 11, 0)]}) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "datetime": datetime # can only promote to string or float + } + assert tbl.size() == 2 + assert tbl.view().to_dict() == { + "str": ["abc", "def"], + "datetime": [None, datetime(2019, 7, 11, 11, 0)] + } + + def test_table_read_nan_datetime_milliseconds(self): + data = pd.DataFrame({"str": ["abc", "def"], "datetime": [np.nan, datetime(2019, 7, 11, 10, 30, 55)]}) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "datetime": datetime # can only promote to string or float + } + assert tbl.size() == 2 + assert tbl.view().to_dict() == { + "str": ["abc", "def"], + "datetime": [None, datetime(2019, 7, 11, 10, 30, 55)] + } + + def test_table_correct_csv_nan_end(self): + csv = StringIO("str,int\n,1\n,2\nabc,3") + data = pd.read_csv(csv) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "int": int + } + assert tbl.size() == 3 + assert tbl.view().to_dict() == { + "str": [None, None, "abc"], + "int": [1, 2, 3] + } + + def test_table_correct_csv_nan_intermittent(self): + csv = StringIO("str,float\nabc,\n,2\nghi,") + data = pd.read_csv(csv) + tbl = Table(data) + assert tbl.schema() == { + "str": str, + "float": float + } + assert tbl.size() == 3 + assert tbl.view().to_dict() == { + "str": ["abc", None, "ghi"], + "float": [None, 2, None] + } + def test_table_series(self): import pandas as pd data = pd.Series([1, 2, 3], name="a") @@ -67,6 +200,7 @@ def test_rowpivots(self): df = superstore() df_pivoted = df.set_index(['Country', 'Region']) table = Table(df_pivoted) + assert table.size() == 10 def test_pivottable(self): df = superstore() diff --git a/python/perspective/perspective/tests/table/test_update.py b/python/perspective/perspective/tests/table/test_update.py index f28ff91d9c..2aadd23b72 100644 --- a/python/perspective/perspective/tests/table/test_update.py +++ b/python/perspective/perspective/tests/table/test_update.py @@ -5,7 +5,9 @@ # This file is part of the Perspective library, distributed under the terms of # the Apache License 2.0. The full license can be found in the LICENSE file. # -from pytest import mark +import numpy as np +import pandas as pd +from datetime import date, datetime from perspective.table import Table @@ -47,6 +49,381 @@ def test_update_columnar_partial(self): tbl.update({"a": ["abc"], "b": [456]}) assert tbl.view().to_records() == [{"a": "abc", "b": 456}] + # numpy array updates + + def test_update_np(self): + tbl = Table({"a": [1, 2, 3, 4]}) + tbl.update({"a": np.array([5, 6, 7, 8])}) + assert tbl.view().to_dict() == { + "a": [1, 2, 3, 4, 5, 6, 7, 8] + } + + def test_update_np_datetime(self): + tbl = Table({ + "a": [np.datetime64(datetime(2019, 7, 11, 11, 0))] + }) + + tbl.update({ + "a": np.array([datetime(2019, 7, 12, 11, 0)], dtype=datetime) + }) + + assert tbl.view().to_dict() == { + "a": [datetime(2019, 7, 11, 11, 0), datetime(2019, 7, 12, 11, 0)] + } + + def test_update_np_partial(self): + tbl = Table({ + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"] + }, {"index": "b"}) + + tbl.update({ + "a": np.array([5, 6, 7, 8]), + "b": np.array(["a", "b", "c", "d"], dtype=object) + }) + + assert tbl.view().to_dict() == { + "a": [5, 6, 7, 8], + "b": ["a", "b", "c", "d"] + } + + def test_update_np_partial_implicit(self): + tbl = Table({"a": [1, 2, 3, 4]}) + + tbl.update({ + "a": np.array([5, 6, 7, 8]), + "__INDEX__": np.array([0, 1, 2, 3]) + }) + + assert tbl.view().to_dict() == { + "a": [5, 6, 7, 8] + } + + def test_update_np_datetime_partial(self): + tbl = Table({ + "a": [np.datetime64(datetime(2019, 7, 11, 11, 0))], + "b": [1] + }, {"index": "b"}) + + tbl.update({ + "a": np.array([datetime(2019, 7, 12, 11, 0)], dtype=datetime), + "b": np.array([1]) + }) + + assert tbl.view().to_dict() == { + "a": [datetime(2019, 7, 12, 11, 0)], + "b": [1] + } + + def test_update_np_nonseq_partial(self): + tbl = Table({ + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"] + }, {"index": "b"}) + + tbl.update({ + "a": np.array([5, 6, 7]), + "b": np.array(["a", "c", "d"], dtype=object)} + ) + + assert tbl.view().to_dict() == { + "a": [5, 2, 6, 7], + "b": ["a", "b", "c", "d"] + } + + def test_update_np_with_none_partial(self): + tbl = Table({ + "a": [1, np.nan, 3], + "b": ["a", None, "d"] + }, {"index": "b"}) + + tbl.update({ + "a": np.array([4, 5]), + "b": np.array(["a", "d"], dtype=object) + }) + + assert tbl.view().to_dict() == { + "a": [None, 4, 5], + "b": [None, "a", "d"] # pkeys are ordered + } + + def test_update_np_unset_partial(self): + tbl = Table({ + "a": [1, 2, 3], + "b": ["a", "b", "c"] + }, {"index": "b"}) + + tbl.update({ + "a": np.array([None, None]), + "b": np.array(["a", "c"], dtype=object) + }) + + assert tbl.view().to_dict() == { + "a": [None, 2, None], + "b": ["a", "b", "c"] + } + + def test_update_np_nan_partial(self): + tbl = Table({ + "a": [1, 2, 3], + "b": ["a", "b", "c"] + }, {"index": "b"}) + + tbl.update({ + "a": np.array([None, None]), + "b": np.array(["a", "c"], dtype=object) + }) + + assert tbl.view().to_dict() == { + "a": [None, 2, None], + "b": ["a", "b", "c"] + } + + # pandas dataframe updates + + def test_update_df(self): + tbl = Table({"a": [1, 2, 3, 4]}) + + update_data = pd.DataFrame({ + "a": [5, 6, 7, 8] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [1, 2, 3, 4, 5, 6, 7, 8] + } + + def test_update_df_datetime(self): + tbl = Table({"a": [np.datetime64(datetime(2019, 7, 11, 11, 0))]}) + + update_data = pd.DataFrame({ + "a": [datetime(2019, 7, 12, 11, 0)] + }) + + tbl.update(update_data) + assert tbl.view().to_dict() == { + "a": [datetime(2019, 7, 11, 11, 0), datetime(2019, 7, 12, 11, 0)] + } + + def test_update_df_partial(self): + tbl = Table({ + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"] + }, {"index": "b"}) + + update_data = pd.DataFrame({ + "a": [5, 6, 7, 8], + "b": ["a", "b", "c", "d"] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [5, 6, 7, 8], + "b": ["a", "b", "c", "d"] + } + + def test_update_df_partial_implicit(self): + tbl = Table({"a": [1, 2, 3, 4]}) + + update_data = pd.DataFrame({ + "a": [5, 6, 7, 8], + "__INDEX__": [0, 1, 2, 3] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [5, 6, 7, 8] + } + + def test_update_df_datetime_partial(self): + tbl = Table({ + "a": [np.datetime64(datetime(2019, 7, 11, 11, 0))], + "b": [1] + }, {"index": "b"}) + + update_data = pd.DataFrame({ + "a": [datetime(2019, 7, 12, 11, 0)], + "b": [1] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [datetime(2019, 7, 12, 11, 0)], + "b": [1] + } + + def test_update_df_nonseq_partial(self): + tbl = Table({ + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"] + }, {"index": "b"}) + + update_data = pd.DataFrame({ + "a": [5, 6, 7], + "b": ["a", "c", "d"] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [5, 2, 6, 7], + "b": ["a", "b", "c", "d"] + } + + def test_update_df_with_none_partial(self): + tbl = Table({ + "a": [1, np.nan, 3], + "b": ["a", None, "d"] + }, {"index": "b"}) + + update_data = pd.DataFrame({ + "a": [4, 5], + "b": ["a", "d"] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [None, 4, 5], + "b": [None, "a", "d"] # pkeys are ordered + } + + def test_update_df_unset_partial(self): + tbl = Table({ + "a": [1, 2, 3], + "b": ["a", "b", "c"] + }, {"index": "b"}) + + update_data = pd.DataFrame({ + "a": [None, None], + "b": ["a", "c"] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [None, 2, None], + "b": ["a", "b", "c"] + } + + def test_update_df_nan_partial(self): + tbl = Table({ + "a": [1, 2, 3], + "b": ["a", "b", "c"] + }, {"index": "b"}) + + update_data = pd.DataFrame({ + "a": [None, None], + "b": ["a", "c"] + }) + + tbl.update(update_data) + + assert tbl.view().to_dict() == { + "a": [None, 2, None], + "b": ["a", "b", "c"] + } + + # dates and datetimes + def test_update_date(self): + tbl = Table({"a": [date(2019, 7, 11)]}) + tbl.update([{"a": date(2019, 7, 12)}]) + assert tbl.view().to_records() == [ + {"a": datetime(2019, 7, 11, 0, 0)}, + {"a": datetime(2019, 7, 12, 0, 0)} + ] + + def test_update_date_np(self): + tbl = Table({"a": [date(2019, 7, 11)]}) + tbl.update([{"a": np.datetime64(date(2019, 7, 12))}]) + assert tbl.view().to_records() == [ + {"a": datetime(2019, 7, 11, 0, 0)}, + {"a": datetime(2019, 7, 12, 0, 0)} + ] + + def test_update_datetime(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)]}) + tbl.update([{"a": datetime(2019, 7, 12, 11, 0)}]) + assert tbl.view().to_records() == [ + {"a": datetime(2019, 7, 11, 11, 0)}, + {"a": datetime(2019, 7, 12, 11, 0)} + ] + + def test_update_datetime_np(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)]}) + tbl.update([{"a": np.datetime64(datetime(2019, 7, 12, 11, 0))}]) + assert tbl.view().to_records() == [ + {"a": datetime(2019, 7, 11, 11, 0)}, + {"a": datetime(2019, 7, 12, 11, 0)} + ] + + def test_update_datetime_np_ts(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)]}) + tbl.update([{"a": np.datetime64("2019-07-12T11:00")}]) + assert tbl.view().to_records() == [ + {"a": datetime(2019, 7, 11, 11, 0)}, + {"a": datetime(2019, 7, 12, 11, 0)} + ] + + # partial date & datetime updates + + def test_update_date_partial(self): + tbl = Table({"a": [date(2019, 7, 11)], "b": [1]}, {"index": "b"}) + tbl.update([{"a": date(2019, 7, 12), "b": 1}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 0, 0), "b": 1}] + + def test_update_date_np_partial(self): + tbl = Table({"a": [date(2019, 7, 11)], "b": [1]}, {"index": "b"}) + tbl.update([{"a": np.datetime64(date(2019, 7, 12)), "b": 1}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 0, 0), "b": 1}] + + def test_update_datetime_partial(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)], "b": [1]}, {"index": "b"}) + tbl.update([{"a": datetime(2019, 7, 12, 11, 0), "b": 1}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 11, 0), "b": 1}] + + def test_update_datetime_np_partial(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)], "b": [1]}, {"index": "b"}) + tbl.update([{"a": np.datetime64(datetime(2019, 7, 12, 11, 0)), "b": 1}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 11, 0), "b": 1}] + + def test_update_datetime_np_ts_partial(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)], "b": [1]}, {"index": "b"}) + tbl.update([{"a": np.datetime64("2019-07-12T11:00"), "b": 1}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 11, 0), "b": 1}] + + # updating dates using implicit index + + def test_update_date_partial_implicit(self): + tbl = Table({"a": [date(2019, 7, 11)]}) + tbl.update([{"a": date(2019, 7, 12), "__INDEX__": 0}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 0, 0)}] + + def test_update_date_np_partial_implicit(self): + tbl = Table({"a": [date(2019, 7, 11)]}) + tbl.update([{"a": np.datetime64(date(2019, 7, 12)), "__INDEX__": 0}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 0, 0)}] + + def test_update_datetime_partial_implicit(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)]}) + tbl.update([{"a": datetime(2019, 7, 12, 11, 0), "__INDEX__": 0}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 11, 0)}] + + def test_update_datetime_np_partial_implicit(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)]}) + tbl.update([{"a": np.datetime64(datetime(2019, 7, 12, 11, 0)), "__INDEX__": 0}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 11, 0)}] + + def test_update_datetime_np_ts_partial_implicit(self): + tbl = Table({"a": [datetime(2019, 7, 11, 11, 0)]}) + tbl.update([{"a": np.datetime64("2019-07-12T11:00"), "__INDEX__": 0}]) + assert tbl.view().to_records() == [{"a": datetime(2019, 7, 12, 11, 0)}] + + # implicit index def test_update_implicit_index(self): @@ -143,26 +520,23 @@ def test_update_explicit_index_multi_append_noindex(self): }]) assert view.to_records() == [{"a": None, "b": 5}, {"a": 1, "b": 3}, {"a": 2, "b": 3}, {"a": 3, "b": 4}] - @mark.skip def test_update_implicit_index_with_explicit_unset(self): data = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] tbl = Table(data, {"index": "a"}) view = tbl.view() tbl.update([{ - "__INDEX__": [0], + "__INDEX__": [1], "b": 3 }]) assert view.to_records() == [{"a": 1, "b": 3}, {"a": 2, "b": 3}] - @mark.skip def test_update_implicit_index_with_explicit_set(self): - # should abort() data = [{"a": 1, "b": 2}, {"a": 2, "b": 3}] tbl = Table(data, {"index": "a"}) view = tbl.view() tbl.update([{ - "__INDEX__": [0], - "a": 1, + "__INDEX__": [1], + "a": 1, # should ignore re-specification of pkey "b": 3 }]) assert view.to_records() == [{"a": 1, "b": 3}, {"a": 2, "b": 3}] diff --git a/python/perspective/perspective/tests/table/test_view.py b/python/perspective/perspective/tests/table/test_view.py index 33d1387427..10f0b38b27 100644 --- a/python/perspective/perspective/tests/table/test_view.py +++ b/python/perspective/perspective/tests/table/test_view.py @@ -6,6 +6,7 @@ # the Apache License 2.0. The full license can be found in the LICENSE file. # +import numpy as np from perspective.table import Table from datetime import date, datetime @@ -342,6 +343,18 @@ def test_view_filter_date_neq(self): view = tbl.view({"filter": [["a", "!=", date(2019, 7, 12)]]}) assert view.to_records() == [{"a": datetime(2019, 7, 11), "b": 2}] + def test_view_filter_date_np_eq(self): + data = [{"a": date(2019, 7, 11), "b": 2}, {"a": date(2019, 7, 12), "b": 4}] + tbl = Table(data) + view = tbl.view({"filter": [["a", "==", np.datetime64(date(2019, 7, 12))]]}) + assert view.to_records() == [{"a": datetime(2019, 7, 12), "b": 4}] + + def test_view_filter_date_np_neq(self): + data = [{"a": date(2019, 7, 11), "b": 2}, {"a": date(2019, 7, 12), "b": 4}] + tbl = Table(data) + view = tbl.view({"filter": [["a", "!=", np.datetime64(date(2019, 7, 12))]]}) + assert view.to_records() == [{"a": datetime(2019, 7, 11), "b": 2}] + def test_view_filter_date_str_eq(self): data = [{"a": date(2019, 7, 11), "b": 2}, {"a": date(2019, 7, 12), "b": 4}] tbl = Table(data) @@ -366,6 +379,18 @@ def test_view_filter_datetime_neq(self): view = tbl.view({"filter": [["a", "!=", datetime(2019, 7, 11, 8, 15)]]}) assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + def test_view_filter_datetime_np_eq(self): + data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + tbl = Table(data) + view = tbl.view({"filter": [["a", "==", np.datetime64(datetime(2019, 7, 11, 8, 15))]]}) + assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}] + + def test_view_filter_datetime_np_neq(self): + data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + tbl = Table(data) + view = tbl.view({"filter": [["a", "!=", np.datetime64(datetime(2019, 7, 11, 8, 15))]]}) + assert view.to_records() == [{"a": datetime(2019, 7, 11, 8, 16), "b": 4}] + def test_view_filter_datetime_str_eq(self): data = [{"a": datetime(2019, 7, 11, 8, 15), "b": 2}, {"a": datetime(2019, 7, 11, 8, 16), "b": 4}] tbl = Table(data) diff --git a/python/perspective/requirements-dev.txt b/python/perspective/requirements-dev.txt index 6998dc313e..6e0b7f9c5e 100644 --- a/python/perspective/requirements-dev.txt +++ b/python/perspective/requirements-dev.txt @@ -4,6 +4,7 @@ flake8>=3.7.8 ipywidgets>=7.4.2 numpy>=1.8.1 matplotlib>=3.0.0 +mock pandas>=0.22.0 psutil>=5.4.8 pytest>=4.3.0