Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read numpy nans/datetimes #741

Merged
merged 10 commits into from
Sep 25, 2019
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,4 @@ docs/static/arrow/
.perspectiverc

python/perspective/perspective/tests/table/psp_test
python/perspective/perspective/node/assets/zmq.node
python/perspective/perspective/node/assets/*
2 changes: 1 addition & 1 deletion cpp/perspective/src/include/perspective/val.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*/
#pragma once

#ifdef(PSP_ENABLE_PYTHON)
#ifdef PSP_ENABLE_PYTHON

#include <perspective/base.h>

Expand Down
54 changes: 0 additions & 54 deletions examples/simple/test.html

This file was deleted.

2 changes: 1 addition & 1 deletion packages/perspective/test/js/updates.js
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ module.exports = perspective => {
table.delete();
});

it("should apply mulitple sequential updates using '__INDEX__' on a table with explicit index set", async function() {
it("should apply multiple sequential updates using '__INDEX__' on a table with explicit index set", async function() {
let table = perspective.table(data, {index: "x"});
table.update([
{
Expand Down
4 changes: 2 additions & 2 deletions python/perspective/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ To build `perspective-python:table` from source, you'll need the following C++ d

- Python 3.7
- CMake
- Boost
- PyBind11
- numpy
- tbb

On MacOS, you should be able to install Boost, PyBind11, and tbb from brew:

```shell
brew install boost pybind11 tbb
brew install pybind11 tbb
```

And then install Python dependencies using pip:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
#include <pybind11/numpy.h>
#include <boost/optional.hpp>

/******************************************************************************
*
* Numpy includes
*/
#include <numpy/npy_math.h>

/******************************************************************************
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ static auto IS_BYTES = [](auto type_instance) { return type_instance.is(py::modu
*
* Date Parsing
*/
t_date pythondate_to_t_date(t_val date);
std::int64_t pythondatetime_to_ms(t_val datetime);

t_dtype type_string_to_t_dtype(std::string type, std::string name = "");
t_dtype type_string_to_t_dtype(py::str type, py::str name = "");
Expand Down
Binary file removed python/perspective/perspective/node/assets/zmq.node
Binary file not shown.
29 changes: 23 additions & 6 deletions python/perspective/perspective/src/fill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ _fill_col_time(t_data_accessor accessor, std::shared_ptr<t_column> col, std::str
continue;
}

col->set_nth(i, pythondatetime_to_ms(item));
col->set_nth(i, item.cast<std::int64_t>());
}
}
}
Expand Down Expand Up @@ -107,7 +107,10 @@ _fill_col_date(t_data_accessor accessor, std::shared_ptr<t_column> col, std::str
continue;
}

col->set_nth(i, pythondate_to_t_date(item));

auto date_components = item.cast<std::map<std::string, std::int32_t>>();
t_date dt = t_date(date_components["year"], date_components["month"], date_components["day"]);
col->set_nth(i, dt);
}
}
}
Expand Down Expand Up @@ -229,6 +232,7 @@ _fill_col_string(t_data_accessor accessor, std::shared_ptr<t_column> col, std::s
continue;
}

// convert to a python string first
std::wstring welem = item.cast<std::wstring>();
std::wstring_convert<utf16convert_type, wchar_t> converter;
std::string elem = converter.to_bytes(welem);
Expand Down Expand Up @@ -267,7 +271,7 @@ _fill_col_int64(t_data_accessor accessor, t_data_table& tbl, std::shared_ptr<t_c

double fval = item.cast<double>();
if (isnan(fval)) {
WARN("Promoting to string");
WARN("Promoting %s to string from int64", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
_fill_col_string(
Expand Down Expand Up @@ -327,7 +331,10 @@ set_column_nth(std::shared_ptr<t_column> col, t_uindex idx, t_val value) {
break;
}
case DTYPE_DATE: {
col->set_nth<t_date>(idx, pythondate_to_t_date(value), STATUS_VALID);
t_date dt = t_date(value.attr("year").cast<std::int32_t>(),
value.attr("month").cast<std::int32_t>(),
value.attr("day").cast<std::int32_t>());
col->set_nth<t_date>(idx, dt, STATUS_VALID);
break;
}
case DTYPE_TIME: {
Expand Down Expand Up @@ -404,13 +411,13 @@ _fill_col_numeric(t_data_accessor accessor, t_data_table& tbl,
// inference checked the entire column/we could reset parsing.
double fval = item.cast<double>();
if (fval > 2147483647 || fval < -2147483648) {
WARN("Promoting to float");
WARN("Promoting %s to float from int32", name);
tbl.promote_column(name, DTYPE_FLOAT64, i, true);
col = tbl.get_column(name);
type = DTYPE_FLOAT64;
col->set_nth(i, fval);
} else if (isnan(fval)) {
WARN("Promoting to string");
WARN("Promoting column %s to string from int32", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
_fill_col_string(
Expand All @@ -424,6 +431,16 @@ _fill_col_numeric(t_data_accessor accessor, t_data_table& tbl,
col->set_nth(i, item.cast<float>());
} break;
case DTYPE_FLOAT64: {
bool is_float = py::isinstance<py::float_>(item);
bool is_numpy_nan = is_float && npy_isnan(item.cast<double>());
if (!is_float || is_numpy_nan) {
WARN("Promoting column %s to string from float64", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
_fill_col_string(
accessor, col, name, cidx, DTYPE_STR, is_arrow, is_update);
return;
}
col->set_nth(i, item.cast<double>());
} break;
default:
Expand Down
2 changes: 1 addition & 1 deletion python/perspective/perspective/src/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ std::shared_ptr<Table> make_table_py(t_val table, t_data_accessor accessor, t_va
auto current_schema = current_data_table->get_schema();
for (auto idx = 0; idx < current_schema.m_types.size(); ++idx) {
if (data_types[idx] == DTYPE_INT64) {
WARN("Promoting int64 '" + column_names[idx] + "'");
WARN("Promoting %s to int64", column_names[idx]);
current_gnode->promote_column(column_names[idx], DTYPE_INT64);
}
}
Expand Down
21 changes: 0 additions & 21 deletions python/perspective/perspective/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,6 @@
namespace perspective {
namespace binding {

/******************************************************************************
*
* Date Parsing
*/
t_date
pythondate_to_t_date(t_val date) {
return t_date(date.attr("year").cast<std::int32_t>(),
date.attr("month").cast<std::int32_t>(),
date.attr("day").cast<std::int32_t>());
}

std::int64_t
pythondatetime_to_ms(t_val datetime) {
/**
* Rounding the python timestamp to an int causes microsecond-level precision issues. This can be exposed by
* passing a datetime with the `microsecond` field set to a roundable value, i.e. 5500. On conversion, the
* microsecond value becomes 6000 due to the rounding error.
*/
return static_cast<std::int64_t>(datetime.attr("timestamp")().cast<double>() * 1000);
}

t_dtype type_string_to_t_dtype(std::string value, std::string name){
auto type = t_dtype::DTYPE_STR;

Expand Down
16 changes: 12 additions & 4 deletions python/perspective/perspective/src/view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,26 @@ make_filter_term(t_dtype column_type, t_val date_parser, const std::string& colu
case DTYPE_DATE: {
if (py::isinstance<py::str>(filter_term)) {
t_val parsed_date = date_parser.attr("parse")(filter_term);
terms.push_back(mktscalar(pythondate_to_t_date(parsed_date)));
auto date_components =
date_parser.attr("to_date_components")(parsed_date).cast<std::map<std::string, std::int32_t>>();
t_date dt = t_date(date_components["year"], date_components["month"], date_components["day"]);
terms.push_back(mktscalar(dt));
} else {
terms.push_back(mktscalar(pythondate_to_t_date(filter_term)));
auto date_components =
date_parser.attr("to_date_components")(filter_term).cast<std::map<std::string, std::int32_t>>();
t_date dt = t_date(date_components["year"], date_components["month"], date_components["day"]);
terms.push_back(mktscalar(dt));
}
} break;
case DTYPE_TIME: {
if (py::isinstance<py::str>(filter_term)) {
t_val parsed_date = date_parser.attr("parse")(filter_term);
t_tscalar timestamp = mktscalar(t_time(pythondatetime_to_ms(parsed_date)));
std::int64_t ts = date_parser.attr("to_timestamp")(parsed_date).cast<std::int64_t>();
t_tscalar timestamp = mktscalar(t_time(ts));
terms.push_back(timestamp);
} else {
t_tscalar timestamp = mktscalar(t_time(pythondatetime_to_ms(filter_term)));
t_tscalar timestamp = mktscalar(
t_time(date_parser.attr("to_timestamp")(filter_term).cast<std::int64_t>()));
terms.push_back(timestamp);
}
} break;
Expand Down
44 changes: 35 additions & 9 deletions python/perspective/perspective/table/_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
# This file is part of the Perspective library, distributed under the terms of
# the Apache License 2.0. The full license can be found in the LICENSE file.
#
from math import isnan
from ._date_validator import _PerspectiveDateValidator
from perspective.table.libbinding import t_dtype
try:
import pandas
except (ImportError, ModuleNotFoundError):
pandas = None
import pandas


def _type_to_format(data_or_schema):
Expand Down Expand Up @@ -42,7 +40,7 @@ def _type_to_format(data_or_schema):
# Can't process
raise NotImplementedError("Dict values must be list or type!")
else:
if pandas is None or not (isinstance(data_or_schema, pandas.DataFrame) or isinstance(data_or_schema, pandas.Series)):
if not (isinstance(data_or_schema, pandas.DataFrame) or isinstance(data_or_schema, pandas.Series)):
# not a pandas DataFrame or Series
raise NotImplementedError("Must be dict or list!")
else:
Expand Down Expand Up @@ -133,12 +131,40 @@ def marshal(self, cidx, ridx, type):
column_name = self._names[cidx]
val = self.get(column_name, ridx)

# parse string dates/datetimes into objects
if isinstance(val, str) and type in (t_dtype.DTYPE_DATE, t_dtype.DTYPE_TIME):
val = self._date_validator.parse(val)
if val is None:
return val

# first, check for numpy nans without using numpy.isnan as it tries to cast values
if isinstance(val, float) and isnan(val):
val = None
elif isinstance(val, list) and len(val) == 1:
# implicit index: strip out
# strip out values encased in single-element lists
val = val[0]
elif type == t_dtype.DTYPE_INT32 or type == t_dtype.DTYPE_INT64:
if not isinstance(val, bool) and isinstance(val, float):
# should be able to update int columns with either ints or floats
val = int(val)
elif type == t_dtype.DTYPE_FLOAT32 or type == t_dtype.DTYPE_FLOAT64:
if not isinstance(val, bool) and isinstance(val, int):
# should be able to update float columns with either ints or floats
val = float(val)
elif type == t_dtype.DTYPE_DATE:
# return datetime.date
if isinstance(val, str):
parsed = self._date_validator.parse(val)
val = self._date_validator.to_date_components(parsed)
else:
val = self._date_validator.to_date_components(val)
elif type == t_dtype.DTYPE_TIME:
# return unix timestamps for time
if isinstance(val, str):
parsed = self._date_validator.parse(val)
val = self._date_validator.to_timestamp(parsed)
else:
val = self._date_validator.to_timestamp(val)
elif type == t_dtype.DTYPE_STR:
val = str(val)

return val

def has_column(self, ridx, name):
Expand Down
Loading