Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow partial updates on computed column source columns #729

Merged
merged 3 commits into from
Sep 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cpp/perspective/src/cpp/base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,24 @@ dtype_to_str(t_dtype dtype) {
return str_dtype.str();
}

/**
 * @brief Translate a public-API type name into its t_dtype.
 *
 * Only the type names most commonly used by the JS/python public APIs are
 * recognised. Any other string (including "string" itself) yields DTYPE_STR.
 *
 * @param typestring type name, e.g. "integer", "float", "boolean", "date",
 *                   "datetime".
 * @return the matching t_dtype, or DTYPE_STR when the name is not recognised.
 */
t_dtype
str_to_dtype(const std::string& typestring) {
    if (typestring == "integer") {
        return DTYPE_INT32;
    }
    if (typestring == "float") {
        return DTYPE_FLOAT64;
    }
    if (typestring == "boolean") {
        return DTYPE_BOOL;
    }
    if (typestring == "date") {
        return DTYPE_DATE;
    }
    if (typestring == "datetime") {
        return DTYPE_TIME;
    }
    // Everything else is treated as a string column.
    return DTYPE_STR;
}

std::string
filter_op_to_str(t_filter_op op) {
switch (op) {
Expand Down
207 changes: 108 additions & 99 deletions cpp/perspective/src/cpp/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ namespace binding {

template <>
void
set_column_nth(t_column* col, t_uindex idx, t_val value) {
set_column_nth(std::shared_ptr<t_column> col, t_uindex idx, t_val value) {

// Check if the value is a javascript null
if (value.isNull()) {
Expand Down Expand Up @@ -1063,111 +1063,124 @@ namespace binding {

// NOTE(review): this span is a diff rendered without +/- markers. Lines of
// `table_add_computed_column` (which takes an array of definitions) and of
// `add_computed_column` (which takes a single definition plus row indices)
// are interleaved, so the text below does not compile as written. Read the
// two bodies separately; presumably the former is the removed version and
// the latter its replacement — confirm against the original diff.
template <>
void
table_add_computed_column(t_data_table& table, t_val computed_defs) {
auto vcomputed_defs = vecFromArray<t_val, t_val>(computed_defs);
for (auto i = 0; i < vcomputed_defs.size(); ++i) {
t_val coldef = vcomputed_defs[i];
std::string name = coldef["column"].as<std::string>();
t_val inputs = coldef["inputs"];
t_val func = coldef["func"];
t_val type = coldef["type"];

std::string stype;

if (type.isUndefined()) {
stype = "string";
} else {
stype = type.as<std::string>();
}
// Evaluates one computed-column definition over the rows named in
// `row_indices`, or over every row of `table` when `row_indices` is empty.
add_computed_column(std::shared_ptr<t_data_table> table, const std::vector<t_uindex>& row_indices, t_val computed_def) {
// NOTE(review): size() is stored in a std::uint32_t; this narrows on targets
// where size_t is wider than 32 bits — confirm that is acceptable.
std::uint32_t end = row_indices.size();
if (end == 0) {
// iterate through all rows if no row indices are specified
end = table->size();
}

// Inline type-name mapping; the `str_to_dtype(typestring)` call further down
// covers the same set of names.
t_dtype dtype;
if (stype == "integer") {
dtype = DTYPE_INT32;
} else if (stype == "float") {
dtype = DTYPE_FLOAT64;
} else if (stype == "boolean") {
dtype = DTYPE_BOOL;
} else if (stype == "date") {
dtype = DTYPE_DATE;
} else if (stype == "datetime") {
dtype = DTYPE_TIME;
} else {
dtype = DTYPE_STR;
}
// Unpack the definition object: input column names, output column name,
// optional output type and the function to call.
t_val input_names = computed_def["inputs"];
std::vector<std::string> input_column_names = vecFromArray<t_val, std::string>(input_names);
std::string output_column_name = computed_def["column"].as<std::string>();
t_val type = computed_def["type"];
t_val computed_func = computed_def["func"];

std::string typestring;

// Get list of input column names
auto icol_names = vecFromArray<t_val, std::string>(inputs);
// A missing "type" field defaults to "string".
if (type.isUndefined()) {
typestring = "string";
} else {
typestring = type.as<std::string>();
}

// Get t_column* for all input columns
std::vector<const t_column*> icols;
for (const auto& cc : icol_names) {
icols.push_back(table._get_column(cc));
}
t_dtype output_column_dtype = str_to_dtype(typestring);

int arity = icols.size();
std::vector<std::shared_ptr<t_column>> input_columns;

// Add new column
t_column* out = table.add_column(name, dtype, true);
for (const auto& column_name : input_column_names) {
input_columns.push_back(table->get_column(column_name));
}

t_val i1 = t_val::undefined(), i2 = t_val::undefined(), i3 = t_val::undefined(),
i4 = t_val::undefined();
// don't double create output column
auto schema = table->get_schema();
std::shared_ptr<t_column> output_column;

t_uindex size = table.size();
for (t_uindex ridx = 0; ridx < size; ++ridx) {
t_val value = t_val::undefined();
// Reuse the output column when the schema already has one with this name;
// otherwise add it to the table.
if (schema.has_column(output_column_name)) {
output_column = table->get_column(output_column_name);
} else {
output_column = table->add_column_sptr(output_column_name, output_column_dtype, true);
}

switch (arity) {
case 0: {
value = func();
break;
}
case 1: {
i1 = scalar_to_val(icols[0]->get_scalar(ridx));
if (!i1.isNull()) {
value = func(i1);
}
break;
}
case 2: {
i1 = scalar_to_val(icols[0]->get_scalar(ridx));
i2 = scalar_to_val(icols[1]->get_scalar(ridx));
if (!i1.isNull() && !i2.isNull()) {
value = func(i1, i2);
}
break;
t_val i1 = t_val::undefined(), i2 = t_val::undefined(), i3 = t_val::undefined(),
i4 = t_val::undefined();

auto arity = input_columns.size();
for (t_uindex idx = 0; idx < end; ++idx) {
// iterate through row indices OR through all rows
t_uindex ridx;
if (row_indices.size() > 0) {
ridx = row_indices[idx];
} else {
ridx = idx;
}

t_val value = t_val::undefined();

// Call the function with one argument per input column. For arity 1-4 the
// call is skipped (value stays undefined) when any input for this row is
// null; arity 0 always calls.
switch (arity) {
case 0: {
value = computed_func();
break;
}
case 1: {
i1 = scalar_to_val(input_columns[0]->get_scalar(ridx));
if (!i1.isNull()) {
value = computed_func(i1);
}
case 3: {
i1 = scalar_to_val(icols[0]->get_scalar(ridx));
i2 = scalar_to_val(icols[1]->get_scalar(ridx));
i3 = scalar_to_val(icols[2]->get_scalar(ridx));
if (!i1.isNull() && !i2.isNull() && !i3.isNull()) {
value = func(i1, i2, i3);
}
break;
break;
}
case 2: {
i1 = scalar_to_val(input_columns[0]->get_scalar(ridx));
i2 = scalar_to_val(input_columns[1]->get_scalar(ridx));
if (!i1.isNull() && !i2.isNull()) {
value = computed_func(i1, i2);
}
case 4: {
i1 = scalar_to_val(icols[0]->get_scalar(ridx));
i2 = scalar_to_val(icols[1]->get_scalar(ridx));
i3 = scalar_to_val(icols[2]->get_scalar(ridx));
i4 = scalar_to_val(icols[3]->get_scalar(ridx));
if (!i1.isNull() && !i2.isNull() && !i3.isNull() && !i4.isNull()) {
value = func(i1, i2, i3, i4);
}
break;
break;
}
case 3: {
i1 = scalar_to_val(input_columns[0]->get_scalar(ridx));
i2 = scalar_to_val(input_columns[1]->get_scalar(ridx));
i3 = scalar_to_val(input_columns[2]->get_scalar(ridx));
if (!i1.isNull() && !i2.isNull() && !i3.isNull()) {
value = computed_func(i1, i2, i3);
}
default: {
// Don't handle other arity values
break;
break;
}
case 4: {
i1 = scalar_to_val(input_columns[0]->get_scalar(ridx));
i2 = scalar_to_val(input_columns[1]->get_scalar(ridx));
i3 = scalar_to_val(input_columns[2]->get_scalar(ridx));
i4 = scalar_to_val(input_columns[3]->get_scalar(ridx));
if (!i1.isNull() && !i2.isNull() && !i3.isNull() && !i4.isNull()) {
value = computed_func(i1, i2, i3, i4);
}
break;
}

if (!value.isUndefined()) {
set_column_nth(out, ridx, value);
default: {
// Don't handle other arity values
break;
}
}

// Nothing is written for this row when the result is undefined.
if (!value.isUndefined()) {
set_column_nth(output_column, ridx, value);
}
}
}

/**
 * @brief Wrap each computed-column definition in a callable.
 *
 * Every returned callable takes a data table and a list of row indices and
 * forwards them, together with its own definition, to add_computed_column().
 *
 * @param computed computed-column definition objects, one per column.
 * @return one callable per entry of `computed`, in the same order.
 */
template <>
std::vector<t_computed_column_def>
make_computed_lambdas(std::vector<t_val> computed) {
    std::vector<t_computed_column_def> converted;
    // The final size is known, so allocate once instead of growing (and
    // re-copying the stored callables) on every push_back.
    converted.reserve(computed.size());
    for (const auto& j_computed_def : computed) {
        // Capture the definition by value: the callable is returned to the
        // caller and therefore outlives `computed`.
        converted.push_back(
            [j_computed_def](std::shared_ptr<t_data_table> table, const std::vector<t_uindex>& row_indices) {
                add_computed_column(table, row_indices, j_computed_def);
            });
    }
    return converted;
}

/******************************************************************************
*
* Fill tables with data
Expand Down Expand Up @@ -1348,11 +1361,6 @@ namespace binding {
// write data at the correct row
_fill_data(data_table, accessor, input_schema, index, offset, limit, is_arrow, is_update);

if (!computed.isUndefined()) {
// re-add computed columns after update, delete, etc.
table_add_computed_column(data_table, computed);
}

// calculate offset, limit, and set the gnode
tbl->init(data_table, row_count, op);
return tbl;
Expand All @@ -1362,11 +1370,13 @@ namespace binding {
// Applies the computed-column definitions in `computed` to the gnode's
// primary-keyed table, registers the resulting callables on `table`, and
// returns the same `table`.
//
// NOTE(review): this span is a diff rendered without +/- markers; the
// `t_data_table*` / `table_add_computed_column` / `replace_data_table` lines
// are presumably the removed implementation and the remainder its
// replacement — confirm against the original diff.
std::shared_ptr<Table>
make_computed_table(std::shared_ptr<Table> table, t_val computed) {
auto gnode = table->get_gnode();

t_data_table* data_table = gnode->_get_pkeyed_table();
table_add_computed_column(*data_table, computed);
table->replace_data_table(data_table);

auto pkeyed_table = gnode->get_pkeyed_table_sptr();
auto computed_defs = vecFromArray<t_val, t_val>(computed);
auto computed_lambdas = make_computed_lambdas(computed_defs);
// Run every callable once with an empty index list, i.e. without selecting
// specific rows.
// NOTE(review): `const auto lambda` copies each element on every iteration;
// `const auto&` would bind to it without the copy.
for (const auto lambda : computed_lambdas) {
lambda(pkeyed_table, {});
}
// Hand the callables to the table (presumably so they can be re-applied on
// later updates — confirm in Table::add_computed_columns).
table->add_computed_columns(pkeyed_table, computed_lambdas);
return table;
}

Expand Down Expand Up @@ -2006,7 +2016,6 @@ EMSCRIPTEN_BINDINGS(perspective) {
function("make_computed_table", &make_computed_table<t_val>);
function("scalar_vec_to_val", &scalar_vec_to_val);
function("scalar_vec_to_string", &scalar_vec_to_string);
function("table_add_computed_column", &table_add_computed_column<t_val>);
function("col_to_js_typed_array", &col_to_js_typed_array);
function("make_view_zero", &make_view<t_ctx0>);
function("make_view_one", &make_view<t_ctx1>);
Expand Down
Loading