From cf0712e8e9c1e1ef050700138703137ff7c532e2 Mon Sep 17 00:00:00 2001 From: Jun Tan Date: Wed, 5 Dec 2018 22:08:49 -0600 Subject: [PATCH 1/2] port data_types --- packages/perspective/src/js/parse_data.js | 2 +- src/cpp/main.cpp | 110 ++++++++++++++++------ 2 files changed, 81 insertions(+), 31 deletions(-) diff --git a/packages/perspective/src/js/parse_data.js b/packages/perspective/src/js/parse_data.js index 1b225929c9..0847e2eadf 100644 --- a/packages/perspective/src/js/parse_data.js +++ b/packages/perspective/src/js/parse_data.js @@ -213,7 +213,7 @@ export class DataParser { parse(__MODULE__, data) { const format = this.is_format(data); let names = __MODULE__.column_names(data, format); - let types = this.data_types(__MODULE__, data, format, names); + let types = __MODULE__.data_types(data, format, names, moment, DATE_PARSE_CANDIDATES); let [cdata, row_count] = this.make_columnar_data(__MODULE__, data, format, names, types); return {cdata, names, types, row_count, is_arrow: false}; } diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 01367ee7dc..01dbc9f0a7 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -874,6 +874,38 @@ is_valid_date(val moment, val candidates, val x) { .as(); } +// Name parsing +val +column_names(val data, t_int32 format) { + val column_names = val::array(); + val Object = val::global("Object"); + + if (format == 1) { + t_int32 max_check = 50; + column_names = Object.call("keys", data[0]); + t_int32 check_index = val::global("Math").call("min", val(max_check), val(data["length"])).as(); + + for (auto ix = 0; ix < check_index; ix++) { + val next = Object.call("keys", data[ix]); + if (column_names["length"] != next["length"]) { + if (max_check == 50) { + std::cout << "Data parse warning: Array data has inconsistent rows" << std::endl; + } + + std::cout << boost::format("Extending from %d to %d") % column_names["length"].as() % next["length"].as() << std::endl; + column_names = next; + max_check *= 2; + } + + } + } else if (format == 
2 || format == 3) { + column_names = Object.call("keys", data); + } + + return column_names; +} + +// Type inference t_dtype infer_type(val x, val moment, val candidates) { t_str jstype = x.typeOf().as(); @@ -917,36 +949,6 @@ infer_type(val x, val moment, val candidates) { return t; } -val -column_names(val data, t_int32 format) { - val column_names = val::array(); - val Object = val::global("Object"); - - if (format == 1) { - t_int32 max_check = 50; - column_names = Object.call("keys", data[0]); - t_int32 check_index = val::global("Math").call("min", val(max_check), val(data["length"])).as(); - - for (auto ix = 0; ix < check_index; ix++) { - val next = Object.call("keys", data[ix]); - if (column_names["length"] != next["length"]) { - if (max_check == 50) { - std::cout << "Data parse warning: Array data has inconsistent rows" << std::endl; - } - - std::cout << boost::format("Extending from %d to %d") % column_names["length"].as() % next["length"].as() << std::endl; - column_names = next; - max_check *= 2; - } - - } - } else if (format == 2 || format == 3) { - column_names = Object.call("keys", data); - } - - return column_names; -} - t_dtype get_data_type(val data, t_int32 format, t_str name, val moment, val candidates) { t_int32 i = 0; @@ -984,6 +986,53 @@ get_data_type(val data, t_int32 format, t_str name, val moment, val candidates) } } +val +data_types(val data, t_int32 format, val column_names, val moment, val candidates) { + t_int32 names_length = column_names["length"].as(); + if (names_length == 0) { + throw std::invalid_argument("Cannot determine data types without column names!"); + } + + val types = val::array(); + + if (format == 3) { + val names_from_data = val::global("Object").call("keys", data); + + for (t_int32 i = 0; i < names_from_data["length"].as(); i++) { + t_str value = data[names_from_data[i]].as(); + t_dtype type = t_dtype::DTYPE_PTR; // use a type we don't use in the JS library as a flag + + if (value == "integer") { + type = 
t_dtype::DTYPE_INT32; + } else if (value == "float") { + type = t_dtype::DTYPE_FLOAT64; + } else if (value == "string") { + type = t_dtype::DTYPE_STR; + } else if (value == "boolean") { + type = t_dtype::DTYPE_BOOL; + } else if (value == "datetime") { + type = t_dtype::DTYPE_TIME; + } else if (value == "date") { + type = t_dtype::DTYPE_DATE; + } else { + throw std::logic_error("Unknown type!"); + } + + types.call("push", type); + } + + return types; + } + + for (t_int32 i = 0; i < names_length; i++) { + t_str name = column_names[i].as(); + t_dtype type = get_data_type(data, format, name, moment, candidates); + types.call("push", type); + } + + return types; +} + /** * Create a default gnode. * @@ -1577,6 +1626,7 @@ EMSCRIPTEN_BINDINGS(perspective) { function("infer_type", &infer_type); function("column_names", &column_names); function("get_data_type", &get_data_type); + function("data_types", &data_types); function("make_table", &make_table, allow_raw_pointers()); function("make_gnode", &make_gnode); function("clone_gnode_table", &clone_gnode_table, allow_raw_pointers()); From 08b0a5cae304e88ace11db5ae1e86973f038de96 Mon Sep 17 00:00:00 2001 From: Jun Tan Date: Fri, 7 Dec 2018 16:25:28 -0600 Subject: [PATCH 2/2] remove vestigial data_types in DataParser --- packages/perspective/src/js/parse_data.js | 84 ++------------------ packages/perspective/test/js/constructors.js | 19 +++++ 2 files changed, 25 insertions(+), 78 deletions(-) diff --git a/packages/perspective/src/js/parse_data.js b/packages/perspective/src/js/parse_data.js index 0847e2eadf..cf7af03174 100644 --- a/packages/perspective/src/js/parse_data.js +++ b/packages/perspective/src/js/parse_data.js @@ -39,82 +39,6 @@ export class DataParser { } } - data_types(__MODULE__, data, format, column_names) { - let types = []; - - if (!column_names) { - throw "Cannot determine data types without column names!"; - } - - if (format === this.data_formats.schema) { - for (let name in data) { - const dtypes = 
__MODULE__.t_dtype; - let type = undefined; - switch (data[name]) { - case "integer": - type = dtypes.DTYPE_INT32; - break; - case "float": - type = dtypes.DTYPE_FLOAT64; - break; - case "string": - type = dtypes.DTYPE_STR; - break; - case "boolean": - type = dtypes.DTYPE_BOOL; - break; - case "datetime": - type = dtypes.DTYPE_TIME; - break; - case "date": - type = dtypes.DTYPE_DATE; - break; - default: - throw `Unknown type: ${name}`; - } - - types.push(type); - } - return types; - } - - for (let name of column_names) { - let type = __MODULE__.get_data_type(data, format, name, moment, DATE_PARSE_CANDIDATES); //this.get_data_type(__MODULE__, data, format, name); - types.push(type); - } - - return types; - } - - get_data_type(__MODULE__, data, format, name) { - let i = 0; - let inferredType = undefined; - - if (format === this.data_formats.row) { - while (inferredType === undefined && i < 100 && i < data.length) { - if (data[i].hasOwnProperty(name)) { - if (data[i][name] !== null) { - inferredType = __MODULE__.infer_type(data[i][name], moment, DATE_PARSE_CANDIDATES); - } else { - inferredType = null; - } - } - i++; - } - } else if (format === this.data_formats.column) { - while (inferredType === undefined && i < 100 && i < data[name].length) { - if (data[name][i] !== null) { - inferredType = __MODULE__.infer_type(data[name][i], moment, DATE_PARSE_CANDIDATES); - } else { - inferredType = null; - } - i++; - } - } - - return inferredType || __MODULE__.t_dtype.DTYPE_STR; - } - make_columnar_data(__MODULE__, data, format, column_names, data_types) { let cdata = []; let row_count = 0; @@ -142,12 +66,16 @@ export class DataParser { } switch (type.value) { - case __MODULE__.t_dtype.DTYPE_FLOAT64.value: + case __MODULE__.t_dtype.DTYPE_FLOAT64.value: { + col.push(Number(val)); + break; + } case __MODULE__.t_dtype.DTYPE_INT32.value: { col.push(Number(val)); if (val > 2147483647 || val < -2147483648) { - // Avoid overflow errors + // FIXME: fully avoid overflow errors 
data_types[column_names.indexOf(name)] = __MODULE__.t_dtype.DTYPE_FLOAT64; + console.warn(`Promoting type of column ${name} from Integer to Float type.`); } break; } diff --git a/packages/perspective/test/js/constructors.js b/packages/perspective/test/js/constructors.js index eb2d03ed71..7e17decce6 100644 --- a/packages/perspective/test/js/constructors.js +++ b/packages/perspective/test/js/constructors.js @@ -462,6 +462,25 @@ module.exports = perspective => { table.delete(); }); + it.skip("Upgrades integer columns with values beyond max/min_int to float", async function() { + const schema = { + a: "integer" + }; + + const int_to_float = { + a: [1, 2, 3, 2147483667, 5] + }; + + var table = perspective.table(schema); + var schema_1 = await table.schema(); + expect(schema_1["a"]).toEqual("integer"); + + table.update(int_to_float); + + var schema_2 = await table.schema(); + expect(schema_2["a"]).toEqual("float"); + }); + it("has correct size", async function() { var table = perspective.table(data); let result = await table.size();