Skip to content

Commit

Permalink
Showing 3 changed files with 106 additions and 109 deletions.
86 changes: 7 additions & 79 deletions packages/perspective/src/js/parse_data.js
Original file line number Diff line number Diff line change
@@ -39,82 +39,6 @@ export class DataParser {
}
}

data_types(__MODULE__, data, format, column_names) {
let types = [];

if (!column_names) {
throw "Cannot determine data types without column names!";
}

if (format === this.data_formats.schema) {
for (let name in data) {
const dtypes = __MODULE__.t_dtype;
let type = undefined;
switch (data[name]) {
case "integer":
type = dtypes.DTYPE_INT32;
break;
case "float":
type = dtypes.DTYPE_FLOAT64;
break;
case "string":
type = dtypes.DTYPE_STR;
break;
case "boolean":
type = dtypes.DTYPE_BOOL;
break;
case "datetime":
type = dtypes.DTYPE_TIME;
break;
case "date":
type = dtypes.DTYPE_DATE;
break;
default:
throw `Unknown type: ${name}`;
}

types.push(type);
}
return types;
}

for (let name of column_names) {
let type = __MODULE__.get_data_type(data, format, name, moment, DATE_PARSE_CANDIDATES); //this.get_data_type(__MODULE__, data, format, name);
types.push(type);
}

return types;
}

get_data_type(__MODULE__, data, format, name) {
let i = 0;
let inferredType = undefined;

if (format === this.data_formats.row) {
while (inferredType === undefined && i < 100 && i < data.length) {
if (data[i].hasOwnProperty(name)) {
if (data[i][name] !== null) {
inferredType = __MODULE__.infer_type(data[i][name], moment, DATE_PARSE_CANDIDATES);
} else {
inferredType = null;
}
}
i++;
}
} else if (format === this.data_formats.column) {
while (inferredType === undefined && i < 100 && i < data[name].length) {
if (data[name][i] !== null) {
inferredType = __MODULE__.infer_type(data[name][i], moment, DATE_PARSE_CANDIDATES);
} else {
inferredType = null;
}
i++;
}
}

return inferredType || __MODULE__.t_dtype.DTYPE_STR;
}

make_columnar_data(__MODULE__, data, format, column_names, data_types) {
let cdata = [];
let row_count = 0;
@@ -142,12 +66,16 @@ export class DataParser {
}

switch (type.value) {
case __MODULE__.t_dtype.DTYPE_FLOAT64.value:
case __MODULE__.t_dtype.DTYPE_FLOAT64.value: {
col.push(Number(val));
break;
}
case __MODULE__.t_dtype.DTYPE_INT32.value: {
col.push(Number(val));
if (val > 2147483647 || val < -2147483648) {
// Avoid overflow errors
// FIXME: fully avoid overflow errors
data_types[column_names.indexOf(name)] = __MODULE__.t_dtype.DTYPE_FLOAT64;
console.warn(`Promoting type of column ${name} from Integer to Float type.`);
}
break;
}
@@ -213,7 +141,7 @@ export class DataParser {
parse(__MODULE__, data) {
const format = this.is_format(data);
let names = __MODULE__.column_names(data, format);
let types = this.data_types(__MODULE__, data, format, names);
let types = __MODULE__.data_types(data, format, names, moment, DATE_PARSE_CANDIDATES);
let [cdata, row_count] = this.make_columnar_data(__MODULE__, data, format, names, types);
return {cdata, names, types, row_count, is_arrow: false};
}
19 changes: 19 additions & 0 deletions packages/perspective/test/js/constructors.js
Original file line number Diff line number Diff line change
@@ -462,6 +462,25 @@ module.exports = perspective => {
table.delete();
});

// Currently skipped. Checks that an "integer" column is reported as "float"
// after an update containing a value outside the 32-bit signed integer range.
it.skip("Upgrades integer columns with values beyond max/min_int to float", async function() {
    const schema = {
        a: "integer"
    };

    // 2147483667 exceeds 2147483647, the largest 32-bit signed integer.
    const int_to_float = {
        a: [1, 2, 3, 2147483667, 5]
    };

    const table = perspective.table(schema);
    const schema_1 = await table.schema();
    expect(schema_1["a"]).toEqual("integer");

    table.update(int_to_float);

    const schema_2 = await table.schema();
    expect(schema_2["a"]).toEqual("float");

    // Release the table, as the neighbouring tests do.
    table.delete();
});

it("has correct size", async function() {
var table = perspective.table(data);
let result = await table.size();
110 changes: 80 additions & 30 deletions src/cpp/main.cpp
Original file line number Diff line number Diff line change
@@ -874,6 +874,38 @@ is_valid_date(val moment, val candidates, val x) {
.as<t_bool>();
}

// Name parsing
/**
 * Collect the column names of a dataset passed in from JavaScript.
 *
 * For format 1 the names start as the keys of the first element of `data`.
 * The leading elements are then scanned; whenever an element has a different
 * number of keys, its keys replace the current names and the scan window is
 * doubled. For formats 2 and 3 the names are the keys of `data` itself. Any
 * other format yields an empty array.
 *
 * @param data JavaScript value holding the dataset.
 * @param format numeric format tag (presumably 1 = row-oriented,
 *        2 = column-oriented, 3 = schema -- confirm against the JS caller).
 * @return JavaScript array of column names.
 */
val
column_names(val data, t_int32 format) {
    val column_names = val::array();
    val Object = val::global("Object");

    if (format == 1) {
        const t_int32 data_length = data["length"].as<t_int32>();
        t_int32 max_check = 50;
        bool warned = false;
        column_names = Object.call<val>("keys", data[0]);

        // The bound is re-evaluated on every pass so that growing max_check
        // really widens the scan; a bound computed once would make the
        // doubling below a no-op.
        for (t_int32 ix = 0; ix < max_check && ix < data_length; ix++) {
            val next = Object.call<val>("keys", data[ix]);
            const t_int32 current_count = column_names["length"].as<t_int32>();
            const t_int32 next_count = next["length"].as<t_int32>();

            if (current_count != next_count) {
                if (!warned) {
                    // Only announce the inconsistency the first time it is seen.
                    std::cout << "Data parse warning: Array data has inconsistent rows" << std::endl;
                    warned = true;
                }

                std::cout << "Extending from " << current_count << " to " << next_count << std::endl;
                column_names = next;

                // Double the window, saturating at data_length so repeated
                // doubling cannot overflow t_int32.
                max_check = (max_check > data_length / 2) ? data_length : max_check * 2;
            }
        }
    } else if (format == 2 || format == 3) {
        column_names = Object.call<val>("keys", data);
    }

    return column_names;
}

// Type inference
t_dtype
infer_type(val x, val moment, val candidates) {
t_str jstype = x.typeOf().as<t_str>();
@@ -917,36 +949,6 @@ infer_type(val x, val moment, val candidates) {
return t;
}

/**
 * Collect the column names of a dataset passed in from JavaScript.
 *
 * For format 1 the names start as the keys of the first element of `data`.
 * The leading elements are then scanned; whenever an element has a different
 * number of keys, its keys replace the current names and the scan window is
 * doubled. For formats 2 and 3 the names are the keys of `data` itself. Any
 * other format yields an empty array.
 *
 * @param data JavaScript value holding the dataset.
 * @param format numeric format tag (presumably 1 = row-oriented,
 *        2 = column-oriented, 3 = schema -- confirm against the JS caller).
 * @return JavaScript array of column names.
 */
val
column_names(val data, t_int32 format) {
    val column_names = val::array();
    val Object = val::global("Object");

    if (format == 1) {
        const t_int32 data_length = data["length"].as<t_int32>();
        t_int32 max_check = 50;
        bool warned = false;
        column_names = Object.call<val>("keys", data[0]);

        // The bound is re-evaluated on every pass so that growing max_check
        // really widens the scan; a bound computed once would make the
        // doubling below a no-op.
        for (t_int32 ix = 0; ix < max_check && ix < data_length; ix++) {
            val next = Object.call<val>("keys", data[ix]);
            const t_int32 current_count = column_names["length"].as<t_int32>();
            const t_int32 next_count = next["length"].as<t_int32>();

            if (current_count != next_count) {
                if (!warned) {
                    // Only announce the inconsistency the first time it is seen.
                    std::cout << "Data parse warning: Array data has inconsistent rows" << std::endl;
                    warned = true;
                }

                std::cout << "Extending from " << current_count << " to " << next_count << std::endl;
                column_names = next;

                // Double the window, saturating at data_length so repeated
                // doubling cannot overflow t_int32.
                max_check = (max_check > data_length / 2) ? data_length : max_check * 2;
            }
        }
    } else if (format == 2 || format == 3) {
        column_names = Object.call<val>("keys", data);
    }

    return column_names;
}

t_dtype
get_data_type(val data, t_int32 format, t_str name, val moment, val candidates) {
t_int32 i = 0;
@@ -984,6 +986,53 @@ get_data_type(val data, t_int32 format, t_str name, val moment, val candidates)
}
}

/**
 * Determine the dtype of every column of a dataset passed in from JavaScript.
 *
 * For format 3, `data` is read as an object whose values are type names
 * ("integer", "float", "string", "boolean", "datetime", "date"); one dtype is
 * produced per key of `data`. For any other format, the dtype of each entry
 * of `column_names` is obtained from `get_data_type`.
 *
 * @param data JavaScript value holding the dataset or the schema object.
 * @param format numeric format tag; 3 selects the type-name mapping above.
 * @param column_names JavaScript array of column names; must not be empty.
 * @param moment forwarded unchanged to `get_data_type`.
 * @param candidates forwarded unchanged to `get_data_type`.
 * @return JavaScript array holding one dtype per column.
 * @throws std::invalid_argument if `column_names` is empty.
 * @throws std::logic_error if a type name is not one of those listed above.
 */
val
data_types(val data, t_int32 format, val column_names, val moment, val candidates) {
    const t_int32 names_length = column_names["length"].as<t_int32>();
    if (names_length == 0) {
        throw std::invalid_argument("Cannot determine data types without column names!");
    }

    val types = val::array();

    if (format != 3) {
        // Non-schema data: infer each column's type from its values.
        for (t_int32 idx = 0; idx < names_length; idx++) {
            const t_str column = column_names[idx].as<t_str>();
            types.call<void>("push", get_data_type(data, format, column, moment, candidates));
        }
        return types;
    }

    // Translate a schema type name into the matching dtype.
    const auto dtype_for = [](const t_str& type_name) -> t_dtype {
        if (type_name == "integer") {
            return t_dtype::DTYPE_INT32;
        }
        if (type_name == "float") {
            return t_dtype::DTYPE_FLOAT64;
        }
        if (type_name == "string") {
            return t_dtype::DTYPE_STR;
        }
        if (type_name == "boolean") {
            return t_dtype::DTYPE_BOOL;
        }
        if (type_name == "datetime") {
            return t_dtype::DTYPE_TIME;
        }
        if (type_name == "date") {
            return t_dtype::DTYPE_DATE;
        }
        throw std::logic_error("Unknown type!");
    };

    val schema_keys = val::global("Object").call<val>("keys", data);
    const t_int32 key_count = schema_keys["length"].as<t_int32>();

    for (t_int32 idx = 0; idx < key_count; idx++) {
        const t_str type_name = data[schema_keys[idx]].as<t_str>();
        types.call<void>("push", dtype_for(type_name));
    }

    return types;
}

/**
* Create a default gnode.
*
@@ -1577,6 +1626,7 @@ EMSCRIPTEN_BINDINGS(perspective) {
function("infer_type", &infer_type);
function("column_names", &column_names);
function("get_data_type", &get_data_type);
function("data_types", &data_types);
function("make_table", &make_table, allow_raw_pointers());
function("make_gnode", &make_gnode);
function("clone_gnode_table", &clone_gnode_table, allow_raw_pointers());

0 comments on commit 0fa9f56

Please sign in to comment.