Skip to content

Commit cafaec6

Browse files
authored
Merge pull request #1044 from finos/py-segfaults
Remove duplicate `psp_okey` column from arrow updates
2 parents 1cfe88f + 9dd545e commit cafaec6

File tree

3 files changed

+56
-4
lines changed

3 files changed

+56
-4
lines changed

cpp/perspective/src/cpp/emscripten.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,8 @@ namespace binding {
10641064

10651065
// Always use the `Table` column names and data types on update.
10661066
if (table_initialized && is_update) {
1067-
auto schema = gnode->get_output_schema();
1067+
auto gnode_output_schema = gnode->get_output_schema();
1068+
auto schema = gnode_output_schema.drop({"psp_okey"});
10681069
column_names = schema.columns();
10691070
data_types = schema.types();
10701071

@@ -1099,7 +1100,7 @@ namespace binding {
10991100
}
11001101

11011102
// Updated data types need to reflect in new data table
1102-
auto new_schema = gnode->get_output_schema();
1103+
auto new_schema = gnode->get_output_schema().drop({"psp_okey"});
11031104
data_types = new_schema.types();
11041105
}
11051106
} else {

python/perspective/perspective/src/table.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ std::shared_ptr<Table> make_table_py(t_val table, t_data_accessor accessor,
6262

6363
// Always use the `Table` column names and data types on update.
6464
if (table_initialized && is_update) {
65-
auto schema = gnode->get_output_schema();
65+
auto gnode_output_schema = gnode->get_output_schema();
66+
auto schema = gnode_output_schema.drop({"psp_okey"});
6667
column_names = schema.columns();
6768
data_types = schema.types();
6869

@@ -97,7 +98,7 @@ std::shared_ptr<Table> make_table_py(t_val table, t_data_accessor accessor,
9798
}
9899
}
99100
// Make sure promoted types are used to construct data table
100-
auto new_schema = gnode->get_output_schema();
101+
auto new_schema = gnode->get_output_schema().drop({"psp_okey"});
101102
data_types = new_schema.types();
102103
} else {
103104
column_names = arrow_loader.names();

python/perspective/perspective/tests/table/test_update_arrow.py

+50
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
#
88

99
import os
10+
import random
11+
import uuid
1012
import pyarrow as pa
1113
from datetime import date, datetime
14+
from pytest import mark
1215
from perspective.table import Table
1316

1417
SOURCE_STREAM_ARROW = os.path.join(os.path.dirname(__file__), "arrow", "int_float_str.arrow")
@@ -476,3 +479,50 @@ def test_update_arrow_column_order_int(self, util):
476479
assert tbl.view().to_dict() == {
477480
name: data[0] for name in names
478481
}
482+
483+
def test_update_arrow_thread_safe_int_index(self, util):
484+
data = [["a", "b", "c"] for i in range(10)]
485+
data += [[1, 2, 3]]
486+
names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]
487+
arrow = util.make_arrow(names, data)
488+
tbl = Table(arrow, index="uid")
489+
490+
for i in range(100):
491+
idx = (1, 2, 3)[random.randint(0, 2)]
492+
update_data = [[str(uuid.uuid4()) + str(random.randint(100, 1000000000))], [idx]]
493+
update_names = [names[random.randint(0, 9)], "uid"]
494+
update_arrow = util.make_arrow(update_names, update_data)
495+
tbl.update(update_arrow)
496+
497+
assert tbl.size() == 3
498+
499+
def test_update_arrow_thread_safe_datetime_index(self, util):
500+
data = [["a", "b", "c"] for i in range(10)]
501+
data += [[datetime(2020, 1, 15, 12, 17), datetime(2020, 1, 15, 12, 18), datetime(2020, 1, 15, 12, 19)]]
502+
names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]
503+
arrow = util.make_arrow(names, data)
504+
tbl = Table(arrow, index="uid")
505+
506+
for i in range(100):
507+
idx = (datetime(2020, 1, 15, 12, 17), datetime(2020, 1, 15, 12, 18), datetime(2020, 1, 15, 12, 19))[random.randint(0, 2)]
508+
update_data = [[str(uuid.uuid4()) + str(random.randint(100, 1000000000))], [idx]]
509+
update_names = [names[random.randint(0, 9)], "uid"]
510+
update_arrow = util.make_arrow(update_names, update_data)
511+
tbl.update(update_arrow)
512+
513+
assert tbl.size() == 3
514+
515+
def test_update_arrow_thread_safe_str_index(self, util):
516+
data = [["a", "b", "c"] for i in range(11)]
517+
names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]
518+
arrow = util.make_arrow(names, data)
519+
tbl = Table(arrow, index="uid")
520+
521+
for i in range(100):
522+
idx = ("a", "b", "c")[random.randint(0, 2)]
523+
update_data = [[str(uuid.uuid4()) + str(random.randint(100, 1000000000))], [idx]]
524+
update_names = [names[random.randint(0, 9)], "uid"]
525+
update_arrow = util.make_arrow(update_names, update_data)
526+
tbl.update(update_arrow)
527+
528+
assert tbl.size() == 3

0 commit comments

Comments
 (0)