Skip to content

Commit 74406e9

Browse files
Merge pull request #4 from pcmoritz/key_tuples
Tuple keys in dicts can be serialized
2 parents 7b548bd + 5c409c3 commit 74406e9

File tree

4 files changed

+30
-23
lines changed

4 files changed

+30
-23
lines changed

cpp/src/numbuf/dict.cc

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,14 @@ using namespace arrow;
55
namespace numbuf {
66

77
std::shared_ptr<arrow::StructArray> DictBuilder::Finish(
8-
std::shared_ptr<Array> list_data,
9-
std::shared_ptr<Array> tuple_data,
10-
std::shared_ptr<Array> dict_data) {
8+
std::shared_ptr<Array> key_tuple_data,
9+
std::shared_ptr<Array> val_list_data,
10+
std::shared_ptr<Array> val_tuple_data,
11+
std::shared_ptr<Array> val_dict_data) {
1112
// lists and dicts can't be keys of dicts in Python, that is why for
1213
// the keys we do not need to collect sublists
13-
auto keys = keys_.Finish(nullptr, nullptr, nullptr);
14-
auto vals = vals_.Finish(list_data, tuple_data, dict_data);
14+
auto keys = keys_.Finish(nullptr, key_tuple_data, nullptr);
15+
auto vals = vals_.Finish(val_list_data, val_tuple_data, val_dict_data);
1516
auto keys_field = std::make_shared<Field>("keys", keys->type());
1617
auto vals_field = std::make_shared<Field>("vals", vals->type());
1718
auto type = std::make_shared<StructType>(std::vector<FieldPtr>({keys_field, vals_field}));

cpp/src/numbuf/dict.h

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,10 @@ class DictBuilder {
3434
value list of the dictionary
3535
*/
3636
std::shared_ptr<arrow::StructArray> Finish(
37-
std::shared_ptr<arrow::Array> list_data,
38-
std::shared_ptr<arrow::Array> tuple_data,
39-
std::shared_ptr<arrow::Array> dict_data);
37+
std::shared_ptr<arrow::Array> key_tuple_data,
38+
std::shared_ptr<arrow::Array> val_list_data,
39+
std::shared_ptr<arrow::Array> val_tuple_data,
40+
std::shared_ptr<arrow::Array> val_dict_data);
4041

4142
private:
4243
SequenceBuilder keys_;

python/src/pynumbuf/adapters/python.cc

Lines changed: 17 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -171,29 +171,33 @@ Status DeserializeTuple(std::shared_ptr<Array> array, int32_t start_idx, int32_t
171171

172172
Status SerializeDict(std::vector<PyObject*> dicts, std::shared_ptr<Array>* out) {
173173
DictBuilder result;
174-
std::vector<PyObject*> sublists, subtuples, subdicts, dummy;
174+
std::vector<PyObject*> key_tuples, val_lists, val_tuples, val_dicts, dummy;
175175
for (const auto& dict : dicts) {
176176
PyObject *key, *value;
177177
Py_ssize_t pos = 0;
178178
while (PyDict_Next(dict, &pos, &key, &value)) {
179-
RETURN_NOT_OK(append(key, result.keys(), dummy, dummy, dummy));
179+
RETURN_NOT_OK(append(key, result.keys(), dummy, key_tuples, dummy));
180180
DCHECK(dummy.size() == 0);
181-
RETURN_NOT_OK(append(value, result.vals(), sublists, subtuples, subdicts));
181+
RETURN_NOT_OK(append(value, result.vals(), val_lists, val_tuples, val_dicts));
182182
}
183183
}
184-
std::shared_ptr<Array> val_list;
185-
if (sublists.size() > 0) {
186-
RETURN_NOT_OK(SerializeSequences(sublists, &val_list));
184+
std::shared_ptr<Array> key_tuples_arr;
185+
if (key_tuples.size() > 0) {
186+
RETURN_NOT_OK(SerializeSequences(key_tuples, &key_tuples_arr));
187187
}
188-
std::shared_ptr<Array> val_tuples;
189-
if (subtuples.size() > 0) {
190-
RETURN_NOT_OK(SerializeSequences(subtuples, &val_tuples));
188+
std::shared_ptr<Array> val_list_arr;
189+
if (val_lists.size() > 0) {
190+
RETURN_NOT_OK(SerializeSequences(val_lists, &val_list_arr));
191191
}
192-
std::shared_ptr<Array> val_dict;
193-
if (subdicts.size() > 0) {
194-
RETURN_NOT_OK(SerializeDict(subdicts, &val_dict));
192+
std::shared_ptr<Array> val_tuples_arr;
193+
if (val_tuples.size() > 0) {
194+
RETURN_NOT_OK(SerializeSequences(val_tuples, &val_tuples_arr));
195+
}
196+
std::shared_ptr<Array> val_dict_arr;
197+
if (val_dicts.size() > 0) {
198+
RETURN_NOT_OK(SerializeDict(val_dicts, &val_dict_arr));
195199
}
196-
*out = result.Finish(val_list, val_tuples, val_dict);
200+
*out = result.Finish(key_tuples_arr, val_list_arr, val_tuples_arr, val_dict_arr);
197201
return Status::OK();
198202
}
199203

python/test/runtest.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@
33
import numpy as np
44
from numpy.testing import assert_equal
55

6-
TEST_OBJECTS = [[1, "hello", 3.0], 42, 43L, "hello world", u"x", u"\u262F",
7-
42.0, 1L << 62, (1.0, "hi"),
6+
TEST_OBJECTS = [{(1,2) : 1}, {() : 2}, [1, "hello", 3.0], 42, 43L, "hello world",
7+
u"x", u"\u262F", 42.0,
8+
1L << 62, (1.0, "hi"),
89
None, (None, None), ("hello", None),
910
True, False, (True, False), "hello",
1011
{True: "hello", False: "world"},

0 commit comments

Comments
 (0)