diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt index f77545c1189..79e0e304196 100644 --- a/cpp/src/arrow/python/CMakeLists.txt +++ b/cpp/src/arrow/python/CMakeLists.txt @@ -75,6 +75,8 @@ add_arrow_lib(arrow_python ${ARROW_PYTHON_SRCS} OUTPUTS ARROW_PYTHON_LIBRARIES + DEPENDENCIES + arrow_dependencies SHARED_LINK_FLAGS "" SHARED_LINK_LIBS diff --git a/cpp/src/arrow/python/deserialize.cc b/cpp/src/arrow/python/deserialize.cc index f17abbe7cb3..8e7ba8a4a02 100644 --- a/cpp/src/arrow/python/deserialize.cc +++ b/cpp/src/arrow/python/deserialize.cc @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include "arrow/table.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" +#include "arrow/util/parsing.h" #include "arrow/python/common.h" #include "arrow/python/helpers.h" @@ -47,6 +49,7 @@ namespace arrow { using internal::checked_cast; +using internal::StringConverter; namespace py { @@ -194,14 +197,20 @@ Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t type, return Status::OK(); } -std::vector GetPythonTypes(const UnionArray& data) { - std::vector result; +Status GetPythonTypes(const UnionArray& data, std::vector* result) { + ARROW_CHECK(result != nullptr); auto type = data.type(); for (int i = 0; i < type->num_children(); ++i) { - // stoi is locale dependent, but should be ok for small integers - result.push_back(static_cast(std::stoi(type->child(i)->name()))); + StringConverter converter; + int8_t tag = 0; + const std::string& data = type->child(i)->name(); + if (!converter(data.c_str(), data.size(), &tag)) { + return Status::SerializationError("Cannot convert string: \"", + type->child(i)->name(), "\" to int8_t"); + } + result->push_back(tag); } - return result; + return Status::OK(); } template @@ -215,7 +224,8 @@ Status DeserializeSequence(PyObject* context, const Array& array, int64_t start_ RETURN_IF_PYERROR(); const uint8_t* type_ids = data.raw_type_ids(); const int32_t* value_offsets = data.raw_value_offsets(); - auto python_types = GetPythonTypes(data); + std::vector python_types; + RETURN_NOT_OK(GetPythonTypes(data, &python_types)); for (int64_t i = start_idx; i < stop_idx; ++i) { if (data.IsNull(i)) { Py_INCREF(Py_None); diff --git a/cpp/src/arrow/python/serialize.cc b/cpp/src/arrow/python/serialize.cc index 3ccdfc8eee5..09a092dc035 100644 --- a/cpp/src/arrow/python/serialize.cc +++ b/cpp/src/arrow/python/serialize.cc @@ -90,8 +90,10 @@ class SequenceBuilder { MakeBuilderFn make_builder) { if (!*child_builder) { child_builder->reset(make_builder()); - // std::to_string is locale dependent, but should be ok for small integers - type_map_[tag] = builder_->AppendChild(*child_builder, std::to_string(tag)); + std::ostringstream convert; + convert.imbue(std::locale::classic()); + convert << static_cast(tag); + type_map_[tag] = builder_->AppendChild(*child_builder, convert.str()); } return Update(child_builder->get(), type_map_[tag]); }