@@ -174,8 +174,9 @@ void copyArrayDataReplacingNulls(T* dst, std::shared_ptr<arrow::Array> arr) {
174
174
175
175
template <typename T>
176
176
std::shared_ptr<arrow::ChunkedArray> replaceNullValuesImpl (
177
- std::shared_ptr<arrow::ChunkedArray> arr) {
178
- if (!std::is_same_v<T, bool > && arr->null_count () == 0 ) {
177
+ std::shared_ptr<arrow::ChunkedArray> arr,
178
+ bool force_copy) {
179
+ if (!force_copy && !std::is_same_v<T, bool > && arr->null_count () == 0 ) {
179
180
// for boolean columns we still need to convert bitmaps to array
180
181
return arr;
181
182
}
@@ -884,7 +885,9 @@ std::shared_ptr<arrow::ChunkedArray> convertDecimalToInteger(
884
885
std::shared_ptr<arrow::ChunkedArray> replaceNullValues (
885
886
std::shared_ptr<arrow::ChunkedArray> arr,
886
887
const hdk::ir::Type* type,
887
- StringDictionary* dict) {
888
+ StringDictionary* dict,
889
+ bool force_single_chunk) {
890
+ bool force_copy = force_single_chunk && (arr->chunks ().size () > 1 );
888
891
if (type->isTime ()) {
889
892
if (type->size () != 8 ) {
890
893
throw std::runtime_error (" Unsupported time type for Arrow import: " s +
@@ -902,7 +905,7 @@ std::shared_ptr<arrow::ChunkedArray> replaceNullValues(
902
905
case 2 :
903
906
return convertDateReplacingNulls<int32_t , int16_t >(arr);
904
907
case 4 :
905
- return replaceNullValuesImpl<int32_t >(arr);
908
+ return replaceNullValuesImpl<int32_t >(arr, force_copy );
906
909
case 8 :
907
910
return convertDateReplacingNulls<int32_t , int64_t >(arr);
908
911
default :
@@ -912,26 +915,26 @@ std::shared_ptr<arrow::ChunkedArray> replaceNullValues(
912
915
} else if (type->isInteger () || type->isTimestamp ()) {
913
916
switch (type->size ()) {
914
917
case 1 :
915
- return replaceNullValuesImpl<int8_t >(arr);
918
+ return replaceNullValuesImpl<int8_t >(arr, force_copy );
916
919
case 2 :
917
- return replaceNullValuesImpl<int16_t >(arr);
920
+ return replaceNullValuesImpl<int16_t >(arr, force_copy );
918
921
case 4 :
919
- return replaceNullValuesImpl<int32_t >(arr);
922
+ return replaceNullValuesImpl<int32_t >(arr, force_copy );
920
923
case 8 :
921
- return replaceNullValuesImpl<int64_t >(arr);
924
+ return replaceNullValuesImpl<int64_t >(arr, force_copy );
922
925
default :
923
926
throw std::runtime_error (" Unsupported integer/datetime type for Arrow import: " s +
924
927
type->toString ());
925
928
}
926
929
} else if (type->isFloatingPoint ()) {
927
930
switch (type->as <hdk::ir::FloatingPointType>()->precision ()) {
928
931
case hdk::ir::FloatingPointType::kFloat :
929
- return replaceNullValuesImpl<float >(arr);
932
+ return replaceNullValuesImpl<float >(arr, force_copy );
930
933
case hdk::ir::FloatingPointType::kDouble :
931
- return replaceNullValuesImpl<double >(arr);
934
+ return replaceNullValuesImpl<double >(arr, force_copy );
932
935
}
933
936
} else if (type->isBoolean ()) {
934
- return replaceNullValuesImpl<bool >(arr);
937
+ return replaceNullValuesImpl<bool >(arr, force_copy );
935
938
} else if (type->isFixedLenArray ()) {
936
939
return replaceNullValuesFixedSizeArray (arr, type, dict);
937
940
} else if (type->isVarLenArray ()) {
0 commit comments