diff --git a/cpp/src/arrow/array/CMakeLists.txt b/cpp/src/arrow/array/CMakeLists.txt index d8dc83bb71d..6a9c3cec02c 100644 --- a/cpp/src/arrow/array/CMakeLists.txt +++ b/cpp/src/arrow/array/CMakeLists.txt @@ -16,6 +16,7 @@ # under the License. add_arrow_test(concatenate_test) +add_arrow_test(data_test) add_arrow_test(diff_test) # Headers: top level diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index e921da86e15..c6636df9bb3 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -641,7 +641,7 @@ struct ARROW_EXPORT ArraySpan { this->length = length; if (this->type->id() == Type::NA) { this->null_count = this->length; - } else if (this->MayHaveNulls()) { + } else if (buffers[0].data != NULLPTR) { this->null_count = kUnknownNullCount; } else { this->null_count = 0; diff --git a/cpp/src/arrow/array/data_test.cc b/cpp/src/arrow/array/data_test.cc new file mode 100644 index 00000000000..011249c54e0 --- /dev/null +++ b/cpp/src/arrow/array/data_test.cc @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <gtest/gtest.h> + +#include "arrow/array.h" +#include "arrow/array/data.h" +#include "arrow/testing/gtest_util.h" + +namespace arrow { + +TEST(ArraySpan, SetSlice) { + auto arr = ArrayFromJSON(int32(), "[0, 1, 2, 3, 4, 5, 6, null, 7, 8, 9]"); + ArraySpan span(*arr->data()); + ASSERT_EQ(span.length, arr->length()); + ASSERT_EQ(span.null_count, 1); + ASSERT_EQ(span.offset, 0); + + span.SetSlice(0, 7); + ASSERT_EQ(span.length, 7); + ASSERT_EQ(span.null_count, kUnknownNullCount); + ASSERT_EQ(span.offset, 0); + ASSERT_EQ(span.GetNullCount(), 0); + + span.SetSlice(7, 4); + ASSERT_EQ(span.length, 4); + ASSERT_EQ(span.null_count, kUnknownNullCount); + ASSERT_EQ(span.offset, 7); + ASSERT_EQ(span.GetNullCount(), 1); +} + +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 196912679ba..b357a28d0f7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -3720,6 +3720,17 @@ TEST(TestChoose, FixedSizeBinary) { *MakeArrayOfNull(type, 5)); } +// GH-47807: Null count in ArraySpan not updated correctly when executing chunked. +TEST(TestChoose, WrongNullCountForChunked) { + auto indices = ArrayFromJSON(int64(), "[0, 1, 0, 1, 0, null]"); + auto values1 = ArrayFromJSON(int64(), "[10, 11, 12, 13, 14, 15]"); + auto values2 = ChunkedArrayFromJSON(int64(), {"[100, 101]", "[102, 103, 104, 105]"}); + ASSERT_OK_AND_ASSIGN(auto result, CallFunction("choose", {indices, values1, values2})); + ASSERT_OK(result.chunked_array()->ValidateFull()); + AssertDatumsEqual(ChunkedArrayFromJSON(int64(), {"[10, 101]", "[12, 103, 14, null]"}), + result); +} + TEST(TestChooseKernel, DispatchBest) { ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction("choose")); auto Check = [&](std::vector original_values) {