diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h
index 09e539d162f..5d050b77f77 100644
--- a/cpp/src/arrow/buffer.h
+++ b/cpp/src/arrow/buffer.h
@@ -25,6 +25,7 @@
 #include <string>
 
 #include "arrow/status.h"
+#include "arrow/util/bit-util.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"
 
@@ -204,7 +205,8 @@ class ARROW_EXPORT BufferBuilder {
 
   Status Append(const uint8_t* data, int64_t length) {
     if (capacity_ < length + size_) {
-      RETURN_NOT_OK(Resize(length + size_));
+      int64_t new_capacity = BitUtil::NextPower2(length + size_);
+      RETURN_NOT_OK(Resize(new_capacity));
     }
     UnsafeAppend(data, length);
     return Status::OK();
@@ -213,7 +215,8 @@ class ARROW_EXPORT BufferBuilder {
   // Advance pointer and zero out memory
   Status Advance(int64_t length) {
     if (capacity_ < length + size_) {
-      RETURN_NOT_OK(Resize(length + size_));
+      int64_t new_capacity = BitUtil::NextPower2(length + size_);
+      RETURN_NOT_OK(Resize(new_capacity));
     }
     memset(data_ + size_, 0, static_cast<size_t>(length));
     size_ += length;
diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc
index 8ba9360e917..13d7b20591d 100644
--- a/cpp/src/arrow/builder-benchmark.cc
+++ b/cpp/src/arrow/builder-benchmark.cc
@@ -156,6 +156,22 @@ static void BM_BuildStringDictionary(
                           sizeof(int32_t));
 }
 
+static void BM_BuildBinaryArray(benchmark::State& state) {  // NOLINT non-const reference
+  const int64_t iterations = 1 << 20;
+
+  std::string value = "1234567890";
+  while (state.KeepRunning()) {
+    BinaryBuilder builder(default_memory_pool());
+    for (int64_t i = 0; i < iterations; i++) {
+      ABORT_NOT_OK(builder.Append(value));
+    }
+    std::shared_ptr<Array> out;
+    ABORT_NOT_OK(builder.Finish(&out));
+  }
+  // Each appended value carries value.size() bytes of payload
+  state.SetBytesProcessed(state.iterations() * iterations * value.size());
+}
+
 BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMicrosecond);
 BENCHMARK(BM_BuildVectorNoNulls)->Repetitions(3)->Unit(benchmark::kMicrosecond);
 BENCHMARK(BM_BuildAdaptiveIntNoNulls)->Repetitions(3)->Unit(benchmark::kMicrosecond);
@@ -166,4 +182,6 @@ BENCHMARK(BM_BuildAdaptiveUIntNoNulls)->Repetitions(3)->Unit(benchmark::kMicrose
 BENCHMARK(BM_BuildDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond);
 BENCHMARK(BM_BuildStringDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond);
 
+BENCHMARK(BM_BuildBinaryArray)->Repetitions(3)->Unit(benchmark::kMicrosecond);
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/config.cc b/cpp/src/arrow/python/config.cc
index 92ca9db9cc3..bda7a7af163 100644
--- a/cpp/src/arrow/python/config.cc
+++ b/cpp/src/arrow/python/config.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "arrow/python/platform.h"
+
 #include "arrow/python/config.h"
 
 namespace arrow {
diff --git a/cpp/src/plasma/store.cc b/cpp/src/plasma/store.cc
index 34adc6261eb..a9425b6f7d9 100644
--- a/cpp/src/plasma/store.cc
+++ b/cpp/src/plasma/store.cc
@@ -690,8 +690,9 @@ int main(int argc, char* argv[]) {
   close(shm_fd);
   if (system_memory > shm_mem_avail) {
     ARROW_LOG(FATAL) << "System memory request exceeds memory available in /dev/shm. The "
-                        "request is for " << system_memory
-                     << " bytes, and the amount available is " << shm_mem_avail
+                        "request is for "
+                     << system_memory << " bytes, and the amount available is "
+                     << shm_mem_avail
                      << " bytes. You may be able to free up space by deleting files in "
                         "/dev/shm. If you are inside a Docker container, you may need to "
                         "pass "