diff --git a/cpp/cmake_modules/UseCython.cmake b/cpp/cmake_modules/UseCython.cmake
index ccdeb4f3f03..0d4b17d3e57 100644
--- a/cpp/cmake_modules/UseCython.cmake
+++ b/cpp/cmake_modules/UseCython.cmake
@@ -22,7 +22,7 @@
 # (this is an inherent limitation of Cython).
 #
 # The sample paths set with the CMake include_directories() command will be used
-# for include directories to search for *.pxd when running the Cython complire.
+# for include directories to search for *.pxd when running the Cython compiler.
 #
 # Cache variables that effect the behavior include:
 #
diff --git a/cpp/examples/arrow/row-wise-conversion-example.cc b/cpp/examples/arrow/row-wise-conversion-example.cc
index 42cab6cc76e..fb54b040f44 100644
--- a/cpp/examples/arrow/row-wise-conversion-example.cc
+++ b/cpp/examples/arrow/row-wise-conversion-example.cc
@@ -78,7 +78,7 @@ arrow::Status VectorToColumnarTable(const std::vector<struct data_row>& rows,
     // Indicate the start of a new list row. This will memorise the current
     // offset in the values builder.
    ARROW_RETURN_NOT_OK(components_builder.Append());
-    // Store the actual values. The final nullptr argument tells the underyling
+    // Store the actual values. The final nullptr argument tells the underlying
     // builder that all added values are valid, i.e. non-null.
    ARROW_RETURN_NOT_OK(cost_components_builder.AppendValues(row.cost_components.data(),
                                                             row.cost_components.size()));
diff --git a/cpp/src/arrow/dataset/dataset_test.cc b/cpp/src/arrow/dataset/dataset_test.cc
index d46e9194d8a..0b138caafe7 100644
--- a/cpp/src/arrow/dataset/dataset_test.cc
+++ b/cpp/src/arrow/dataset/dataset_test.cc
@@ -744,7 +744,7 @@ TEST_F(TestSchemaUnification, SelectPartitionColumnsFilterPhysicalColumn) {
 }
 
 TEST_F(TestSchemaUnification, SelectMixedColumnsAndFilter) {
-  // Selects mix of phyical/virtual with a different order and uses a filter on
+  // Selects mix of physical/virtual with a different order and uses a filter on
   // a physical column not selected.
   ASSERT_OK_AND_ASSIGN(auto scan_builder, dataset_->NewScan());
   ASSERT_OK(scan_builder->Filter("phy_2"_ >= 212));
diff --git a/cpp/src/arrow/dbi/hiveserver2/thrift/Types.thrift b/cpp/src/arrow/dbi/hiveserver2/thrift/Types.thrift
index 4238f9c26ba..39ae6d0ba6d 100644
--- a/cpp/src/arrow/dbi/hiveserver2/thrift/Types.thrift
+++ b/cpp/src/arrow/dbi/hiveserver2/thrift/Types.thrift
@@ -115,7 +115,7 @@ enum TRuntimeFilterMode {
   // fragment.
   LOCAL,
 
-  // All fiters are computed in the BE, and are published globally.
+  // All filters are computed in the BE, and are published globally.
   GLOBAL
 }
diff --git a/cpp/src/arrow/ipc/message.h b/cpp/src/arrow/ipc/message.h
index c6037b9a26c..6a7619d31b3 100644
--- a/cpp/src/arrow/ipc/message.h
+++ b/cpp/src/arrow/ipc/message.h
@@ -365,7 +365,7 @@ class ARROW_EXPORT MessageDecoder {
   ///      memcpy(buffer->mutable_data() + current_buffer_size,
   ///             small_chunk,
   ///             small_chunk_size);
-  ///      if (buffer->size() < decoder.next_requied_size()) {
+  ///      if (buffer->size() < decoder.next_required_size()) {
   ///        continue;
   ///      }
   ///      std::shared_ptr<arrow::Buffer> chunk(buffer.release());
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index d7a6a66692e..831826ebf98 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -355,7 +355,7 @@ class ARROW_EXPORT StreamDecoder {
   ///      memcpy(buffer->mutable_data() + current_buffer_size,
   ///             small_chunk,
   ///             small_chunk_size);
-  ///      if (buffer->size() < decoder.next_requied_size()) {
+  ///      if (buffer->size() < decoder.next_required_size()) {
   ///        continue;
   ///      }
   ///      std::shared_ptr<arrow::Buffer> chunk(buffer.release());
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index adce911e102..624edec0e2f 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -1039,7 +1039,7 @@ class ARROW_EXPORT IpcFormatWriter : public RecordBatchWriter {
         return Status::Invalid(
             "Dictionary replacement detected when writing IPC file format. "
             "Arrow IPC files only support a single dictionary for a given field "
-            "accross all batches.");
+            "across all batches.");
       }
 
       RETURN_NOT_OK(GetDictionaryPayload(dictionary_id, dictionary, options_, &payload));
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index c8a71ab9c13..c45cf857a2a 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -126,7 +126,7 @@ class ARROW_EXPORT DataType : public detail::Fingerprintable {
   ARROW_DEPRECATED("Use field(i)")
   const std::shared_ptr<Field>& child(int i) const { return field(i); }
 
-  /// Returns the the child-field at index i.
+  /// Returns the child-field at index i.
   const std::shared_ptr<Field>& field(int i) const { return children_[i]; }
 
   ARROW_DEPRECATED("Use fields()")
@@ -1718,18 +1718,18 @@ class ARROW_EXPORT SchemaBuilder {
   };
 
   /// \brief Construct an empty SchemaBuilder
-  /// `field_merge_options` is only effecitive when `conflict_policy` == `CONFLICT_MERGE`.
+  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
   SchemaBuilder(
       ConflictPolicy conflict_policy = CONFLICT_APPEND,
       Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
   /// \brief Construct a SchemaBuilder from a list of fields
-  /// `field_merge_options` is only effecitive when `conflict_policy` == `CONFLICT_MERGE`.
+  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
   SchemaBuilder(
       std::vector<std::shared_ptr<Field>> fields,
       ConflictPolicy conflict_policy = CONFLICT_APPEND,
       Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
   /// \brief Construct a SchemaBuilder from a schema, preserving the metadata
-  /// `field_merge_options` is only effecitive when `conflict_policy` == `CONFLICT_MERGE`.
+  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
   SchemaBuilder(
       const std::shared_ptr<Schema>& schema,
       ConflictPolicy conflict_policy = CONFLICT_APPEND,
diff --git a/cpp/src/arrow/util/bit_run_reader.h b/cpp/src/arrow/util/bit_run_reader.h
index eaa85bf3d7c..6dba30e0bc2 100644
--- a/cpp/src/arrow/util/bit_run_reader.h
+++ b/cpp/src/arrow/util/bit_run_reader.h
@@ -64,7 +64,7 @@ class BitRunReaderLinear {
 };
 
 #if ARROW_LITTLE_ENDIAN
-/// A convenience class for counting the number of continguous set/unset bits
+/// A convenience class for counting the number of contiguous set/unset bits
 /// in a bitmap.
 class ARROW_EXPORT BitRunReader {
  public:
@@ -145,7 +145,7 @@ class ARROW_EXPORT BitRunReader {
     }
 
     // Two cases:
-    // 1. For unset, CountTrailingZeros works natually so we don't
+    // 1. For unset, CountTrailingZeros works naturally so we don't
     // invert the word.
     // 2. Otherwise invert so we can use CountTrailingZeros.
     if (current_run_bit_set_) {
diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc
index 63783da77ef..484dbab36b4 100644
--- a/cpp/src/arrow/util/bit_util_benchmark.cc
+++ b/cpp/src/arrow/util/bit_util_benchmark.cc
@@ -421,7 +421,7 @@ static void CopyBitmapWithOffset(benchmark::State& state) {  // NOLINT non-const
   CopyBitmap<4>(state);
 }
 
-// Trigger the slow path where both source and dest buffer are not byte aligend.
+// Trigger the slow path where both source and dest buffer are not byte aligned.
 static void CopyBitmapWithOffsetBoth(benchmark::State& state) { CopyBitmap<3, 7>(state); }
 
 // Benchmark the worst case of comparing two identical bitmap
diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc
index adb1e0f7ef4..ef6f1254348 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -689,7 +689,7 @@ TEST(FirstTimeBitmapWriter, AppendWordShiftsBitsCorrectly) {
   check_append("11111110", "10110010", "10110010", 15, true);
 }
 
-TEST(TestAppendBitmap, AppendWordOnlyApproriateBytesWritten) {
+TEST(TestAppendBitmap, AppendWordOnlyAppropriateBytesWritten) {
   std::vector<uint8_t> valid_bits = {0x00, 0x00};
 
   uint64_t bitmap = 0x1FF;
diff --git a/cpp/src/arrow/util/byte_stream_split.h b/cpp/src/arrow/util/byte_stream_split.h
index bab50746064..28dcce52bb8 100644
--- a/cpp/src/arrow/util/byte_stream_split.h
+++ b/cpp/src/arrow/util/byte_stream_split.h
@@ -58,7 +58,7 @@ void ByteStreamSplitDecodeSse2(const uint8_t* data, int64_t num_values, int64_t
     out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
   }
 
-  // The blocks get processed hierahically using the unpack intrinsics.
+  // The blocks get processed hierarchically using the unpack intrinsics.
   // Example with four streams:
   // Stage 1: AAAA BBBB CCCC DDDD
   // Stage 2: ACAC ACAC BDBD BDBD
@@ -200,7 +200,7 @@ void ByteStreamSplitDecodeAvx2(const uint8_t* data, int64_t num_values, int64_t
     out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
   }
 
-  // Processed hierahically using unpack intrinsics, then permute intrinsics.
+  // Processed hierarchically using unpack intrinsics, then permute intrinsics.
   __m256i stage[kNumStreamsLog2 + 1U][kNumStreams];
   __m256i final_result[kNumStreams];
   constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
@@ -292,7 +292,7 @@ void ByteStreamSplitEncodeAvx2(const uint8_t* raw_values, const size_t num_value
   }
 
   // Path for float.
-  // 1. Processed hierahically to 32i blcok using the unpack intrinsics.
+  // 1. Processed hierarchically to 32i block using the unpack intrinsics.
   // 2. Pack 128i block using _mm256_permutevar8x32_epi32.
   // 3. Pack final 256i block with _mm256_permute2x128_si256.
   constexpr size_t kNumUnpack = 3U;
@@ -358,7 +358,7 @@ void ByteStreamSplitDecodeAvx512(const uint8_t* data, int64_t num_values, int64_
     out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
   }
 
-  // Processed hierahically using the unpack, then two shuffles.
+  // Processed hierarchically using the unpack, then two shuffles.
   __m512i stage[kNumStreamsLog2 + 1U][kNumStreams];
   __m512i shuffle[kNumStreams];
   __m512i final_result[kNumStreams];
@@ -526,7 +526,7 @@ void ByteStreamSplitEncodeAvx512(const uint8_t* raw_values, const size_t num_val
     final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101);
   } else {
     // Path for float.
-    // 1. Processed hierahically to 32i blcok using the unpack intrinsics.
+    // 1. Processed hierarchically to 32i block using the unpack intrinsics.
     // 2. Pack 128i block using _mm256_permutevar8x32_epi32.
     // 3. Pack final 256i block with _mm256_permute2x128_si256.
     for (size_t i = 0; i < kNumStreams; ++i)
diff --git a/cpp/src/arrow/util/spaced.h b/cpp/src/arrow/util/spaced.h
index 710775811d7..f2f369adb63 100644
--- a/cpp/src/arrow/util/spaced.h
+++ b/cpp/src/arrow/util/spaced.h
@@ -161,7 +161,7 @@ inline int SpacedExpand(T* buffer, int num_values, int null_count,
     idx_buffer -= kBatchSize;
     idx_decode -= current_block.popcount;
 
-    // Foward scan and pack the target data to temp
+    // Forward scan and pack the target data to temp
     int idx = idx_decode + 1;
     for (uint64_t i = 0; i < kBatchSize; i++) {
       if (valid_bits_reader.IsSet()) {
diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h
index afc6c172e48..2c7dfbbadf6 100644
--- a/cpp/src/arrow/util/utf8.h
+++ b/cpp/src/arrow/util/utf8.h
@@ -356,7 +356,7 @@ static inline uint8_t* UTF8Encode(uint8_t* str, uint32_t codepoint) {
 static inline bool UTF8Decode(const uint8_t** data, uint32_t* codepoint) {
   const uint8_t* str = *data;
 
-  if (*str < 0x80) {  // ascci
+  if (*str < 0x80) {  // ascii
     *codepoint = *str++;
   } else if (ARROW_PREDICT_FALSE(*str < 0xC0)) {  // invalid non-ascii char
     return false;
@@ -403,7 +403,7 @@ static inline bool UTF8Decode(const uint8_t** data, uint32_t* codepoint) {
 static inline bool UTF8DecodeReverse(const uint8_t** data, uint32_t* codepoint) {
   const uint8_t* str = *data;
 
-  if (*str < 0x80) {  // ascci
+  if (*str < 0x80) {  // ascii
     *codepoint = *str--;
   } else {
     if (ARROW_PREDICT_FALSE(!Utf8IsContinuation(*str))) {
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index ad73751ecdc..735fd5f8ddb 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -122,7 +122,7 @@ Status Engine::Make(const std::shared_ptr<Configuration>& conf,
   auto ctx = arrow::internal::make_unique<llvm::LLVMContext>();
   auto module = arrow::internal::make_unique<llvm::Module>("codegen", *ctx);
 
-  // Capture before moving, ExceutionEngine does not allow retrieving the
+  // Capture before moving, ExecutionEngine does not allow retrieving the
   // original Module.
   auto module_ptr = module.get();