diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index fe1d4999..53630e67 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -366,7 +366,7 @@ if (NOT ARROW_FOUND) -DARROW_BUILD_TESTS=OFF) if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "") - set(ARROW_VERSION "f2806fa518583907a129b2ecb0b7ec8758b69e17") + set(ARROW_VERSION "fc4e2c36d2c56a8bd5d1ab17eeb406826924d3e5") else() set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}") endif() diff --git a/src/parquet/arrow/arrow-reader-writer-benchmark.cc b/src/parquet/arrow/arrow-reader-writer-benchmark.cc index a54fb5d1..edeef1ed 100644 --- a/src/parquet/arrow/arrow-reader-writer-benchmark.cc +++ b/src/parquet/arrow/arrow-reader-writer-benchmark.cc @@ -112,11 +112,9 @@ std::shared_ptr<::arrow::Table> TableFromVector( EXIT_NOT_OK(builder.Finish(&array)); auto field = ::arrow::field("column", type, nullable); - auto schema = std::make_shared<::arrow::Schema>( - std::vector>({field})); + auto schema = ::arrow::schema({field}); auto column = std::make_shared<::arrow::Column>(field, array); - return std::make_shared<::arrow::Table>( - schema, std::vector>({column})); + return ::arrow::Table::Make(schema, {column}); } template <> @@ -139,8 +137,7 @@ std::shared_ptr<::arrow::Table> TableFromVector(const std::vector( std::vector>({field})); auto column = std::make_shared<::arrow::Column>(field, array); - return std::make_shared<::arrow::Table>( - schema, std::vector>({column})); + return ::arrow::Table::Make(schema, {column}); } template diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index 0e0831ec..a8d38241 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -23,8 +23,8 @@ #include "gtest/gtest.h" -#include #include +#include #include "parquet/api/reader.h" #include "parquet/api/writer.h" @@ -1145,7 +1145,7 @@ void MakeDateTimeTypesTable(std::shared_ptr* out, bool nanos_as_micros = std::make_shared("f0", a0), std::make_shared("f1", a1), std::make_shared("f2", a2), std::make_shared("f3", a3), std::make_shared("f4", a4), std::make_shared("f5", a5)}; - *out = std::make_shared<::arrow::Table>(schema, columns); + *out = Table::Make(schema, columns); } TEST(TestArrowReadWrite, DateTimeTypes) { @@ -1199,31 +1199,28 @@ TEST(TestArrowReadWrite, CoerceTimestamps) { auto s1 = std::shared_ptr<::arrow::Schema>( new ::arrow::Schema({field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us), field("f_ns", t_ns)})); - auto input = std::make_shared<::arrow::Table>( - s1, ColumnVector({std::make_shared("f_s", a_s), - std::make_shared("f_ms", a_ms), - std::make_shared("f_us", a_us), - std::make_shared("f_ns", a_ns)})); + auto input = Table::Make( + s1, + {std::make_shared("f_s", a_s), std::make_shared("f_ms", a_ms), + std::make_shared("f_us", a_us), std::make_shared("f_ns", a_ns)}); // Result when coercing to milliseconds auto s2 = std::shared_ptr<::arrow::Schema>( new ::arrow::Schema({field("f_s", t_ms), field("f_ms", t_ms), field("f_us", t_ms), field("f_ns", t_ms)})); - auto ex_milli_result = std::make_shared<::arrow::Table>( - s2, ColumnVector({std::make_shared("f_s", a_ms), - std::make_shared("f_ms", a_ms), - std::make_shared("f_us", a_ms), - std::make_shared("f_ns", a_ms)})); + auto ex_milli_result = Table::Make( + s2, + {std::make_shared("f_s", a_ms), std::make_shared("f_ms", a_ms), + std::make_shared("f_us", a_ms), std::make_shared("f_ns", a_ms)}); // Result when coercing to microseconds auto s3 = std::shared_ptr<::arrow::Schema>( new ::arrow::Schema({field("f_s", t_us), field("f_ms", t_us), field("f_us", t_us), field("f_ns", t_us)})); - auto ex_micro_result = std::make_shared<::arrow::Table>( - s3, ColumnVector({std::make_shared("f_s", a_us), - std::make_shared("f_ms", a_us), - std::make_shared("f_us", a_us), - std::make_shared("f_ns", a_us)})); + auto ex_micro_result = Table::Make( + s3, + {std::make_shared("f_s", a_us), std::make_shared("f_ms", a_us), + std::make_shared("f_us", a_us), std::make_shared("f_ns", a_us)}); std::shared_ptr
milli_result; DoSimpleRoundtrip( @@ -1276,10 +1273,10 @@ TEST(TestArrowReadWrite, CoerceTimestampsLosePrecision) { auto c3 = std::make_shared("f_us", a_us); auto c4 = std::make_shared("f_ns", a_ns); - auto t1 = std::make_shared<::arrow::Table>(s1, ColumnVector({c1})); - auto t2 = std::make_shared<::arrow::Table>(s2, ColumnVector({c2})); - auto t3 = std::make_shared<::arrow::Table>(s3, ColumnVector({c3})); - auto t4 = std::make_shared<::arrow::Table>(s4, ColumnVector({c4})); + auto t1 = Table::Make(s1, {c1}); + auto t2 = Table::Make(s2, {c2}); + auto t3 = Table::Make(s3, {c3}); + auto t4 = Table::Make(s4, {c4}); auto sink = std::make_shared(); @@ -1327,7 +1324,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { std::vector> columns = { std::make_shared("f0", a0), std::make_shared("f1", a1)}; - auto table = std::make_shared<::arrow::Table>(schema, columns); + auto table = Table::Make(schema, columns); // Expected schema and values auto e0 = field("f0", ::arrow::date32()); @@ -1341,7 +1338,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { std::vector> ex_columns = { std::make_shared("f0", x0), std::make_shared("f1", x1)}; - auto ex_table = std::make_shared<::arrow::Table>(ex_schema, ex_columns); + auto ex_table = Table::Make(ex_schema, ex_columns); std::shared_ptr
result; DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result); @@ -1372,7 +1369,7 @@ void MakeDoubleTable(int num_columns, int num_rows, int nchunks, fields[i] = column->field(); } auto schema = std::make_shared<::arrow::Schema>(fields); - *out = std::make_shared
(schema, columns); + *out = Table::Make(schema, columns); } TEST(TestArrowReadWrite, MultithreadedRead) { @@ -1459,9 +1456,9 @@ TEST(TestArrowReadWrite, ReadColumnSubset) { ex_fields.push_back(table->column(i)->field()); } - auto ex_schema = std::make_shared<::arrow::Schema>(ex_fields); - Table expected(ex_schema, ex_columns); - AssertTablesEqual(expected, *result); + auto ex_schema = ::arrow::schema(ex_fields); + auto expected = Table::Make(ex_schema, ex_columns); + AssertTablesEqual(*expected, *result); } void MakeListTable(int num_rows, std::shared_ptr
* out) { @@ -1501,7 +1498,7 @@ void MakeListTable(int num_rows, std::shared_ptr
* out) { auto f1 = ::arrow::field("a", ::arrow::list(::arrow::int8())); auto schema = ::arrow::schema({f1}); std::vector> arrays = {list_array}; - *out = std::make_shared
(schema, arrays); + *out = Table::Make(schema, arrays); } TEST(TestArrowReadWrite, ListLargeRecords) { @@ -1544,7 +1541,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) { auto chunked_col = std::make_shared<::arrow::Column>(table->schema()->field(0), chunked); std::vector> columns = {chunked_col}; - auto chunked_table = std::make_shared
(table->schema(), columns); + auto chunked_table = Table::Make(table->schema(), columns); ASSERT_TRUE(table->Equals(*chunked_table)); } diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc index 7ed9ad83..129eccfd 100644 --- a/src/parquet/arrow/arrow-schema-test.cc +++ b/src/parquet/arrow/arrow-schema-test.cc @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test { for (int i = 0; i < expected_schema->num_fields(); ++i) { auto lhs = result_schema_->field(i); auto rhs = expected_schema->field(i); - EXPECT_TRUE(lhs->Equals(rhs)) - << i << " " << lhs->ToString() << " != " << rhs->ToString(); + EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString() + << " != " << rhs->ToString(); } } diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc index 3ca49cb4..e13a094d 100644 --- a/src/parquet/arrow/reader.cc +++ b/src/parquet/arrow/reader.cc @@ -431,7 +431,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index, RETURN_NOT_OK(ParallelFor(nthreads, num_columns, ReadColumnFunc)); } - *out = std::make_shared
(schema, columns); + *out = Table::Make(schema, columns); return Status::OK(); } @@ -466,7 +466,7 @@ Status FileReader::Impl::ReadTable(const std::vector& indices, RETURN_NOT_OK(ParallelFor(nthreads, num_fields, ReadColumnFunc)); } - *table = std::make_shared
(schema, columns); + *table = Table::Make(schema, columns); return Status::OK(); } diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h index 8611a303..7264324d 100644 --- a/src/parquet/arrow/test-util.h +++ b/src/parquet/arrow/test-util.h @@ -414,7 +414,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr& va std::vector> columns({column}); std::vector> fields({column->field()}); auto schema = std::make_shared<::arrow::Schema>(fields); - return std::make_shared<::arrow::Table>(schema, columns); + return ::arrow::Table::Make(schema, columns); } template diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc index 4ec48a45..9b9bde9f 100644 --- a/src/parquet/file/reader.cc +++ b/src/parquet/file/reader.cc @@ -45,9 +45,9 @@ RowGroupReader::RowGroupReader(std::unique_ptr contents) : contents_(std::move(contents)) {} std::shared_ptr RowGroupReader::Column(int i) { - DCHECK(i < metadata()->num_columns()) - << "The RowGroup only has " << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " + << metadata()->num_columns() + << "columns, requested column: " << i; const ColumnDescriptor* descr = metadata()->schema()->Column(i); std::unique_ptr page_reader = contents_->GetColumnPageReader(i); @@ -57,9 +57,9 @@ std::shared_ptr RowGroupReader::Column(int i) { } std::unique_ptr RowGroupReader::GetColumnPageReader(int i) { - DCHECK(i < metadata()->num_columns()) - << "The RowGroup only has " << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " + << metadata()->num_columns() + << "columns, requested column: " << i; return contents_->GetColumnPageReader(i); } @@ -127,9 +127,9 @@ std::shared_ptr ParquetFileReader::metadata() const { } std::shared_ptr ParquetFileReader::RowGroup(int i) { - DCHECK(i < metadata()->num_row_groups()) - << "The file only has " << metadata()->num_row_groups() - << "row groups, requested reader for: " << i; + DCHECK(i < metadata()->num_row_groups()) << "The file only has " + << metadata()->num_row_groups() + << "row groups, requested reader for: " << i; return contents_->GetRowGroup(i); }