Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ if (NOT ARROW_FOUND)
-DARROW_BUILD_TESTS=OFF)

if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "")
set(ARROW_VERSION "f2806fa518583907a129b2ecb0b7ec8758b69e17")
set(ARROW_VERSION "fc4e2c36d2c56a8bd5d1ab17eeb406826924d3e5")
else()
set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}")
endif()
Expand Down
9 changes: 3 additions & 6 deletions src/parquet/arrow/arrow-reader-writer-benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,9 @@ std::shared_ptr<::arrow::Table> TableFromVector(
EXIT_NOT_OK(builder.Finish(&array));

auto field = ::arrow::field("column", type, nullable);
auto schema = std::make_shared<::arrow::Schema>(
std::vector<std::shared_ptr<::arrow::Field>>({field}));
auto schema = ::arrow::schema({field});
auto column = std::make_shared<::arrow::Column>(field, array);
return std::make_shared<::arrow::Table>(
schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
return ::arrow::Table::Make(schema, {column});
}

template <>
Expand All @@ -139,8 +137,7 @@ std::shared_ptr<::arrow::Table> TableFromVector<BooleanType>(const std::vector<b
auto schema = std::make_shared<::arrow::Schema>(
std::vector<std::shared_ptr<::arrow::Field>>({field}));
auto column = std::make_shared<::arrow::Column>(field, array);
return std::make_shared<::arrow::Table>(
schema, std::vector<std::shared_ptr<::arrow::Column>>({column}));
return ::arrow::Table::Make(schema, {column});
}

template <bool nullable, typename ParquetType>
Expand Down
55 changes: 26 additions & 29 deletions src/parquet/arrow/arrow-reader-writer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

#include "gtest/gtest.h"

#include <sstream>
#include <arrow/compute/api.h>
#include <sstream>

#include "parquet/api/reader.h"
#include "parquet/api/writer.h"
Expand Down Expand Up @@ -1145,7 +1145,7 @@ void MakeDateTimeTypesTable(std::shared_ptr<Table>* out, bool nanos_as_micros =
std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1),
std::make_shared<Column>("f2", a2), std::make_shared<Column>("f3", a3),
std::make_shared<Column>("f4", a4), std::make_shared<Column>("f5", a5)};
*out = std::make_shared<::arrow::Table>(schema, columns);
*out = Table::Make(schema, columns);
}

TEST(TestArrowReadWrite, DateTimeTypes) {
Expand Down Expand Up @@ -1199,31 +1199,28 @@ TEST(TestArrowReadWrite, CoerceTimestamps) {
auto s1 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us),
field("f_ns", t_ns)}));
auto input = std::make_shared<::arrow::Table>(
s1, ColumnVector({std::make_shared<Column>("f_s", a_s),
std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_us),
std::make_shared<Column>("f_ns", a_ns)}));
auto input = Table::Make(
s1,
{std::make_shared<Column>("f_s", a_s), std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_ns)});

// Result when coercing to milliseconds
auto s2 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_ms), field("f_ms", t_ms), field("f_us", t_ms),
field("f_ns", t_ms)}));
auto ex_milli_result = std::make_shared<::arrow::Table>(
s2, ColumnVector({std::make_shared<Column>("f_s", a_ms),
std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_ms),
std::make_shared<Column>("f_ns", a_ms)}));
auto ex_milli_result = Table::Make(
s2,
{std::make_shared<Column>("f_s", a_ms), std::make_shared<Column>("f_ms", a_ms),
std::make_shared<Column>("f_us", a_ms), std::make_shared<Column>("f_ns", a_ms)});

// Result when coercing to microseconds
auto s3 = std::shared_ptr<::arrow::Schema>(
new ::arrow::Schema({field("f_s", t_us), field("f_ms", t_us), field("f_us", t_us),
field("f_ns", t_us)}));
auto ex_micro_result = std::make_shared<::arrow::Table>(
s3, ColumnVector({std::make_shared<Column>("f_s", a_us),
std::make_shared<Column>("f_ms", a_us),
std::make_shared<Column>("f_us", a_us),
std::make_shared<Column>("f_ns", a_us)}));
auto ex_micro_result = Table::Make(
s3,
{std::make_shared<Column>("f_s", a_us), std::make_shared<Column>("f_ms", a_us),
std::make_shared<Column>("f_us", a_us), std::make_shared<Column>("f_ns", a_us)});

std::shared_ptr<Table> milli_result;
DoSimpleRoundtrip(
Expand Down Expand Up @@ -1276,10 +1273,10 @@ TEST(TestArrowReadWrite, CoerceTimestampsLosePrecision) {
auto c3 = std::make_shared<Column>("f_us", a_us);
auto c4 = std::make_shared<Column>("f_ns", a_ns);

auto t1 = std::make_shared<::arrow::Table>(s1, ColumnVector({c1}));
auto t2 = std::make_shared<::arrow::Table>(s2, ColumnVector({c2}));
auto t3 = std::make_shared<::arrow::Table>(s3, ColumnVector({c3}));
auto t4 = std::make_shared<::arrow::Table>(s4, ColumnVector({c4}));
auto t1 = Table::Make(s1, {c1});
auto t2 = Table::Make(s2, {c2});
auto t3 = Table::Make(s3, {c3});
auto t4 = Table::Make(s4, {c4});

auto sink = std::make_shared<InMemoryOutputStream>();

Expand Down Expand Up @@ -1327,7 +1324,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {

std::vector<std::shared_ptr<::arrow::Column>> columns = {
std::make_shared<Column>("f0", a0), std::make_shared<Column>("f1", a1)};
auto table = std::make_shared<::arrow::Table>(schema, columns);
auto table = Table::Make(schema, columns);

// Expected schema and values
auto e0 = field("f0", ::arrow::date32());
Expand All @@ -1341,7 +1338,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) {

std::vector<std::shared_ptr<::arrow::Column>> ex_columns = {
std::make_shared<Column>("f0", x0), std::make_shared<Column>("f1", x1)};
auto ex_table = std::make_shared<::arrow::Table>(ex_schema, ex_columns);
auto ex_table = Table::Make(ex_schema, ex_columns);

std::shared_ptr<Table> result;
DoSimpleRoundtrip(table, 1, table->num_rows(), {}, &result);
Expand Down Expand Up @@ -1372,7 +1369,7 @@ void MakeDoubleTable(int num_columns, int num_rows, int nchunks,
fields[i] = column->field();
}
auto schema = std::make_shared<::arrow::Schema>(fields);
*out = std::make_shared<Table>(schema, columns);
*out = Table::Make(schema, columns);
}

TEST(TestArrowReadWrite, MultithreadedRead) {
Expand Down Expand Up @@ -1459,9 +1456,9 @@ TEST(TestArrowReadWrite, ReadColumnSubset) {
ex_fields.push_back(table->column(i)->field());
}

auto ex_schema = std::make_shared<::arrow::Schema>(ex_fields);
Table expected(ex_schema, ex_columns);
AssertTablesEqual(expected, *result);
auto ex_schema = ::arrow::schema(ex_fields);
auto expected = Table::Make(ex_schema, ex_columns);
AssertTablesEqual(*expected, *result);
}

void MakeListTable(int num_rows, std::shared_ptr<Table>* out) {
Expand Down Expand Up @@ -1501,7 +1498,7 @@ void MakeListTable(int num_rows, std::shared_ptr<Table>* out) {
auto f1 = ::arrow::field("a", ::arrow::list(::arrow::int8()));
auto schema = ::arrow::schema({f1});
std::vector<std::shared_ptr<Array>> arrays = {list_array};
*out = std::make_shared<Table>(schema, arrays);
*out = Table::Make(schema, arrays);
}

TEST(TestArrowReadWrite, ListLargeRecords) {
Expand Down Expand Up @@ -1544,7 +1541,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) {
auto chunked_col =
std::make_shared<::arrow::Column>(table->schema()->field(0), chunked);
std::vector<std::shared_ptr<::arrow::Column>> columns = {chunked_col};
auto chunked_table = std::make_shared<Table>(table->schema(), columns);
auto chunked_table = Table::Make(table->schema(), columns);

ASSERT_TRUE(table->Equals(*chunked_table));
}
Expand Down
4 changes: 2 additions & 2 deletions src/parquet/arrow/arrow-schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test {
for (int i = 0; i < expected_schema->num_fields(); ++i) {
auto lhs = result_schema_->field(i);
auto rhs = expected_schema->field(i);
EXPECT_TRUE(lhs->Equals(rhs))
<< i << " " << lhs->ToString() << " != " << rhs->ToString();
EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString()
<< " != " << rhs->ToString();
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/parquet/arrow/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index,
RETURN_NOT_OK(ParallelFor(nthreads, num_columns, ReadColumnFunc));
}

*out = std::make_shared<Table>(schema, columns);
*out = Table::Make(schema, columns);
return Status::OK();
}

Expand Down Expand Up @@ -466,7 +466,7 @@ Status FileReader::Impl::ReadTable(const std::vector<int>& indices,
RETURN_NOT_OK(ParallelFor(nthreads, num_fields, ReadColumnFunc));
}

*table = std::make_shared<Table>(schema, columns);
*table = Table::Make(schema, columns);
return Status::OK();
}

Expand Down
2 changes: 1 addition & 1 deletion src/parquet/arrow/test-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr<Array>& va
std::vector<std::shared_ptr<::arrow::Column>> columns({column});
std::vector<std::shared_ptr<::arrow::Field>> fields({column->field()});
auto schema = std::make_shared<::arrow::Schema>(fields);
return std::make_shared<::arrow::Table>(schema, columns);
return ::arrow::Table::Make(schema, columns);
}

template <typename T>
Expand Down
18 changes: 9 additions & 9 deletions src/parquet/file/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents)
: contents_(std::move(contents)) {}

std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
DCHECK(i < metadata()->num_columns())
<< "The RowGroup only has " << metadata()->num_columns()
<< "columns, requested column: " << i;
DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
<< metadata()->num_columns()
<< "columns, requested column: " << i;
const ColumnDescriptor* descr = metadata()->schema()->Column(i);

std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i);
Expand All @@ -57,9 +57,9 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) {
}

std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) {
DCHECK(i < metadata()->num_columns())
<< "The RowGroup only has " << metadata()->num_columns()
<< "columns, requested column: " << i;
DCHECK(i < metadata()->num_columns()) << "The RowGroup only has "
<< metadata()->num_columns()
<< "columns, requested column: " << i;
return contents_->GetColumnPageReader(i);
}

Expand Down Expand Up @@ -127,9 +127,9 @@ std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const {
}

std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) {
DCHECK(i < metadata()->num_row_groups())
<< "The file only has " << metadata()->num_row_groups()
<< "row groups, requested reader for: " << i;
DCHECK(i < metadata()->num_row_groups()) << "The file only has "
<< metadata()->num_row_groups()
<< "row groups, requested reader for: " << i;
return contents_->GetRowGroup(i);
}

Expand Down