Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions c_glib/test/test-orc-file-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ def all_columns
test("select fields") do
require_gi_bindings(3, 2, 6)
@reader.field_indices = [1, 3]
assert_equal(build_table("boolean1" => build_boolean_array([false, true]),
"short1" => build_int16_array([1024, 2048])),
assert_equal(build_table("byte1" => build_int8_array([1, 100]),
"int1" => build_int32_array([65536, 65536])),
@reader.read_stripes)
end
end
Expand All @@ -200,10 +200,8 @@ def all_columns
test("select fields") do
require_gi_bindings(3, 2, 6)
@reader.field_indices = [1, 3]
boolean1 = build_boolean_array([false, true])
short1 = build_int16_array([1024, 2048])
assert_equal(build_record_batch("boolean1" => boolean1,
"short1" => short1),
assert_equal(build_record_batch("byte1" => build_int8_array([1, 100]),
"int1" => build_int32_array([65536, 65536])),
@reader.read_stripe(0))
end
end
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/adapters/orc/adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ class ORCFileReader::Impl {
ARROW_RETURN_IF(*it < 0, Status::Invalid("Negative field index"));
include_indices_list.push_back(*it);
}
opts->includeTypes(include_indices_list);
opts->include(include_indices_list);
return Status::OK();
}

Expand Down
40 changes: 38 additions & 2 deletions cpp/src/arrow/adapters/orc/adapter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@ std::shared_ptr<Table> GenerateRandomTable(const std::shared_ptr<Schema>& schema

void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
const std::shared_ptr<Table>& expected_output_table,
const int64_t max_size = kDefaultSmallMemStreamSize) {
const int64_t max_size = kDefaultSmallMemStreamSize,
std::vector<int>* opt_selected_read_indices = nullptr) {
EXPECT_OK_AND_ASSIGN(auto buffer_output_stream,
io::BufferOutputStream::Create(max_size));
auto write_options = adapters::orc::WriteOptions();
Expand All @@ -250,7 +251,11 @@ void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
ASSERT_EQ(reader->GetCompression(), write_options.compression);
ASSERT_EQ(reader->GetCompressionSize(), write_options.compression_block_size);
ASSERT_EQ(reader->GetRowIndexStride(), write_options.row_index_stride);
EXPECT_OK_AND_ASSIGN(auto actual_output_table, reader->Read());
EXPECT_OK_AND_ASSIGN(auto actual_output_table,
opt_selected_read_indices == nullptr
? reader->Read()
: reader->Read(*opt_selected_read_indices));
ASSERT_OK(actual_output_table->ValidateFull());
AssertTablesEqual(*expected_output_table, *actual_output_table, false, false);
}

Expand Down Expand Up @@ -451,6 +456,37 @@ TEST_F(TestORCWriterTrivialNoConversion, writeChunkless) {
std::shared_ptr<Table> table = TableFromJSON(table_schema, {});
AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16);
}
// Round-trips an empty table (one chunk, zero rows) through the ORC writer and
// reader while requesting only the columns at indices 1 and 3. The table read
// back is compared against an empty table whose schema holds just the "int8"
// and "int32" fields.
TEST_F(TestORCWriterTrivialNoConversion, writeTrivialChunkAndSelectField) {
  // Handed to the helper by pointer, so it must remain a mutable lvalue.
  std::vector<int> selected_indices = {1, 3};

  // Expected output: no rows, only the two selected columns.
  auto schema_selected = schema({field("int8", int8()), field("int32", int32())});
  auto expected_table = TableFromJSON(schema_selected, {R"([])"});

  // Input: no rows, full schema. `table_schema` is declared outside this test
  // (presumably on the fixture -- confirm against the class definition).
  auto input_table = TableFromJSON(table_schema, {R"([])"});

  AssertTableWriteReadEqual(input_table, expected_table,
                            kDefaultSmallMemStreamSize / 16, &selected_indices);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the test but:

  • can we also test with non-empty data?
  • can we test selecting a field that's after the struct (to ensure field numbering is as expected)?

// Round-trips a table filled with random data through the ORC writer and
// reader while requesting only the columns at indices 1 and 7. The struct
// column sits at index 4, i.e. between the two selected columns, so index 7
// refers to a top-level field located after a nested type.
TEST_F(TestORCWriterTrivialNoConversion, writeFilledChunkAndSelectField) {
  constexpr int64_t kNumRows = 100;

  // Ten top-level columns; positions matter because selection is by index.
  std::shared_ptr<Schema> full_schema = schema({
      field("bool", boolean()),                                             // 0
      field("int32", int32()),                                              // 1
      field("int64", int64()),                                              // 2
      field("float", float32()),                                            // 3
      field("struct", struct_({field("a", utf8()), field("b", int64())})),  // 4
      field("double", float64()),                                           // 5
      field("date32", date32()),                                            // 6
      field("ts3", timestamp(TimeUnit::NANO)),                              // 7
      field("string", utf8()),                                              // 8
      field("binary", binary()),                                            // 9
  });

  // Seeded generator, so the generated columns are reproducible across runs.
  random::RandomArrayGenerator generator(kRandomSeed);
  auto random_batch = generator.BatchOf(full_schema->fields(), kNumRows);
  std::shared_ptr<Table> input_table =
      Table::Make(full_schema, random_batch->columns());

  // The expected table is the same projection applied directly to the input.
  // The vector is passed by pointer to the helper, so it stays non-const.
  std::vector<int> selected_indices = {1, 7};
  EXPECT_OK_AND_ASSIGN(auto expected_table,
                       input_table->SelectColumns(selected_indices));

  AssertTableWriteReadEqual(input_table, expected_table, kDefaultSmallMemStreamSize,
                            &selected_indices);
}

class TestORCWriterTrivialWithConversion : public ::testing::Test {
public:
TestORCWriterTrivialWithConversion() {
Expand Down
4 changes: 2 additions & 2 deletions ruby/red-arrow/test/test-orc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ def pp_values(values)
]
end
assert_equal([
["boolean1: bool", [pp_values([false, true])]],
["short1: int16", [pp_values([1024, 2048])]],
["byte1: int8", [pp_values([1, 100])]],
["int1: int32", [pp_values([65536, 65536])]],
],
dump)
end
Expand Down