Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ CUDF_KERNEL void __launch_bounds__(128)
util::round_up_unsafe(page_g.max_hdr_size + page_g.max_data_size, page_align);
if (not comp_page_sizes.empty()) {
comp_page_offset += page_g.max_hdr_size + comp_page_sizes[ck_g.first_page];
page_g.comp_data_size = comp_page_sizes[ck_g.first_page + num_pages];
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missed this one in #18644 because the test did not cover dictionary pages.

}
page_headers_size += page_g.max_hdr_size;
max_page_data_size = max(max_page_data_size, page_g.max_data_size);
Expand Down
110 changes: 110 additions & 0 deletions cpp/tests/io/compression_common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/io/types.hpp>
#include <cudf/utilities/error.hpp>

#include <gtest/gtest.h>

#include <cstdlib>
#include <list>
#include <optional>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>

/**
 * @brief Scoped override of a process environment variable.
 *
 * The constructor records the variable's current value (if any) and then sets the variable to
 * `value`. The destructor puts the recorded value back, or unsets the variable when it was not
 * set before construction.
 */
class tmp_env_var {
 public:
  /**
   * @brief Overrides environment variable `name` with `value` for the lifetime of this object.
   *
   * @param name Name of the environment variable to override
   * @param value Value the variable holds while this object is alive
   * @throws std::runtime_error if `setenv` reports a failure
   */
  explicit tmp_env_var(std::string name, std::string const& value) : name_(std::move(name))
  {
    // Copy the old value before calling setenv; the pointer returned by getenv refers to the
    // environment that setenv is about to modify.
    auto const previous_value = std::getenv(name_.c_str());
    if (previous_value != nullptr) { previous_value_ = std::string(previous_value); }

    // Fail loudly: continuing after a failed override would run under the wrong configuration.
    if (setenv(name_.c_str(), value.c_str(), 1) != 0) {
      throw std::runtime_error("Failed to set environment variable '" + name_ + "'");
    }
  }

  // Copy and move are deleted to remove sharp edges, such as a vector of tmp_env_var that gets
  // resized. If copying or moving is needed eventually, these can be implemented properly.
  tmp_env_var(tmp_env_var const&)            = delete;
  tmp_env_var& operator=(tmp_env_var const&) = delete;
  tmp_env_var(tmp_env_var&&)                 = delete;
  tmp_env_var& operator=(tmp_env_var&&)      = delete;

  /**
   * @brief Restores the variable to the state recorded by the constructor.
   */
  ~tmp_env_var()
  {
    // Failures are deliberately ignored here: a destructor must not throw.
    if (previous_value_.has_value()) {
      setenv(name_.c_str(), previous_value_->c_str(), 1);
    } else {
      unsetenv(name_.c_str());
    }
  }

 private:
  std::string name_;                           ///< Name of the overridden variable
  std::optional<std::string> previous_value_;  ///< Value before the override; empty if unset
};

// Names of the environment variables overridden by the compression test fixtures in this header.
// `inline` (C++17) gives each constant one definition shared by every translation unit that
// includes the header, instead of a separate internal-linkage copy per translation unit.
inline constexpr char const* host_comp_env_var     = "LIBCUDF_HOST_COMPRESSION";
inline constexpr char const* host_decomp_env_var   = "LIBCUDF_HOST_DECOMPRESSION";
inline constexpr char const* nvcomp_policy_env_var = "LIBCUDF_NVCOMP_POLICY";

/**
 * @brief Value-parameterized fixture that selects a compression implementation through
 * environment variables.
 *
 * The first element of the test parameter names the implementation. The constructor applies the
 * matching overrides, which are held in `env_vars` for the lifetime of the fixture:
 *  - "NVCOMP":          LIBCUDF_HOST_COMPRESSION=OFF, LIBCUDF_NVCOMP_POLICY=ALWAYS
 *  - "DEVICE_INTERNAL": LIBCUDF_HOST_COMPRESSION=OFF, LIBCUDF_NVCOMP_POLICY=OFF
 *  - "HOST":            LIBCUDF_HOST_COMPRESSION=ON
 * Any other name is rejected with CUDF_FAIL.
 *
 * @tparam Base Test fixture to derive from
 */
template <typename Base>
struct CompressionTest
  : public Base,
    public ::testing::WithParamInterface<std::tuple<std::string, cudf::io::compression_type>> {
  CompressionTest()
  {
    auto const impl_name  = std::get<0>(GetParam());
    bool const use_nvcomp = impl_name == "NVCOMP";
    bool const use_device = impl_name == "DEVICE_INTERNAL";

    // Both device-side implementations switch host compression off and differ only in the
    // nvCOMP policy value.
    if (use_nvcomp || use_device) {
      env_vars.emplace_back(host_comp_env_var, "OFF");
      env_vars.emplace_back(nvcomp_policy_env_var, use_nvcomp ? "ALWAYS" : "OFF");
      return;
    }

    if (impl_name != "HOST") { CUDF_FAIL("Invalid test parameter"); }
    env_vars.emplace_back(host_comp_env_var, "ON");
  }

 private:
  // std::list constructs elements in place and never relocates them; tmp_env_var is neither
  // copyable nor movable.
  std::list<tmp_env_var> env_vars;
};

/**
 * @brief Value-parameterized fixture that selects a decompression implementation through
 * environment variables.
 *
 * The first element of the test parameter names the implementation. The constructor applies the
 * matching overrides, which are held in `env_vars` for the lifetime of the fixture:
 *  - "NVCOMP":          LIBCUDF_HOST_DECOMPRESSION=OFF, LIBCUDF_NVCOMP_POLICY=ALWAYS
 *  - "DEVICE_INTERNAL": LIBCUDF_HOST_DECOMPRESSION=OFF, LIBCUDF_NVCOMP_POLICY=OFF
 *  - "HOST":            LIBCUDF_HOST_DECOMPRESSION=ON
 * Any other name is rejected with CUDF_FAIL.
 *
 * @tparam Base Test fixture to derive from
 */
template <typename Base>
struct DecompressionTest
  : public Base,
    public ::testing::WithParamInterface<std::tuple<std::string, cudf::io::compression_type>> {
  DecompressionTest()
  {
    auto const impl_name  = std::get<0>(GetParam());
    bool const use_nvcomp = impl_name == "NVCOMP";
    bool const use_device = impl_name == "DEVICE_INTERNAL";

    // Both device-side implementations switch host decompression off and differ only in the
    // nvCOMP policy value.
    if (use_nvcomp || use_device) {
      env_vars.emplace_back(host_decomp_env_var, "OFF");
      env_vars.emplace_back(nvcomp_policy_env_var, use_nvcomp ? "ALWAYS" : "OFF");
      return;
    }

    if (impl_name != "HOST") { CUDF_FAIL("Invalid test parameter"); }
    env_vars.emplace_back(host_decomp_env_var, "ON");
  }

 private:
  // std::list constructs elements in place and never relocates them; tmp_env_var is neither
  // copyable nor movable.
  std::list<tmp_env_var> env_vars;
};
65 changes: 64 additions & 1 deletion cpp/tests/io/orc_chunked_reader_test.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,8 @@
* limitations under the License.
*/

#include "compression_common.hpp"

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
Expand Down Expand Up @@ -161,6 +163,8 @@ auto chunked_read(std::string const& filepath,

// Fixture for the ORC chunked reader tests; adds no members to cudf::test::BaseFixture.
struct OrcChunkedReaderTest : public cudf::test::BaseFixture {};

// Value-parameterized variant of the fixture above, wrapped in DecompressionTest from
// compression_common.hpp.
using OrcChunkedDecompressionTest = DecompressionTest<OrcChunkedReaderTest>;

TEST_F(OrcChunkedReaderTest, TestChunkedReadNoData)
{
std::vector<std::unique_ptr<cudf::column>> input_columns;
Expand Down Expand Up @@ -1477,3 +1481,62 @@ TEST_F(OrcChunkedReaderInputLimitTest, SizeTypeRowsOverflow)

#endif // LOCAL_TEST
}

// Writes a two-column table to an ORC file using the parameterized compression type, then reads
// it back with chunked_read under three different input limits and expects every result to
// equal the table that was written.
TEST_P(OrcChunkedDecompressionTest, RoundTripBasic)
{
  auto const num_rows = 12'345;

  // Row i holds i / 4, so each value appears in four consecutive rows.
  auto row_values = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i / 4; });

  std::vector<std::unique_ptr<cudf::column>> columns;
  columns.emplace_back(int32s_col(row_values, row_values + num_rows).release());
  columns.emplace_back(int64s_col(row_values, row_values + num_rows).release());
  auto source_table = std::make_unique<cudf::table>(std::move(columns));

  auto const orc_path = temp_env->get_temp_filepath("chunked_read_compressions.orc");
  auto const writer_options =
    cudf::io::orc_writer_options::builder(cudf::io::sink_info{orc_path}, *source_table)
      .compression(std::get<1>(GetParam()))
      .stripe_size_rows(2'000)
      .row_index_stride(1'000)
      .build();
  cudf::io::write_orc(writer_options);

  // Reads the file back with the given input limit and compares against the written table.
  // The number of chunks produced is not checked.
  auto const expect_round_trip = [&](input_limit const read_limit) {
    auto const [result, num_chunks] = chunked_read(orc_path, output_limit{0}, read_limit);
    CUDF_TEST_EXPECT_TABLES_EQUAL(*source_table, *result);
  };

  expect_round_trip(input_limit{2'400'000});
  expect_round_trip(input_limit{240'000});
  expect_round_trip(input_limit{24'000});
}

// Instantiations of OrcChunkedDecompressionTest: each pairs one implementation name (first tuple
// element, consumed by the DecompressionTest constructor) with the compression types to run.
// NOTE(review): INSTANTIATE_TEST_CASE_P is the legacy GoogleTest spelling; consider
// INSTANTIATE_TEST_SUITE_P if the rest of the test suite has migrated - confirm before changing.

// "NVCOMP" implementation: AUTO, SNAPPY, LZ4 and ZSTD.
INSTANTIATE_TEST_CASE_P(Nvcomp,
OrcChunkedDecompressionTest,
::testing::Combine(::testing::Values("NVCOMP"),
::testing::Values(cudf::io::compression_type::AUTO,
cudf::io::compression_type::SNAPPY,
cudf::io::compression_type::LZ4,
cudf::io::compression_type::ZSTD)));

// "DEVICE_INTERNAL" implementation: AUTO and SNAPPY only.
INSTANTIATE_TEST_CASE_P(DeviceInternal,
OrcChunkedDecompressionTest,
::testing::Combine(::testing::Values("DEVICE_INTERNAL"),
::testing::Values(cudf::io::compression_type::AUTO,
cudf::io::compression_type::SNAPPY)));

// "HOST" implementation: AUTO, SNAPPY and ZSTD.
INSTANTIATE_TEST_CASE_P(Host,
OrcChunkedDecompressionTest,
::testing::Combine(::testing::Values("HOST"),
::testing::Values(cudf::io::compression_type::AUTO,
cudf::io::compression_type::SNAPPY,
cudf::io::compression_type::ZSTD)));
Loading