diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu
index 3c6713e33b2..c328263c7f3 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -798,6 +798,7 @@ CUDF_KERNEL void __launch_bounds__(128)
           page_g.page_data = ck_g.uncompressed_bfr + page_offset;
           if (not comp_page_sizes.empty()) {
             page_g.compressed_data = ck_g.compressed_bfr + comp_page_offset;
+            page_g.comp_data_size = comp_page_sizes[ck_g.first_page + num_pages];
           }
           page_g.start_row = cur_row;
           page_g.num_rows = rows_in_page;
@@ -1600,7 +1601,7 @@ __device__ void finish_page_encode(state_buf* s,
       auto const bytes_to_compress = static_cast<uint32_t>(end_ptr - c_base);
       comp_in[blockIdx.x] = {c_base, bytes_to_compress};
       comp_out[blockIdx.x] = {s->page.compressed_data + s->page.max_hdr_size + s->page.max_lvl_size,
-                              0};  // size is unused
+                              s->page.comp_data_size};
     }
     pages[blockIdx.x] = s->page;
     if (not comp_results.empty()) {
diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp
index fa6f04eed73..2c9a914439f 100644
--- a/cpp/src/io/utilities/config_utils.cpp
+++ b/cpp/src/io/utilities/config_utils.cpp
@@ -58,7 +58,7 @@ enum class usage_policy : uint8_t { OFF, STABLE, ALWAYS };
  */
 usage_policy get_env_policy()
 {
-  static auto const env_val = getenv_or("LIBCUDF_NVCOMP_POLICY", "STABLE");
+  auto const env_val = getenv_or("LIBCUDF_NVCOMP_POLICY", "STABLE");
   if (env_val == "OFF") return usage_policy::OFF;
   if (env_val == "STABLE") return usage_policy::STABLE;
   if (env_val == "ALWAYS") return usage_policy::ALWAYS;
diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp
index 584f52b2b39..98423593867 100644
--- a/cpp/tests/io/parquet_writer_test.cpp
+++ b/cpp/tests/io/parquet_writer_test.cpp
@@ -37,6 +37,39 @@
 
 using cudf::test::iterators::no_nulls;
 
+/**
+ * @brief Fixture parameterized on a (compression implementation, compression type) pair.
+ *
+ * The constructor sets LIBCUDF_HOST_COMPRESSION and LIBCUDF_NVCOMP_POLICY according to the
+ * implementation name ("NVCOMP", "DEVICE_INTERNAL" or "HOST"); the destructor unsets both.
+ */
+struct CompressionTest
+  : public ParquetWriterTest,
+    public ::testing::WithParamInterface<std::tuple<std::string, cudf::io::compression_type>> {
+  CompressionTest()
+  {
+    auto const comp_impl = std::get<0>(GetParam());
+
+    if (comp_impl == "NVCOMP") {
+      setenv("LIBCUDF_HOST_COMPRESSION", "OFF", 1);
+      setenv("LIBCUDF_NVCOMP_POLICY", "ALWAYS", 1);
+    } else if (comp_impl == "DEVICE_INTERNAL") {
+      setenv("LIBCUDF_HOST_COMPRESSION", "OFF", 1);
+      setenv("LIBCUDF_NVCOMP_POLICY", "OFF", 1);
+    } else if (comp_impl == "HOST") {
+      setenv("LIBCUDF_HOST_COMPRESSION", "ON", 1);
+      setenv("LIBCUDF_NVCOMP_POLICY", "OFF", 1);
+    } else {
+      CUDF_FAIL("Invalid test parameter");
+    }
+  }
+  ~CompressionTest() override
+  {
+    unsetenv("LIBCUDF_HOST_COMPRESSION");
+    unsetenv("LIBCUDF_NVCOMP_POLICY");
+  }
+};
+
 template <typename mask_op_t>
 void test_durations(mask_op_t mask_op, bool use_byte_stream_split, bool arrow_schema)
 {
@@ -1334,15 +1367,18 @@ TEST_F(ParquetWriterTest, UserNullabilityInvalid)
   EXPECT_THROW(cudf::io::write_parquet(write_opts), cudf::logic_error);
 }
 
-TEST_F(ParquetWriterTest, CompStats)
+TEST_P(CompressionTest, CompStats)
 {
-  auto table = create_random_fixed_table(1, 100000, true);
+  auto const compression_type = std::get<1>(GetParam());
+
+  auto table = create_random_fixed_table(1, 55000, true);
 
   auto const stats = std::make_shared<cudf::io::writer_compression_statistics>();
 
-  std::vector<char> unused_buffer;
+  std::vector<char> buffer;
   cudf::io::parquet_writer_options opts =
-    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer}, table->view())
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buffer}, table->view())
+      .compression(compression_type)
       .compression_statistics(stats);
   cudf::io::write_parquet(opts);
 
@@ -1350,10 +1386,18 @@ TEST_F(ParquetWriterTest, CompStats)
   EXPECT_EQ(stats->num_failed_bytes(), 0);
   EXPECT_EQ(stats->num_skipped_bytes(), 0);
   EXPECT_FALSE(std::isnan(stats->compression_ratio()));
+
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{buffer.data(), buffer.size()});
+  auto result = cudf::io::read_parquet(in_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table->view());
 }
 
-TEST_F(ParquetWriterTest, CompStatsEmptyTable)
+TEST_P(CompressionTest, CompStatsEmptyTable)
 {
+  auto const compression_type = std::get<1>(GetParam());
+
   auto table_no_rows = create_random_fixed_table(20, 0, false);
 
   auto const stats = std::make_shared<cudf::io::writer_compression_statistics>();
@@ -1362,12 +1406,30 @@ TEST_F(ParquetWriterTest, CompStatsEmptyTable)
   cudf::io::parquet_writer_options opts =
     cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&unused_buffer},
                                               table_no_rows->view())
+      .compression(compression_type)
       .compression_statistics(stats);
   cudf::io::write_parquet(opts);
 
   expect_compression_stats_empty(stats);
 }
 
+// LZ4 is only exercised through the "NVCOMP" implementation; "DEVICE_INTERNAL" and "HOST"
+// are run with AUTO, SNAPPY and ZSTD.
+INSTANTIATE_TEST_SUITE_P(NvcompCompressionTest,
+                         CompressionTest,
+                         ::testing::Combine(::testing::Values("NVCOMP"),
+                                            ::testing::Values(cudf::io::compression_type::AUTO,
+                                                              cudf::io::compression_type::SNAPPY,
+                                                              cudf::io::compression_type::LZ4,
+                                                              cudf::io::compression_type::ZSTD)));
+
+INSTANTIATE_TEST_SUITE_P(OtherCompressionTest,
+                         CompressionTest,
+                         ::testing::Combine(::testing::Values("DEVICE_INTERNAL", "HOST"),
+                                            ::testing::Values(cudf::io::compression_type::AUTO,
+                                                              cudf::io::compression_type::SNAPPY,
+                                                              cudf::io::compression_type::ZSTD)));
+
 TEST_F(ParquetWriterTest, SkipCompression)
 {
   constexpr auto page_rows = 1000;