Skip to content

Commit

Permalink
add config
Browse files Browse the repository at this point in the history
  • Loading branch information
BePPPower committed Jun 11, 2024
1 parent acbfcf7 commit f978b8f
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 3 deletions.
3 changes: 3 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1230,6 +1230,9 @@ DEFINE_mBool(skip_loading_stale_rowset_meta, "false");

DEFINE_Bool(enable_file_logger, "true");

// The minimum row group size when exporting Parquet files. default 128MB
DEFINE_Int64(min_row_group_size, "134217728");

// clang-format off
#ifdef BE_TEST
// test s3
Expand Down
3 changes: 3 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,9 @@ DECLARE_mBool(skip_loading_stale_rowset_meta);
// Only works when starting BE with --console.
DECLARE_Bool(enable_file_logger);

// The minimum row group size when exporting Parquet files.
DECLARE_Int64(min_row_group_size);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
6 changes: 3 additions & 3 deletions be/src/vec/runtime/vparquet_transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <ostream>
#include <string>

#include "common/config.h"
#include "common/status.h"
#include "gutil/endian.h"
#include "io/fs/file_writer.h"
Expand Down Expand Up @@ -70,8 +71,6 @@

namespace doris::vectorized {

const uint64_t min_row_group_size = 128 * 1024 * 1024; // 128MB

ParquetOutputStream::ParquetOutputStream(doris::io::FileWriter* file_writer)
: _file_writer(file_writer), _cur_pos(0), _written_len(0) {
set_mode(arrow::io::FileMode::WRITE);
Expand Down Expand Up @@ -247,6 +246,7 @@ Status VParquetTransformer::_parse_properties() {
}
builder.created_by(
fmt::format("{}({})", doris::get_short_version(), parquet::DEFAULT_CREATED_BY));
builder.max_row_group_length(std::numeric_limits<uint64_t>::max());
_parquet_writer_properties = builder.build();
_arrow_properties = parquet::ArrowWriterProperties::Builder()
.enable_deprecated_int96_timestamps()
Expand Down Expand Up @@ -296,7 +296,7 @@ Status VParquetTransformer::write(const Block& block) {

RETURN_DORIS_STATUS_IF_ERROR(_writer->WriteRecordBatch(*result));
_write_size += block.bytes();
if (_write_size >= min_row_group_size) {
if (_write_size >= doris::config::min_row_group_size) {
RETURN_DORIS_STATUS_IF_ERROR(_writer->NewBufferedRowGroup());
_write_size = 0;
}
Expand Down

0 comments on commit f978b8f

Please sign in to comment.