From 769c37a907f25fbc3c9fac15fe93b12179b417e1 Mon Sep 17 00:00:00 2001 From: kanga333 Date: Thu, 8 Oct 2020 11:38:25 +0900 Subject: [PATCH] [Ruby] Use the table size as the default for Parquet chunk_size A chunk_size that is too small causes metadata bloat in the Parquet file, leading to poor read performance. When no :chunk_size option is given, default chunk_size to the number of rows in the table (@table.n_rows) instead of 1024, so that each file is written as a single row group. Signed-off-by: kanga333 --- ruby/red-parquet/lib/parquet/arrow-table-savable.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ruby/red-parquet/lib/parquet/arrow-table-savable.rb b/ruby/red-parquet/lib/parquet/arrow-table-savable.rb index 0163b15ed82..70c5975273f 100644 --- a/ruby/red-parquet/lib/parquet/arrow-table-savable.rb +++ b/ruby/red-parquet/lib/parquet/arrow-table-savable.rb @@ -33,7 +33,7 @@ def save_as_parquet properties.__send__(set_method_name, value) end end - chunk_size = @options[:chunk_size] || 1024 # TODO + chunk_size = @options[:chunk_size] || @table.n_rows open_raw_output_stream do |output| ArrowFileWriter.open(@table.schema, output,