From f4720f5a36ec1dfd235a865825c50749ed91f6ec Mon Sep 17 00:00:00 2001 From: luffy-zh Date: Wed, 28 Aug 2024 15:13:53 +0800 Subject: [PATCH] add more comments --- c++/include/orc/Writer.hh | 2 ++ c++/src/ColumnWriter.cc | 6 +----- c++/src/ColumnWriter.hh | 3 ++- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh index b8ea584f7d..78f06739bc 100644 --- a/c++/include/orc/Writer.hh +++ b/c++/include/orc/Writer.hh @@ -293,6 +293,8 @@ namespace orc { /** * Set whether the compression block should be aligned to row group boundary. + * The boolean type may not be aligned to row group boundary due to the + * requirement of the Boolean RLE encoder to pack input bits into bytes. */ WriterOptions& setAlignBlockBoundToRowGroup(bool alignBlockBoundToRowGroup); diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc index 21e546d8fd..599f5f9e93 100644 --- a/c++/src/ColumnWriter.cc +++ b/c++/src/ColumnWriter.cc @@ -1271,11 +1271,7 @@ namespace orc { void StringColumnWriter::finishStreams() { ColumnWriter::finishStreams(); - if (useDictionary) { - dictDataEncoder->finishEncode(); - dictLengthEncoder->finishEncode(); - dictStream->finishStream(); - } else { + if (!useDictionary) { directDataStream->finishStream(); directLengthEncoder->finishEncode(); } diff --git a/c++/src/ColumnWriter.hh b/c++/src/ColumnWriter.hh index 6f61fc1b02..6d8ad00325 100644 --- a/c++/src/ColumnWriter.hh +++ b/c++/src/ColumnWriter.hh @@ -183,7 +183,8 @@ namespace orc { * Finalize the encoding and compressing process. This function should be * called after all data required for encoding has been added. It ensures * that any remaining data is processed and the final state of the streams - * is set. + * is set. Note: the boolean type may break this spec because some trailing bits will be written + * to the next compression block. */ virtual void finishStreams();