@@ -374,7 +374,18 @@ HiveDataSink::HiveDataSink(
374374 *insertTableHandle->bucketProperty (),
375375 inputType)
376376 : nullptr,
377- getNonPartitionChannels(insertTableHandle)) {}
377+ getPartitionChannels(insertTableHandle),
378+ getNonPartitionChannels(insertTableHandle),
379+ !getPartitionChannels(insertTableHandle).empty()
380+ ? std::make_unique<PartitionIdGenerator>(
381+ inputType,
382+ getPartitionChannels (insertTableHandle),
383+ hiveConfig->maxPartitionsPerWriters(
384+ connectorQueryCtx->sessionProperties ()),
385+ connectorQueryCtx->memoryPool(),
386+ hiveConfig->isPartitionPathAsLowerCase(
387+ connectorQueryCtx->sessionProperties ()))
388+ : nullptr) {}
378389
379390HiveDataSink::HiveDataSink (
380391 RowTypePtr inputType,
@@ -384,7 +395,9 @@ HiveDataSink::HiveDataSink(
384395 const std::shared_ptr<const HiveConfig>& hiveConfig,
385396 uint32_t bucketCount,
386397 std::unique_ptr<core::PartitionFunction> bucketFunction,
387- const std::vector<column_index_t >& dataChannels)
398+ const std::vector<column_index_t >& partitionChannels,
399+ const std::vector<column_index_t >& dataChannels,
400+ std::unique_ptr<PartitionIdGenerator> partitionIdGenerator)
388401 : inputType_(std::move(inputType)),
389402 insertTableHandle_(std::move(insertTableHandle)),
390403 connectorQueryCtx_(connectorQueryCtx),
@@ -393,17 +406,8 @@ HiveDataSink::HiveDataSink(
393406 updateMode_(getUpdateMode()),
394407 maxOpenWriters_(hiveConfig_->maxPartitionsPerWriters (
395408 connectorQueryCtx->sessionProperties ())),
396- partitionChannels_(getPartitionChannels(insertTableHandle_)),
397- partitionIdGenerator_(
398- !partitionChannels_.empty()
399- ? std::make_unique<PartitionIdGenerator>(
400- inputType_,
401- partitionChannels_,
402- maxOpenWriters_,
403- connectorQueryCtx_->memoryPool (),
404- hiveConfig_->isPartitionPathAsLowerCase(
405- connectorQueryCtx->sessionProperties ()))
406- : nullptr),
409+ partitionChannels_(partitionChannels),
410+ partitionIdGenerator_(std::move(partitionIdGenerator)),
407411 dataChannels_(dataChannels),
408412 bucketCount_(static_cast <int32_t >(bucketCount)),
409413 bucketFunction_(std::move(bucketFunction)),
@@ -753,6 +757,32 @@ uint32_t HiveDataSink::appendWriter(const HiveWriterId& id) {
753757 ioStats_.emplace_back (std::make_shared<io::IoStatistics>());
754758 setMemoryReclaimers (writerInfo_.back ().get (), ioStats_.back ().get ());
755759
760+ auto options = createWriterOptions ();
761+ // Prevents the memory allocation during the writer creation.
762+ WRITER_NON_RECLAIMABLE_SECTION_GUARD (writerInfo_.size () - 1 );
763+ auto writer = writerFactory_->createWriter (
764+ dwio::common::FileSink::create (
765+ writePath,
766+ {
767+ .bufferWrite = false ,
768+ .connectorProperties = hiveConfig_->config (),
769+ .fileCreateConfig = hiveConfig_->writeFileCreateConfig (),
770+ .pool = writerInfo_.back ()->sinkPool .get (),
771+ .metricLogger = dwio::common::MetricsLog::voidLog (),
772+ .stats = ioStats_.back ().get (),
773+ }),
774+ options);
775+ writer = maybeCreateBucketSortWriter (std::move (writer));
776+ writers_.emplace_back (std::move (writer));
777+
778+ extendBuffersForPartitionedTables ();
779+
780+ writerIndexMap_.emplace (id, writers_.size () - 1 );
781+ return writerIndexMap_[id];
782+ }
783+
784+ std::shared_ptr<dwio::common::WriterOptions> HiveDataSink::createWriterOptions ()
785+ const {
756786 // Take the writer options provided by the user as a starting point, or
757787 // allocate a new one.
758788 auto options = insertTableHandle_->writerOptions ();
@@ -802,28 +832,7 @@ uint32_t HiveDataSink::appendWriter(const HiveWriterId& id) {
802832 options->adjustTimestampToTimezone =
803833 connectorQueryCtx_->adjustTimestampToTimezone ();
804834 options->processConfigs (*hiveConfig_->config (), *connectorSessionProperties);
805-
806- // Prevents the memory allocation during the writer creation.
807- WRITER_NON_RECLAIMABLE_SECTION_GUARD (writerInfo_.size () - 1 );
808- auto writer = writerFactory_->createWriter (
809- dwio::common::FileSink::create (
810- writePath,
811- {
812- .bufferWrite = false ,
813- .connectorProperties = hiveConfig_->config (),
814- .fileCreateConfig = hiveConfig_->writeFileCreateConfig (),
815- .pool = writerInfo_.back ()->sinkPool .get (),
816- .metricLogger = dwio::common::MetricsLog::voidLog (),
817- .stats = ioStats_.back ().get (),
818- }),
819- options);
820- writer = maybeCreateBucketSortWriter (std::move (writer));
821- writers_.emplace_back (std::move (writer));
822-
823- extendBuffersForPartitionedTables ();
824-
825- writerIndexMap_.emplace (id, writers_.size () - 1 );
826- return writerIndexMap_[id];
835+ return options;
827836}
828837
829838std::optional<std::string> HiveDataSink::getPartitionName (
0 commit comments