@@ -31,6 +31,7 @@
 import io.trino.parquet.writer.ParquetWriterOptions;
 import io.trino.plugin.base.metrics.FileFormatDataSourceStats;
 import io.trino.plugin.hive.HiveCompressionCodec;
+import io.trino.plugin.hive.HiveCompressionOption;
 import io.trino.plugin.hive.NodeVersion;
 import io.trino.plugin.hive.orc.OrcWriterConfig;
 import io.trino.plugin.iceberg.fileio.ForwardingOutputFile;
@@ -60,7 +61,9 @@
 import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA;
 import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_WRITER_OPEN_ERROR;
 import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_WRITE_VALIDATION_FAILED;
-import static io.trino.plugin.iceberg.IcebergSessionProperties.getCompressionCodec;
+import static io.trino.plugin.iceberg.IcebergFileFormat.AVRO;
+import static io.trino.plugin.iceberg.IcebergFileFormat.ORC;
+import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET;
 import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcStringStatisticsLimit;
 import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcWriterMaxDictionaryMemory;
 import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcWriterMaxRowGroupRows;
@@ -74,6 +77,7 @@
 import static io.trino.plugin.iceberg.IcebergSessionProperties.getParquetWriterPageValueCount;
 import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcWriterValidate;
 import static io.trino.plugin.iceberg.IcebergTableProperties.ORC_BLOOM_FILTER_FPP_PROPERTY;
+import static io.trino.plugin.iceberg.IcebergUtil.getHiveCompressionCodec;
 import static io.trino.plugin.iceberg.IcebergUtil.getOrcBloomFilterColumns;
 import static io.trino.plugin.iceberg.IcebergUtil.getOrcBloomFilterFpp;
 import static io.trino.plugin.iceberg.IcebergUtil.getParquetBloomFilterColumns;
@@ -97,19 +101,22 @@ public class IcebergFileWriterFactory
     private final NodeVersion nodeVersion;
     private final FileFormatDataSourceStats readStats;
     private final OrcWriterStats orcWriterStats = new OrcWriterStats();
+    private final HiveCompressionOption hiveCompressionOption;
     private final OrcWriterOptions orcWriterOptions;
 
     @Inject
     public IcebergFileWriterFactory(
             TypeManager typeManager,
             NodeVersion nodeVersion,
             FileFormatDataSourceStats readStats,
+            IcebergConfig icebergConfig,
             OrcWriterConfig orcWriterConfig)
     {
         checkArgument(!orcWriterConfig.isUseLegacyVersion(), "the ORC writer shouldn't be configured to use a legacy version");
         this.typeManager = requireNonNull(typeManager, "typeManager is null");
         this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
         this.readStats = requireNonNull(readStats, "readStats is null");
+        this.hiveCompressionOption = icebergConfig.getCompressionCodec();
         this.orcWriterOptions = orcWriterConfig.toOrcWriterOptions();
     }
 
@@ -132,7 +139,7 @@ public IcebergFileWriter createDataFileWriter(
             // TODO use metricsConfig https://github.com/trinodb/trino/issues/9791
             case PARQUET -> createParquetWriter(MetricsConfig.getDefault(), fileSystem, outputPath, icebergSchema, session, storageProperties);
             case ORC -> createOrcWriter(metricsConfig, fileSystem, outputPath, icebergSchema, session, storageProperties, getOrcStringStatisticsLimit(session));
-            case AVRO -> createAvroWriter(fileSystem, outputPath, icebergSchema, session);
+            case AVRO -> createAvroWriter(fileSystem, outputPath, icebergSchema, storageProperties);
         };
     }
 
@@ -146,7 +153,7 @@ public IcebergFileWriter createPositionDeleteWriter(
         return switch (fileFormat) {
             case PARQUET -> createParquetWriter(FULL_METRICS_CONFIG, fileSystem, outputPath, POSITION_DELETE_SCHEMA, session, storageProperties);
             case ORC -> createOrcWriter(FULL_METRICS_CONFIG, fileSystem, outputPath, POSITION_DELETE_SCHEMA, session, storageProperties, DataSize.ofBytes(Integer.MAX_VALUE));
-            case AVRO -> createAvroWriter(fileSystem, outputPath, POSITION_DELETE_SCHEMA, session);
+            case AVRO -> createAvroWriter(fileSystem, outputPath, POSITION_DELETE_SCHEMA, storageProperties);
         };
     }
 
@@ -178,7 +185,9 @@ private IcebergFileWriter createParquetWriter(
                     .setBloomFilterColumns(getParquetBloomFilterColumns(storageProperties))
                     .build();
 
-            HiveCompressionCodec hiveCompressionCodec = toCompressionCodec(getCompressionCodec(session));
+            HiveCompressionCodec compressionCodec = getHiveCompressionCodec(PARQUET, storageProperties)
+                    .orElse(toCompressionCodec(hiveCompressionOption));
+
             return new IcebergParquetFileWriter(
                     metricsConfig,
                     outputFile,
@@ -189,8 +198,8 @@ private IcebergFileWriter createParquetWriter(
                     makeTypeMap(fileColumnTypes, fileColumnNames),
                     parquetWriterOptions,
                     IntStream.range(0, fileColumnNames.size()).toArray(),
-                    hiveCompressionCodec.getParquetCompressionCodec()
-                            .orElseThrow(() -> new TrinoException(NOT_SUPPORTED, "Compression codec %s not supported for Parquet".formatted(hiveCompressionCodec))),
+                    compressionCodec.getParquetCompressionCodec()
+                            .orElseThrow(() -> new TrinoException(NOT_SUPPORTED, "Compression codec %s not supported for Parquet".formatted(compressionCodec))),
                     nodeVersion.toString());
         }
         catch (IOException | UncheckedIOException e) {
@@ -234,6 +243,9 @@ private IcebergFileWriter createOrcWriter(
             });
         }
 
+        HiveCompressionCodec compressionCodec = getHiveCompressionCodec(ORC, storageProperties)
+                .orElse(toCompressionCodec(hiveCompressionOption));
+
         return new IcebergOrcFileWriter(
                 metricsConfig,
                 icebergSchema,
@@ -242,7 +254,7 @@ private IcebergFileWriter createOrcWriter(
                 fileColumnNames,
                 fileColumnTypes,
                 toOrcType(icebergSchema),
-                toCompressionCodec(getCompressionCodec(session)).getOrcCompressionKind(),
+                compressionCodec.getOrcCompressionKind(),
                 withBloomFilterOptions(orcWriterOptions, storageProperties)
                         .withStripeMinSize(getOrcWriterMinStripeSize(session))
                         .withStripeMaxSize(getOrcWriterMaxStripeSize(session))
@@ -287,19 +299,22 @@ private IcebergFileWriter createAvroWriter(
             TrinoFileSystem fileSystem,
             Location outputPath,
             Schema icebergSchema,
-            ConnectorSession session)
+            Map<String, String> storageProperties)
     {
         Closeable rollbackAction = () -> fileSystem.deleteFile(outputPath);
 
         List<Type> columnTypes = icebergSchema.columns().stream()
                 .map(column -> toTrinoType(column.type(), typeManager))
                 .collect(toImmutableList());
 
+        HiveCompressionCodec compressionCodec = getHiveCompressionCodec(AVRO, storageProperties)
+                .orElse(toCompressionCodec(hiveCompressionOption));
+
         return new IcebergAvroFileWriter(
                 new ForwardingOutputFile(fileSystem, outputPath),
                 rollbackAction,
                 icebergSchema,
                 columnTypes,
-                toCompressionCodec(getCompressionCodec(session)));
+                compressionCodec);
     }
 }
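
The recurring pattern in this diff resolves the write codec per file format: a compression codec found in the table's storage properties wins, and only otherwise does the writer fall back to the connector-level default taken from IcebergConfig (replacing the old session-property lookup on the removed lines). A minimal sketch of that resolution order, assuming a hypothetical resolveCodec helper; the property keys named in the comments (Iceberg's write.<format>.compression-codec) are an assumption about what getHiveCompressionCodec consults, not confirmed by this diff:

    // Sketch only, not the exact Trino implementation.
    private static HiveCompressionCodec resolveCodec(
            IcebergFileFormat format,
            Map<String, String> storageProperties,
            HiveCompressionOption connectorDefault)
    {
        // Table-level setting (e.g. "write.parquet.compression-codec", assumed key) takes precedence;
        // getHiveCompressionCodec returns Optional.empty() when the table does not set one.
        return getHiveCompressionCodec(format, storageProperties)
                // Fall back to the catalog's configured iceberg.compression-codec default.
                .orElse(toCompressionCodec(connectorDefault));
    }

Note the behavioral consequence visible in the createAvroWriter hunks: the Avro path no longer needs ConnectorSession at all, since both inputs to the resolution (storage properties and the injected config default) are session-independent.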