Skip to content

Commit 3b6bec0

Browse files
davve14hantangwangd
authored andcommitted
Add session property to toggle Orc use column names feature
There is an existing HiveClientConfig property hive.orc.use-column-names to access ORC file by column names, but no session property. This commit moves the existing HiveClientConfig property to HiveCommonClientConfig and introduces a session property in HiveCommonSessionProperties. It also implements changes accordingly in DwrfAggregatedPageSourceFactory, OrcAggregatedPageSourceFactory, OrcSelectivePageSourceFactory and OrcBatchPageSourceFactory. Constructors in those classes do not take boolean useOrcColumnNames anymore. Tests where those are used have also been changed. Hive connector documentation has been changed. An integration test has been added to TestHiveDistributedQueries.java. Helper function created in HiveTestUtils to replace function in TestHiveIntegrationSmokeTest. Remove superfluous constructors that have hiveClientConfig in parameter list from DwrfAggregatedPageSourceFactory.java and OrcAggregatedPageSourceFactory.java and change explicit calls in HiveTestUtils.java. Closes-Issue: prestodb#24134 Remove superfluous constructors that have hiveClientConfig in parameter list from DwrfAggregatedPageSourceFactory.java and OrcAggregatedPageSourceFactory.java and change explicit calls in HiveTestUtils.java. Add additional test with different column names to TestHiveDistributedQueries.java
1 parent 0c72600 commit 3b6bec0

File tree

16 files changed

+187
-108
lines changed

16 files changed

+187
-108
lines changed

presto-docs/src/main/sphinx/connector/hive.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@ Property Name Description
224224
CopyOnFirstWriteConfiguration, it can result in silent
225225
failures where critical configuration properties are not
226226
correctly propagated.
227+
228+
``hive.orc.use-column-names`` Enable accessing ORC columns by name in the ORC file ``false``
229+
metadata, instead of their ordinal position. Also toggleable
230+
through the ``hive.orc_use_column_names`` session property.
227231
======================================================== ============================================================ ============
228232
.. _constructor: https://github.com/apache/hadoop/blob/02a9190af5f8264e25966a80c8f9ea9bb6677899/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java#L844-L875
229233

presto-hive-common/src/main/java/com/facebook/presto/hive/HiveCommonClientConfig.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class HiveCommonClientConfig
3737
private DataSize orcStreamBufferSize = new DataSize(8, MEGABYTE);
3838
private OrcWriteValidationMode orcWriterValidationMode = OrcWriteValidationMode.BOTH;
3939
private double orcWriterValidationPercentage;
40+
private boolean useOrcColumnNames;
4041
private DataSize orcTinyStripeThreshold = new DataSize(8, MEGABYTE);
4142
private boolean parquetBatchReadOptimizationEnabled;
4243
private boolean parquetEnableBatchReaderVerification;
@@ -183,6 +184,19 @@ public HiveCommonClientConfig setOrcWriterValidationPercentage(double orcWriterV
183184
return this;
184185
}
185186

187+
public boolean isUseOrcColumnNames()
188+
{
189+
return useOrcColumnNames;
190+
}
191+
192+
@Config("hive.orc.use-column-names")
193+
@ConfigDescription("Access ORC columns using names from the file first, and fallback to Hive schema column names if not found to ensure backward compatibility with old data")
194+
public HiveCommonClientConfig setUseOrcColumnNames(boolean useOrcColumnNames)
195+
{
196+
this.useOrcColumnNames = useOrcColumnNames;
197+
return this;
198+
}
199+
186200
@NotNull
187201
public DataSize getOrcTinyStripeThreshold()
188202
{

presto-hive-common/src/main/java/com/facebook/presto/hive/HiveCommonSessionProperties.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ public class HiveCommonSessionProperties
4242
public static final String RANGE_FILTERS_ON_SUBSCRIPTS_ENABLED = "range_filters_on_subscripts_enabled";
4343
@VisibleForTesting
4444
public static final String PARQUET_BATCH_READ_OPTIMIZATION_ENABLED = "parquet_batch_read_optimization_enabled";
45+
@VisibleForTesting
46+
public static final String ORC_USE_COLUMN_NAMES = "orc_use_column_names";
4547

4648
public static final String NODE_SELECTION_STRATEGY = "node_selection_strategy";
4749
private static final String ORC_BLOOM_FILTERS_ENABLED = "orc_bloom_filters_enabled";
@@ -153,6 +155,11 @@ public HiveCommonSessionProperties(HiveCommonClientConfig hiveCommonClientConfig
153155
"use JNI based zstd decompression for reading ORC files",
154156
hiveCommonClientConfig.isZstdJniDecompressionEnabled(),
155157
true),
158+
booleanProperty(
159+
ORC_USE_COLUMN_NAMES,
160+
"Access ORC columns using names from the file first, and fallback to Hive schema column names if not found to ensure backward compatibility with old data",
161+
hiveCommonClientConfig.isUseOrcColumnNames(),
162+
false),
156163
booleanProperty(
157164
PARQUET_BATCH_READ_OPTIMIZATION_ENABLED,
158165
"Is Parquet batch read optimization enabled",
@@ -262,6 +269,11 @@ public static boolean isOrcZstdJniDecompressionEnabled(ConnectorSession session)
262269
return session.getProperty(ORC_ZSTD_JNI_DECOMPRESSION_ENABLED, Boolean.class);
263270
}
264271

272+
public static boolean isUseOrcColumnNames(ConnectorSession session)
273+
{
274+
return session.getProperty(ORC_USE_COLUMN_NAMES, Boolean.class);
275+
}
276+
265277
public static boolean isParquetBatchReadsEnabled(ConnectorSession session)
266278
{
267279
return session.getProperty(PARQUET_BATCH_READ_OPTIMIZATION_ENABLED, Boolean.class);

presto-hive-common/src/test/java/com/facebook/presto/hive/TestHiveCommonClientConfig.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public void testDefaults()
4545
.setOrcOptimizedWriterEnabled(true)
4646
.setOrcWriterValidationPercentage(0.0)
4747
.setOrcWriterValidationMode(OrcWriteValidation.OrcWriteValidationMode.BOTH)
48+
.setUseOrcColumnNames(false)
4849
.setZstdJniDecompressionEnabled(false)
4950
.setParquetBatchReaderVerificationEnabled(false)
5051
.setParquetBatchReadOptimizationEnabled(false)
@@ -71,6 +72,7 @@ public void testExplicitPropertyMappings()
7172
.put("hive.orc.optimized-writer.enabled", "false")
7273
.put("hive.orc.writer.validation-percentage", "0.16")
7374
.put("hive.orc.writer.validation-mode", "DETAILED")
75+
.put("hive.orc.use-column-names", "true")
7476
.put("hive.zstd-jni-decompression-enabled", "true")
7577
.put("hive.enable-parquet-batch-reader-verification", "true")
7678
.put("hive.parquet-batch-read-optimization-enabled", "true")
@@ -94,6 +96,7 @@ public void testExplicitPropertyMappings()
9496
.setOrcOptimizedWriterEnabled(false)
9597
.setOrcWriterValidationPercentage(0.16)
9698
.setOrcWriterValidationMode(OrcWriteValidation.OrcWriteValidationMode.DETAILED)
99+
.setUseOrcColumnNames(true)
97100
.setZstdJniDecompressionEnabled(true)
98101
.setParquetBatchReaderVerificationEnabled(true)
99102
.setParquetBatchReadOptimizationEnabled(true)

presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ public class HiveClientConfig
107107

108108
private DataSize textMaxLineLength = new DataSize(100, MEGABYTE);
109109
private boolean assumeCanonicalPartitionKeys;
110-
private boolean useOrcColumnNames;
111110
private double orcDefaultBloomFilterFpp = 0.05;
112111
private boolean rcfileOptimizedWriterEnabled = true;
113112
private boolean rcfileWriterValidate;
@@ -720,19 +719,6 @@ public HiveClientConfig setS3FileSystemType(S3FileSystemType s3FileSystemType)
720719
return this;
721720
}
722721

723-
public boolean isUseOrcColumnNames()
724-
{
725-
return useOrcColumnNames;
726-
}
727-
728-
@Config("hive.orc.use-column-names")
729-
@ConfigDescription("Access ORC columns using names from the file first, and fallback to Hive schema column names if not found to ensure backward compatibility with old data")
730-
public HiveClientConfig setUseOrcColumnNames(boolean useOrcColumnNames)
731-
{
732-
this.useOrcColumnNames = useOrcColumnNames;
733-
return this;
734-
}
735-
736722
public double getOrcDefaultBloomFilterFpp()
737723
{
738724
return orcDefaultBloomFilterFpp;

presto-hive/src/main/java/com/facebook/presto/hive/orc/DwrfAggregatedPageSourceFactory.java

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import com.facebook.presto.hive.FileFormatDataSourceStats;
2020
import com.facebook.presto.hive.HdfsEnvironment;
2121
import com.facebook.presto.hive.HiveAggregatedPageSourceFactory;
22-
import com.facebook.presto.hive.HiveClientConfig;
2322
import com.facebook.presto.hive.HiveColumnHandle;
2423
import com.facebook.presto.hive.HiveFileContext;
2524
import com.facebook.presto.hive.HiveFileSplit;
@@ -36,6 +35,7 @@
3635
import java.util.List;
3736
import java.util.Optional;
3837

38+
import static com.facebook.presto.hive.HiveCommonSessionProperties.isUseOrcColumnNames;
3939
import static com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA;
4040
import static com.facebook.presto.hive.orc.OrcAggregatedPageSourceFactory.createOrcPageSource;
4141
import static com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION;
@@ -47,7 +47,6 @@ public class DwrfAggregatedPageSourceFactory
4747
{
4848
private final TypeManager typeManager;
4949
private final StandardFunctionResolution functionResolution;
50-
private final boolean useOrcColumnNames;
5150
private final HdfsEnvironment hdfsEnvironment;
5251
private final FileFormatDataSourceStats stats;
5352
private final OrcFileTailSource orcFileTailSource;
@@ -57,34 +56,13 @@ public class DwrfAggregatedPageSourceFactory
5756
public DwrfAggregatedPageSourceFactory(
5857
TypeManager typeManager,
5958
StandardFunctionResolution functionResolution,
60-
HiveClientConfig config,
61-
HdfsEnvironment hdfsEnvironment,
62-
FileFormatDataSourceStats stats,
63-
OrcFileTailSource orcFileTailSource,
64-
StripeMetadataSourceFactory stripeMetadataSourceFactory)
65-
{
66-
this(
67-
typeManager,
68-
functionResolution,
69-
requireNonNull(config, "hiveClientConfig is null").isUseOrcColumnNames(),
70-
hdfsEnvironment,
71-
stats,
72-
orcFileTailSource,
73-
stripeMetadataSourceFactory);
74-
}
75-
76-
public DwrfAggregatedPageSourceFactory(
77-
TypeManager typeManager,
78-
StandardFunctionResolution functionResolution,
79-
boolean useOrcColumnNames,
8059
HdfsEnvironment hdfsEnvironment,
8160
FileFormatDataSourceStats stats,
8261
OrcFileTailSource orcFileTailSource,
8362
StripeMetadataSourceFactory stripeMetadataSourceFactory)
8463
{
8564
this.typeManager = requireNonNull(typeManager, "typeManager is null");
8665
this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
87-
this.useOrcColumnNames = useOrcColumnNames;
8866
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
8967
this.stats = requireNonNull(stats, "stats is null");
9068
this.orcFileTailSource = requireNonNull(orcFileTailSource, "orcFileTailCache is null");
@@ -116,7 +94,7 @@ public Optional<? extends ConnectorPageSource> createPageSource(
11694
configuration,
11795
fileSplit,
11896
columns,
119-
useOrcColumnNames,
97+
isUseOrcColumnNames(session),
12098
typeManager,
12199
functionResolution,
122100
stats,

presto-hive/src/main/java/com/facebook/presto/hive/orc/OrcAggregatedPageSourceFactory.java

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import com.facebook.presto.hive.FileFormatDataSourceStats;
2020
import com.facebook.presto.hive.HdfsEnvironment;
2121
import com.facebook.presto.hive.HiveAggregatedPageSourceFactory;
22-
import com.facebook.presto.hive.HiveClientConfig;
2322
import com.facebook.presto.hive.HiveColumnHandle;
2423
import com.facebook.presto.hive.HiveFileContext;
2524
import com.facebook.presto.hive.HiveFileSplit;
@@ -49,6 +48,7 @@
4948
import static com.facebook.presto.hive.HiveCommonSessionProperties.getOrcMaxReadBlockSize;
5049
import static com.facebook.presto.hive.HiveCommonSessionProperties.getOrcTinyStripeThreshold;
5150
import static com.facebook.presto.hive.HiveCommonSessionProperties.isOrcZstdJniDecompressionEnabled;
51+
import static com.facebook.presto.hive.HiveCommonSessionProperties.isUseOrcColumnNames;
5252
import static com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles;
5353
import static com.facebook.presto.hive.orc.OrcPageSourceFactoryUtils.getOrcDataSource;
5454
import static com.facebook.presto.hive.orc.OrcPageSourceFactoryUtils.getOrcReader;
@@ -62,7 +62,6 @@ public class OrcAggregatedPageSourceFactory
6262
{
6363
private final TypeManager typeManager;
6464
private final StandardFunctionResolution functionResolution;
65-
private final boolean useOrcColumnNames;
6665
private final HdfsEnvironment hdfsEnvironment;
6766
private final FileFormatDataSourceStats stats;
6867
private final OrcFileTailSource orcFileTailSource;
@@ -72,34 +71,13 @@ public class OrcAggregatedPageSourceFactory
7271
public OrcAggregatedPageSourceFactory(
7372
TypeManager typeManager,
7473
StandardFunctionResolution functionResolution,
75-
HiveClientConfig config,
76-
HdfsEnvironment hdfsEnvironment,
77-
FileFormatDataSourceStats stats,
78-
OrcFileTailSource orcFileTailSource,
79-
StripeMetadataSourceFactory stripeMetadataSourceFactory)
80-
{
81-
this(
82-
typeManager,
83-
functionResolution,
84-
requireNonNull(config, "hiveClientConfig is null").isUseOrcColumnNames(),
85-
hdfsEnvironment,
86-
stats,
87-
orcFileTailSource,
88-
stripeMetadataSourceFactory);
89-
}
90-
91-
public OrcAggregatedPageSourceFactory(
92-
TypeManager typeManager,
93-
StandardFunctionResolution functionResolution,
94-
boolean useOrcColumnNames,
9574
HdfsEnvironment hdfsEnvironment,
9675
FileFormatDataSourceStats stats,
9776
OrcFileTailSource orcFileTailSource,
9877
StripeMetadataSourceFactory stripeMetadataSourceFactory)
9978
{
10079
this.typeManager = requireNonNull(typeManager, "typeManager is null");
10180
this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
102-
this.useOrcColumnNames = useOrcColumnNames;
10381
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
10482
this.stats = requireNonNull(stats, "stats is null");
10583
this.orcFileTailSource = requireNonNull(orcFileTailSource, "orcFileTailCache is null");
@@ -132,7 +110,7 @@ public Optional<? extends ConnectorPageSource> createPageSource(
132110
configuration,
133111
fileSplit,
134112
columns,
135-
useOrcColumnNames,
113+
isUseOrcColumnNames(session),
136114
typeManager,
137115
functionResolution,
138116
stats,

presto-hive/src/main/java/com/facebook/presto/hive/orc/OrcBatchPageSourceFactory.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import static com.facebook.presto.hive.HiveCommonSessionProperties.getOrcTinyStripeThreshold;
6363
import static com.facebook.presto.hive.HiveCommonSessionProperties.isOrcBloomFiltersEnabled;
6464
import static com.facebook.presto.hive.HiveCommonSessionProperties.isOrcZstdJniDecompressionEnabled;
65+
import static com.facebook.presto.hive.HiveCommonSessionProperties.isUseOrcColumnNames;
6566
import static com.facebook.presto.hive.HiveUtil.checkRowIDPartitionComponent;
6667
import static com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles;
6768
import static com.facebook.presto.hive.orc.OrcPageSourceFactoryUtils.getOrcDataSource;
@@ -78,7 +79,6 @@ public class OrcBatchPageSourceFactory
7879
implements HiveBatchPageSourceFactory
7980
{
8081
private final TypeManager typeManager;
81-
private final boolean useOrcColumnNames;
8282
private final HdfsEnvironment hdfsEnvironment;
8383
private final FileFormatDataSourceStats stats;
8484
private final int domainCompactionThreshold;
@@ -97,7 +97,6 @@ public OrcBatchPageSourceFactory(
9797
{
9898
this(
9999
typeManager,
100-
requireNonNull(config, "hiveClientConfig is null").isUseOrcColumnNames(),
101100
hdfsEnvironment,
102101
stats,
103102
config.getDomainCompactionThreshold(),
@@ -107,15 +106,13 @@ public OrcBatchPageSourceFactory(
107106

108107
public OrcBatchPageSourceFactory(
109108
TypeManager typeManager,
110-
boolean useOrcColumnNames,
111109
HdfsEnvironment hdfsEnvironment,
112110
FileFormatDataSourceStats stats,
113111
int domainCompactionThreshold,
114112
OrcFileTailSource orcFileTailSource,
115113
StripeMetadataSourceFactory stripeMetadataSourceFactory)
116114
{
117115
this.typeManager = requireNonNull(typeManager, "typeManager is null");
118-
this.useOrcColumnNames = useOrcColumnNames;
119116
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
120117
this.stats = requireNonNull(stats, "stats is null");
121118
this.domainCompactionThreshold = domainCompactionThreshold;
@@ -153,7 +150,7 @@ public Optional<? extends ConnectorPageSource> createPageSource(
153150
configuration,
154151
fileSplit,
155152
columns,
156-
useOrcColumnNames,
153+
isUseOrcColumnNames(session),
157154
effectivePredicate,
158155
hiveStorageTimeZone,
159156
typeManager,

presto-hive/src/main/java/com/facebook/presto/hive/orc/OrcSelectivePageSourceFactory.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
import static com.facebook.presto.hive.HiveCommonSessionProperties.getOrcTinyStripeThreshold;
105105
import static com.facebook.presto.hive.HiveCommonSessionProperties.isOrcBloomFiltersEnabled;
106106
import static com.facebook.presto.hive.HiveCommonSessionProperties.isOrcZstdJniDecompressionEnabled;
107+
import static com.facebook.presto.hive.HiveCommonSessionProperties.isUseOrcColumnNames;
107108
import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES;
108109
import static com.facebook.presto.hive.HiveSessionProperties.isAdaptiveFilterReorderingEnabled;
109110
import static com.facebook.presto.hive.HiveSessionProperties.isLegacyTimestampBucketing;
@@ -132,7 +133,6 @@ public class OrcSelectivePageSourceFactory
132133
private final TypeManager typeManager;
133134
private final StandardFunctionResolution functionResolution;
134135
private final RowExpressionService rowExpressionService;
135-
private final boolean useOrcColumnNames;
136136
private final HdfsEnvironment hdfsEnvironment;
137137
private final FileFormatDataSourceStats stats;
138138
private final int domainCompactionThreshold;
@@ -156,7 +156,6 @@ public OrcSelectivePageSourceFactory(
156156
typeManager,
157157
functionResolution,
158158
rowExpressionService,
159-
requireNonNull(config, "hiveClientConfig is null").isUseOrcColumnNames(),
160159
hdfsEnvironment,
161160
stats,
162161
config.getDomainCompactionThreshold(),
@@ -169,7 +168,6 @@ public OrcSelectivePageSourceFactory(
169168
TypeManager typeManager,
170169
StandardFunctionResolution functionResolution,
171170
RowExpressionService rowExpressionService,
172-
boolean useOrcColumnNames,
173171
HdfsEnvironment hdfsEnvironment,
174172
FileFormatDataSourceStats stats,
175173
int domainCompactionThreshold,
@@ -180,7 +178,6 @@ public OrcSelectivePageSourceFactory(
180178
this.typeManager = requireNonNull(typeManager, "typeManager is null");
181179
this.functionResolution = requireNonNull(functionResolution, "functionResolution is null");
182180
this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null");
183-
this.useOrcColumnNames = useOrcColumnNames;
184181
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
185182
this.stats = requireNonNull(stats, "stats is null");
186183
this.domainCompactionThreshold = domainCompactionThreshold;
@@ -230,7 +227,7 @@ public Optional<? extends ConnectorPageSource> createPageSource(
230227
outputColumns,
231228
domainPredicate,
232229
remainingPredicate,
233-
useOrcColumnNames,
230+
isUseOrcColumnNames(session),
234231
hiveStorageTimeZone,
235232
typeManager,
236233
functionResolution,

0 commit comments

Comments
 (0)