Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 90 additions & 12 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
<dep.pinot.version>0.9.3</dep.pinot.version>
<dep.druid.version>0.19.0</dep.druid.version>
<dep.jaxb.version>2.3.1</dep.jaxb.version>
<dep.hudi.version>0.10.1</dep.hudi.version>
<dep.hudi.version>0.9.0</dep.hudi.version>
<dep.testcontainers.version>1.15.1</dep.testcontainers.version>
<!--
America/Bahia_Banderas has:
Expand Down Expand Up @@ -1207,31 +1207,109 @@
</exclusions>
</dependency>

<!-- Start dependencies for querying Hudi table-->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-presto-bundle</artifactId>
<artifactId>hudi-common</artifactId>
<version>${dep.hudi.version}</version>
<exclusions>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.objenesis</groupId>
<artifactId>objenesis</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
</exclusion>
<exclusion>
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
</exclusion>
<exclusion>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo-shaded</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr</artifactId>
<version>${dep.hudi.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.objenesis</groupId>
<artifactId>objenesis</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr-bundle</artifactId>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo-shaded</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- End dependencies for querying Hudi table-->

<dependency>
<groupId>net.sf.opencsv</groupId>
Expand Down
7 changes: 6 additions & 1 deletion presto-hive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-presto-bundle</artifactId>
<artifactId>hudi-common</artifactId>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr</artifactId>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.google.common.collect.ImmutableSet;
import io.airlift.units.Duration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.weakref.jmx.Managed;

import javax.inject.Inject;
Expand Down Expand Up @@ -73,14 +74,15 @@ public Iterator<HiveFileInfo> list(
Table table,
Path path,
NamenodeStats namenodeStats,
PathFilter pathFilter,
HiveDirectoryContext hiveDirectoryContext)
{
List<HiveFileInfo> files = cache.getIfPresent(path);
if (files != null) {
return files.iterator();
}

Iterator<HiveFileInfo> iterator = delegate.list(fileSystem, table, path, namenodeStats, hiveDirectoryContext);
Iterator<HiveFileInfo> iterator = delegate.list(fileSystem, table, path, namenodeStats, pathFilter, hiveDirectoryContext);
if (hiveDirectoryContext.isCacheable() && cachedTableChecker.isCachedTable(table.getSchemaTableName())) {
return cachingIterator(iterator, path);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.facebook.presto.hive.filesystem.ExtendedFileSystem;
import com.facebook.presto.hive.metastore.Table;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

import java.util.Iterator;

Expand All @@ -26,5 +27,6 @@ Iterator<HiveFileInfo> list(
Table table,
Path path,
NamenodeStats namenodeStats,
PathFilter pathFilter,
HiveDirectoryContext hiveDirectoryContext);
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.facebook.presto.hive.util.HiveFileIterator;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;

import java.io.IOException;
Expand All @@ -36,13 +37,15 @@ public Iterator<HiveFileInfo> list(
Table table,
Path path,
NamenodeStats namenodeStats,
PathFilter pathFilter,
HiveDirectoryContext hiveDirectoryContext)
{
return new HiveFileIterator(
path,
p -> new HadoopFileInfoIterator(fileSystem.listLocatedStatus(p)),
namenodeStats,
hiveDirectoryContext.getNestedDirectoryPolicy());
hiveDirectoryContext.getNestedDirectoryPolicy(),
pathFilter);
}

public static class HadoopFileInfoIterator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ public class HiveClientConfig

private boolean verboseRuntimeStatsEnabled;
private boolean useRecordPageSourceForCustomSplit = true;
private boolean hudiMetadataEnabled;

private boolean sizeBasedSplitWeightsEnabled = true;
private double minimumAssignedSplitWeight = 0.05;
Expand Down Expand Up @@ -1787,17 +1786,4 @@ public HiveClientConfig setFileSplittable(boolean fileSplittable)
this.fileSplittable = fileSplittable;
return this;
}

@Config("hive.hudi-metadata-enabled")
@ConfigDescription("For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage")
public HiveClientConfig setHudiMetadataEnabled(boolean hudiMetadataEnabled)
{
this.hudiMetadataEnabled = hudiMetadataEnabled;
return this;
}

public boolean isHudiMetadataEnabled()
{
return this.hudiMetadataEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ public final class HiveSessionProperties
private static final String USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT = "use_record_page_source_for_custom_split";
public static final String MAX_INITIAL_SPLITS = "max_initial_splits";
public static final String FILE_SPLITTABLE = "file_splittable";
private static final String HUDI_METADATA_ENABLED = "hudi_metadata_enabled";

private final List<PropertyMetadata<?>> sessionProperties;

Expand Down Expand Up @@ -684,12 +683,7 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
FILE_SPLITTABLE,
"If a hive file is splittable when coordinator schedules splits",
hiveClientConfig.isFileSplittable(),
true),
booleanProperty(
HUDI_METADATA_ENABLED,
"For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage",
hiveClientConfig.isHudiMetadataEnabled(),
false));
true));
}

public List<PropertyMetadata<?>> getSessionProperties()
Expand Down Expand Up @@ -1193,9 +1187,4 @@ public static boolean isFileSplittable(ConnectorSession session)
{
return session.getProperty(FILE_SPLITTABLE, Boolean.class);
}

public static boolean isHudiMetadataEnabled(ConnectorSession session)
{
return session.getProperty(HUDI_METADATA_ENABLED, Boolean.class);
}
}
22 changes: 9 additions & 13 deletions presto-hive/src/main/java/com/facebook/presto/hive/HiveUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -385,33 +385,29 @@ static boolean shouldUseRecordReaderFromInputFormat(Configuration configuration,
.anyMatch(USE_RECORD_READER_FROM_INPUT_FORMAT_ANNOTATION::equals);
}

static boolean shouldUseFileSplitsFromInputFormat(InputFormat<?, ?> inputFormat, DirectoryLister directoryLister)
static boolean shouldUseFileSplitsFromInputFormat(InputFormat<?, ?> inputFormat, Configuration conf, String tablePath)
{
if (directoryLister instanceof HudiDirectoryLister) {
boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations())
.map(Annotation::annotationType)
.map(Class::getSimpleName)
.anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals);

return hasUseSplitsAnnotation &&
(!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, ((HudiDirectoryLister) directoryLister).getMetaClient()));
}
boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations())
.map(Annotation::annotationType)
.map(Class::getSimpleName)
.anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals);

return false;
return hasUseSplitsAnnotation && (!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, conf, tablePath));
}

static boolean isHudiParquetInputFormat(InputFormat<?, ?> inputFormat)
{
return inputFormat instanceof HoodieParquetInputFormat;
}

private static boolean shouldUseFileSplitsForHudi(InputFormat<?, ?> inputFormat, HoodieTableMetaClient metaClient)
private static boolean shouldUseFileSplitsForHudi(InputFormat<?, ?> inputFormat, Configuration conf, String tablePath)
{
if (inputFormat instanceof HoodieParquetRealtimeInputFormat) {
return true;
}

return metaClient.getTableConfig().getBootstrapBasePath().isPresent();
HoodieTableMetaClient hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(conf).setBasePath(tablePath).build();
return hoodieTableMetaClient.getTableConfig().getBootstrapBasePath().isPresent();
}

public static long parseHiveDate(String value)
Expand Down
Loading