Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 12 additions & 90 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
<dep.pinot.version>0.9.3</dep.pinot.version>
<dep.druid.version>0.19.0</dep.druid.version>
<dep.jaxb.version>2.3.1</dep.jaxb.version>
<dep.hudi.version>0.9.0</dep.hudi.version>
<dep.hudi.version>0.10.1</dep.hudi.version>
<dep.testcontainers.version>1.15.1</dep.testcontainers.version>
<!--
America/Bahia_Banderas has:
Expand Down Expand Up @@ -1206,109 +1206,31 @@
</exclusions>
</dependency>

<!-- Start of dependencies for querying Hudi tables -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
<artifactId>hudi-presto-bundle</artifactId>
<version>${dep.hudi.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.objenesis</groupId>
<artifactId>objenesis</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
</exclusion>
<exclusion>
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
</exclusion>
<exclusion>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo-shaded</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr</artifactId>
<version>${dep.hudi.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.objenesis</groupId>
<artifactId>objenesis</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</exclusion>
<exclusion>
<groupId>org.rocksdb</groupId>
<artifactId>rocksdbjni</artifactId>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
</exclusion>
<exclusion>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo-shaded</artifactId>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr-bundle</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- End of dependencies for querying Hudi tables -->

<dependency>
<groupId>net.sf.opencsv</groupId>
Expand Down
7 changes: 1 addition & 6 deletions presto-hive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,7 @@

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr</artifactId>
<artifactId>hudi-presto-bundle</artifactId>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import com.google.common.collect.ImmutableSet;
import io.airlift.units.Duration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.weakref.jmx.Managed;

import javax.inject.Inject;
Expand Down Expand Up @@ -74,15 +73,14 @@ public Iterator<HiveFileInfo> list(
Table table,
Path path,
NamenodeStats namenodeStats,
PathFilter pathFilter,
HiveDirectoryContext hiveDirectoryContext)
{
List<HiveFileInfo> files = cache.getIfPresent(path);
if (files != null) {
return files.iterator();
}

Iterator<HiveFileInfo> iterator = delegate.list(fileSystem, table, path, namenodeStats, pathFilter, hiveDirectoryContext);
Iterator<HiveFileInfo> iterator = delegate.list(fileSystem, table, path, namenodeStats, hiveDirectoryContext);
if (hiveDirectoryContext.isCacheable() && cachedTableChecker.isCachedTable(table.getSchemaTableName())) {
return cachingIterator(iterator, path);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import com.facebook.presto.hive.filesystem.ExtendedFileSystem;
import com.facebook.presto.hive.metastore.Table;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

import java.util.Iterator;

Expand All @@ -27,6 +26,5 @@ Iterator<HiveFileInfo> list(
Table table,
Path path,
NamenodeStats namenodeStats,
PathFilter pathFilter,
HiveDirectoryContext hiveDirectoryContext);
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import com.facebook.presto.hive.util.HiveFileIterator;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;

import java.io.IOException;
Expand All @@ -37,15 +36,13 @@ public Iterator<HiveFileInfo> list(
Table table,
Path path,
NamenodeStats namenodeStats,
PathFilter pathFilter,
HiveDirectoryContext hiveDirectoryContext)
{
return new HiveFileIterator(
path,
p -> new HadoopFileInfoIterator(fileSystem.listLocatedStatus(p)),
namenodeStats,
hiveDirectoryContext.getNestedDirectoryPolicy(),
pathFilter);
hiveDirectoryContext.getNestedDirectoryPolicy());
}

public static class HadoopFileInfoIterator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ public class HiveClientConfig

private boolean verboseRuntimeStatsEnabled;
private boolean useRecordPageSourceForCustomSplit = true;
private boolean hudiMetadataEnabled;

private boolean sizeBasedSplitWeightsEnabled = true;
private double minimumAssignedSplitWeight = 0.05;
Expand Down Expand Up @@ -1786,4 +1787,17 @@ public HiveClientConfig setFileSplittable(boolean fileSplittable)
this.fileSplittable = fileSplittable;
return this;
}

// Config setter bound to "hive.hudi-metadata-enabled"; returns this to allow fluent
// configuration chaining (matches the builder-style setters elsewhere in HiveClientConfig,
// e.g. setFileSplittable).
@Config("hive.hudi-metadata-enabled")
@ConfigDescription("For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage")
public HiveClientConfig setHudiMetadataEnabled(boolean hudiMetadataEnabled)
{
this.hudiMetadataEnabled = hudiMetadataEnabled;
return this;
}

// Returns whether Hudi metadata-table based file listing is enabled.
// Defaults to false: the backing field is an uninitialized boolean.
public boolean isHudiMetadataEnabled()
{
return this.hudiMetadataEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ public final class HiveSessionProperties
private static final String USE_RECORD_PAGE_SOURCE_FOR_CUSTOM_SPLIT = "use_record_page_source_for_custom_split";
public static final String MAX_INITIAL_SPLITS = "max_initial_splits";
public static final String FILE_SPLITTABLE = "file_splittable";
private static final String HUDI_METADATA_ENABLED = "hudi_metadata_enabled";

private final List<PropertyMetadata<?>> sessionProperties;

Expand Down Expand Up @@ -675,7 +676,12 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
FILE_SPLITTABLE,
"If a hive file is splittable when coordinator schedules splits",
hiveClientConfig.isFileSplittable(),
true));
true),
booleanProperty(
HUDI_METADATA_ENABLED,
"For Hudi tables prefer to fetch the list of file names, sizes and other metadata from the internal metadata table rather than storage",
hiveClientConfig.isHudiMetadataEnabled(),
false));
}

public List<PropertyMetadata<?>> getSessionProperties()
Expand Down Expand Up @@ -1170,4 +1176,9 @@ public static boolean isFileSplittable(ConnectorSession session)
{
return session.getProperty(FILE_SPLITTABLE, Boolean.class);
}

// Reads the per-session "hudi_metadata_enabled" flag; the session property is seeded
// from HiveClientConfig.isHudiMetadataEnabled() when the property list is registered.
public static boolean isHudiMetadataEnabled(ConnectorSession session)
{
return session.getProperty(HUDI_METADATA_ENABLED, Boolean.class);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -385,29 +385,33 @@ static boolean shouldUseRecordReaderFromInputFormat(Configuration configuration,
.anyMatch(USE_RECORD_READER_FROM_INPUT_FORMAT_ANNOTATION::equals);
}

static boolean shouldUseFileSplitsFromInputFormat(InputFormat<?, ?> inputFormat, Configuration conf, String tablePath)
static boolean shouldUseFileSplitsFromInputFormat(InputFormat<?, ?> inputFormat, DirectoryLister directoryLister)
{
boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations())
.map(Annotation::annotationType)
.map(Class::getSimpleName)
.anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals);
if (directoryLister instanceof HudiDirectoryLister) {
boolean hasUseSplitsAnnotation = Arrays.stream(inputFormat.getClass().getAnnotations())
.map(Annotation::annotationType)
.map(Class::getSimpleName)
.anyMatch(USE_FILE_SPLITS_FROM_INPUT_FORMAT_ANNOTATION::equals);

return hasUseSplitsAnnotation &&
(!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, ((HudiDirectoryLister) directoryLister).getMetaClient()));
}

return hasUseSplitsAnnotation && (!isHudiParquetInputFormat(inputFormat) || shouldUseFileSplitsForHudi(inputFormat, conf, tablePath));
return false;
}

// True when the table's input format is Hudi's HoodieParquetInputFormat.
// NOTE: instanceof also matches subclasses — presumably including the realtime
// (merge-on-read) variant checked separately in shouldUseFileSplitsForHudi; confirm
// the Hudi class hierarchy before relying on that.
static boolean isHudiParquetInputFormat(InputFormat<?, ?> inputFormat)
{
return inputFormat instanceof HoodieParquetInputFormat;
}

private static boolean shouldUseFileSplitsForHudi(InputFormat<?, ?> inputFormat, Configuration conf, String tablePath)
private static boolean shouldUseFileSplitsForHudi(InputFormat<?, ?> inputFormat, HoodieTableMetaClient metaClient)
{
if (inputFormat instanceof HoodieParquetRealtimeInputFormat) {
return true;
}

HoodieTableMetaClient hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(conf).setBasePath(tablePath).build();
return hoodieTableMetaClient.getTableConfig().getBootstrapBasePath().isPresent();
return metaClient.getTableConfig().getBootstrapBasePath().isPresent();
}

public static long parseHiveDate(String value)
Expand Down
Loading