Closed
Changes from all commits (28 commits):
3d19618  HUDI-1827 - Add ORC support to Bootstrap Op (manasaks, Jul 6, 2021)
bc7d0fd  Merge remote-tracking branch 'upstream/master' (manasaks, Jul 6, 2021)
24891c7  Modified to support ORC in bootstrap operation (manasaks, Jul 6, 2021)
1ef7dcc  Support bootstrap operation (manasaks, Jul 10, 2021)
4fa3e4e  Merge branch 'apache:master' into master (manasaks, Jul 10, 2021)
6e92bbf  Merge branch 'master' of https://github.com/manasaks/hudi (manasaks, Jul 10, 2021)
ce5e820  Delete Test Files (manasaks, Jul 10, 2021)
f533ba2  Remove unused imports (manasaks, Jul 10, 2021)
d3c215f  Merge branch 'apache:master' into master (manasaks, Jul 13, 2021)
851081d  Merge branch 'apache:master' into master (manasaks, Jul 14, 2021)
9489532  Merge branch 'master' of https://github.com/apache/hudi (manasaks, Jul 14, 2021)
bb85d2f  Merge branch 'master' of https://github.com/manasaks/hudi (manasaks, Jul 14, 2021)
5ebb411  Added test cases to test ORC bootstrap functionality (manasaks, Jul 14, 2021)
5a5c1ad  Merge branch 'apache:master' into master (manasaks, Jul 14, 2021)
b6f9e81  Merge branch 'apache:master' into master (manasaks, Jul 14, 2021)
ce6aebb  Merge branch 'apache:master' into master (manasaks, Jul 15, 2021)
54e18d6  Merge branch 'apache:master' into master (manasaks, Jul 15, 2021)
82fd3c3  Merge branch 'master' of https://github.com/apache/hudi (manasaks, Jul 15, 2021)
9507664  Merge branch 'master' of https://github.com/manasaks/hudi (manasaks, Jul 15, 2021)
015206e  Merge branch 'apache:master' into master (manasaks, Jul 15, 2021)
624c7a8  Merge remote-tracking branch 'origin/master' (manasaks, Jul 15, 2021)
a7c392c  Merge branch 'apache:master' into master (manasaks, Jul 19, 2021)
9e4ade0  Merge branch 'master' of https://github.com/apache/hudi (manasaks, Jul 19, 2021)
06140ae  Merge branch 'master' of https://github.com/apache/hudi (manasaks, Jul 19, 2021)
119b4ac  Merge branch 'master' of https://github.com/manasaks/hudi (manasaks, Jul 19, 2021)
3c64539  Merge branch 'master' of https://github.com/apache/hudi (manasaks, Jul 21, 2021)
1721e3a  Merge branch 'master' of https://github.com/apache/hudi (manasaks, Jul 26, 2021)
33bd14e  Merge branch 'master' of https://github.com/apache/hudi (manasaks, Jul 29, 2021)

16 changes: 8 additions & 8 deletions .codecov.yml
@@ -23,13 +23,6 @@ coverage:
precision: 2
round: down
range: "50...100"
-  status:
-    project: # settings affecting project coverage
-      enabled: no
-
-    # do not run coverage on patch nor changes
-    patch: no
-    changes: no

# Ignoring Paths
# --------------
@@ -54,7 +47,14 @@ ignore:
- "hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java"
- "hudi-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java"

-comment: false
+comment:
+  layout: "reach, diff, flags, files"
+  behavior: default
+  require_changes: false # if true: only post the comment if coverage changes
+  require_base: no # [yes :: must have a base report to post]
+  require_head: no # [yes :: must have a head report to post]
+  branches: # https://docs.codecov.io/docs/pull-request-comments#branches
+    - "master"

flags:
hudicli:

1 change: 0 additions & 1 deletion hudi-cli/pom.xml
@@ -48,7 +48,6 @@
<args>
<arg>-nobootcp</arg>
</args>
-<checkMultipleScalaVersions>false</checkMultipleScalaVersions>
</configuration>
</plugin>
</plugins>

@@ -22,7 +22,6 @@
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
-import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
@@ -212,8 +211,6 @@ public String showLogFileRecords(
.withMaxMemorySizeInBytes(
HoodieMemoryConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES)
.withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH_PROP.defaultValue())
-.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
-.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
.build();
for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : scanner) {
Option<IndexedRecord> record = hoodieRecord.getData().getInsertValue(readerSchema);

@@ -45,14 +45,13 @@ object SparkHelpers {
val schema: Schema = sourceRecords.get(0).getSchema
val filter: BloomFilter = BloomFilterFactory.createBloomFilter(HoodieIndexConfig.BLOOM_FILTER_NUM_ENTRIES.defaultValue.toInt, HoodieIndexConfig.BLOOM_FILTER_FPP.defaultValue.toDouble,
HoodieIndexConfig.HOODIE_BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.defaultValue.toInt, HoodieIndexConfig.BLOOM_INDEX_FILTER_TYPE.defaultValue);
-val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(fs.getConf).convert(schema), schema, org.apache.hudi.common.util.Option.of(filter))
+val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter(fs.getConf).convert(schema), schema, filter)
val parquetConfig: HoodieAvroParquetConfig = new HoodieAvroParquetConfig(writeSupport, CompressionCodecName.GZIP, HoodieStorageConfig.PARQUET_BLOCK_SIZE_BYTES.defaultValue.toInt, HoodieStorageConfig.PARQUET_PAGE_SIZE_BYTES.defaultValue.toInt, HoodieStorageConfig.PARQUET_FILE_MAX_BYTES.defaultValue.toInt, fs.getConf, HoodieStorageConfig.PARQUET_COMPRESSION_RATIO.defaultValue.toDouble)

// Add current classLoad for config, if not will throw classNotFound of 'HoodieWrapperFileSystem'.
parquetConfig.getHadoopConf().setClassLoader(Thread.currentThread.getContextClassLoader)

-val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](instantTime, destinationFile, parquetConfig, schema, new SparkTaskContextSupplier(),
-  true)
+val writer = new HoodieParquetWriter[HoodieJsonPayload, IndexedRecord](instantTime, destinationFile, parquetConfig, schema, new SparkTaskContextSupplier())
for (rec <- sourceRecords) {
val key: String = rec.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString
if (!keysToSkip.contains(key)) {

@@ -25,7 +25,6 @@
import org.apache.hudi.cli.TableHeader;
import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest;
import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator;
-import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
@@ -214,8 +213,6 @@ public void testShowLogFileRecordsWithMerge() throws IOException, InterruptedExc
HoodieCompactionConfig.COMPACTION_REVERSE_LOG_READ_ENABLED_PROP.defaultValue()))
.withBufferSize(HoodieMemoryConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP.defaultValue())
.withSpillableMapBasePath(HoodieMemoryConfig.SPILLABLE_MAP_BASE_PATH_PROP.defaultValue())
-.withDiskMapType(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.defaultValue())
-.withBitCaskDiskMapCompressionEnabled(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue())
.build();

Iterator<HoodieRecord<? extends HoodieRecordPayload>> records = scanner.iterator();

@@ -130,7 +130,7 @@ public AbstractHoodieWriteClient(HoodieEngineContext context, HoodieWriteConfig
public AbstractHoodieWriteClient(HoodieEngineContext context, HoodieWriteConfig writeConfig,
Option<EmbeddedTimelineService> timelineService) {
super(context, writeConfig, timelineService);
-this.metrics = new HoodieMetrics(config);
+this.metrics = new HoodieMetrics(config, config.getTableName());
this.index = createIndex(writeConfig);
this.txnManager = new TransactionManager(config, fs);
}
@@ -172,11 +172,6 @@ public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Opti

public boolean commitStats(String instantTime, List<HoodieWriteStat> stats, Option<Map<String, String>> extraMetadata,
String commitActionType, Map<String, List<String>> partitionToReplaceFileIds) {
-// Skip the empty commit if not allowed
-if (!config.allowEmptyCommit() && stats.isEmpty()) {
-  return true;
-}
LOG.info("Committing " + instantTime + " action " + commitActionType);
// Create a Hoodie table which encapsulated the commits and files visible
HoodieTable table = createTable(config, hadoopConf);
HoodieCommitMetadata metadata = CommitUtils.buildMetadata(stats, partitionToReplaceFileIds,
@@ -678,7 +673,8 @@ public String startCommit() {
}

/**
-* Provides a new commit time for a write operation (insert/update/delete/insert_overwrite/insert_overwrite_table) without specified action.
+* Provides a new commit time for a write operation (insert/update/delete).
+*
* @param instantTime Instant time to be generated
*/
public void startCommitWithTime(String instantTime) {
@@ -687,7 +683,7 @@ public void startCommitWithTime(String instantTime) {
}

/**
-* Completes a new commit time for a write operation (insert/update/delete/insert_overwrite/insert_overwrite_table) with specified action.
+* Completes a new commit time for a write operation (insert/update/delete) with specified action.
*/
public void startCommitWithTime(String instantTime, String actionType) {
HoodieTableMetaClient metaClient = createMetaClient(true);
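
For orientation, here is a minimal usage sketch of the two commit-time entry points touched by this hunk; the write-client variable, its construction, and the literal instant are hypothetical placeholders, not part of the diff:

// Sketch only: assumes an already-initialized write client (e.g. a SparkRDDWriteClient).
String instantTime = writeClient.startCommit();     // let the client generate the instant
// ...or drive it with an explicit instant for the default commit action:
writeClient.startCommitWithTime("20210714120000");  // hypothetical instant time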

@@ -62,10 +62,6 @@ public void markSuccess(String recordKey) {
totalRecords++;
}

-public void markSuccess() {
-  totalRecords++;
-}
-
public void markFailure(String recordKey, Throwable t) {
if (failedRecordKeys.isEmpty() || (random.nextDouble() <= failureFraction)) {
failedRecordKeys.add(Pair.of(recordKey, t));

@@ -68,8 +68,7 @@ private static EmbeddedTimelineService startTimelineService(
Option<String> hostAddr = context.getProperty(EngineProperty.EMBEDDED_SERVER_HOST);
EmbeddedTimelineService timelineService = new EmbeddedTimelineService(
context, hostAddr.orElse(null),config.getEmbeddedTimelineServerPort(),
-config.getMetadataConfig(), config.getCommonConfig(),
-config.getClientSpecifiedViewStorageConfig(), config.getBasePath(),
+config.getMetadataConfig(), config.getClientSpecifiedViewStorageConfig(), config.getBasePath(),
config.getEmbeddedTimelineServerThreads(), config.getEmbeddedTimelineServerCompressOutput(),
config.getEmbeddedTimelineServerUseAsync());
timelineService.startServer();

@@ -18,7 +18,6 @@

package org.apache.hudi.client.embedded;

-import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.config.SerializableConfiguration;
@@ -47,7 +46,6 @@ public class EmbeddedTimelineService {
private final SerializableConfiguration hadoopConf;
private final FileSystemViewStorageConfig config;
private final HoodieMetadataConfig metadataConfig;
-private final HoodieCommonConfig commonConfig;
private final String basePath;

private final int numThreads;
@@ -57,14 +55,13 @@ public class EmbeddedTimelineService {
private transient TimelineService server;

public EmbeddedTimelineService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, int embeddedTimelineServerPort,
-HoodieMetadataConfig metadataConfig, HoodieCommonConfig commonConfig, FileSystemViewStorageConfig config, String basePath,
+HoodieMetadataConfig metadataConfig, FileSystemViewStorageConfig config, String basePath,
int numThreads, boolean compressOutput, boolean useAsync) {
setHostAddr(embeddedTimelineServiceHostAddr);
this.context = context;
this.config = config;
this.basePath = basePath;
this.metadataConfig = metadataConfig;
-this.commonConfig = commonConfig;
this.hadoopConf = context.getHadoopConf();
this.viewManager = createViewManager();
this.preferredPort = embeddedTimelineServerPort;
@@ -83,7 +80,7 @@ private FileSystemViewManager createViewManager() {
// Reset to default if set to Remote
builder.withStorageType(FileSystemViewStorageType.MEMORY);
}
-return FileSystemViewManager.createViewManager(context, metadataConfig, builder.build(), commonConfig, basePath);
+return FileSystemViewManager.createViewManager(context, metadataConfig, builder.build(), basePath);
}

public void startServer() throws IOException {

@@ -22,10 +22,9 @@
import org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector;
import org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator;
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
+import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.keygen.constant.KeyGeneratorType;

@@ -37,11 +36,6 @@
/**
* Bootstrap specific configs.
*/
@ConfigClassProperty(name = "Bootstrap Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control how you want to bootstrap your existing tables for the first time into hudi. "
+ "The bootstrap operation can flexibly avoid copying data over before you can use Hudi and support running the existing "
+ " writers and new hudi writers in parallel, to validate the migration.")
public class HoodieBootstrapConfig extends HoodieConfig {

public static final ConfigProperty<String> BOOTSTRAP_BASE_PATH_PROP = ConfigProperty
@@ -106,6 +100,12 @@ public class HoodieBootstrapConfig extends HoodieConfig {
.sinceVersion("0.6.0")
.withDocumentation("Implementation to use, for mapping a skeleton base file to a boostrap base file.");

+public static final ConfigProperty<HoodieFileFormat> HOODIE_BASE_FILE_FORMAT_PROP = ConfigProperty
+    .key("hoodie.table.base.file.format")
+    .defaultValue(HoodieFileFormat.PARQUET)
+    .withAlternatives("hoodie.table.ro.file.format")
+    .withDocumentation("");
+
private HoodieBootstrapConfig() {
super();
}
@@ -171,6 +171,11 @@ public Builder withBootstrapModeForRegexMatch(BootstrapMode modeForRegexMatch) {
return this;
}

+public Builder withBaseFileFormat(HoodieFileFormat fileFormat) {
+  bootstrapConfig.setDefaultValue(HOODIE_BASE_FILE_FORMAT_PROP, fileFormat);
+  return this;
+}
+
public Builder fromProperties(Properties props) {
this.bootstrapConfig.getProps().putAll(props);
return this;
@@ -184,4 +189,4 @@ public HoodieBootstrapConfig build() {
return bootstrapConfig;
}
}
-}
+}
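
For context, a minimal sketch of how the restored base-file-format option could be wired up for an ORC bootstrap; the newBuilder() entry point and withBootstrapBasePath(...) follow the usual Hudi config-builder pattern but are assumptions here, as is the source path:

// Sketch only: builder entry point and bootstrap source path are assumptions.
HoodieBootstrapConfig bootstrapConfig = HoodieBootstrapConfig.newBuilder()
    .withBootstrapBasePath("/data/existing_orc_table")  // hypothetical existing ORC table
    .withBaseFileFormat(HoodieFileFormat.ORC)           // ORC support added by HUDI-1827
    .build();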

@@ -18,8 +18,6 @@

package org.apache.hudi.config;

-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;

@@ -31,10 +29,6 @@
/**
* Clustering specific configs.
*/
@ConfigClassProperty(name = "Clustering Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control the clustering table service in hudi, "
+ "which optimizes the storage layout for better query performance by sorting and sizing data files.")
public class HoodieClusteringConfig extends HoodieConfig {

// Any strategy specific params can be saved with this prefix

@@ -18,8 +18,6 @@

package org.apache.hudi.config;

-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
@@ -43,11 +41,6 @@
* Compaction related config.
*/
@Immutable
@ConfigClassProperty(name = "Compaction Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control compaction "
+ "(merging of log files onto a new base files) as well as "
+ "cleaning (reclamation of older/unused file groups/slices).")
public class HoodieCompactionConfig extends HoodieConfig {

public static final ConfigProperty<String> AUTO_CLEAN_PROP = ConfigProperty

@@ -18,8 +18,6 @@

package org.apache.hudi.config;

-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.index.hbase.DefaultHBaseQPSResourceAllocator;
@@ -29,11 +27,6 @@
import java.io.IOException;
import java.util.Properties;

@ConfigClassProperty(name = "HBase Index Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control indexing behavior "
+ "(when HBase based indexing is enabled), which tags incoming "
+ "records as either inserts or updates to older records.")
public class HoodieHBaseIndexConfig extends HoodieConfig {

public static final ConfigProperty<String> HBASE_ZKQUORUM_PROP = ConfigProperty

@@ -19,8 +19,6 @@
package org.apache.hudi.config;

import org.apache.hudi.common.bloom.BloomFilterTypeCode;
-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.engine.EngineType;
@@ -38,10 +36,6 @@
* Indexing related config.
*/
@Immutable
@ConfigClassProperty(name = "Index Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control indexing behavior, "
+ "which tags incoming records as either inserts or updates to older records.")
public class HoodieIndexConfig extends HoodieConfig {

public static final ConfigProperty<String> INDEX_TYPE_PROP = ConfigProperty

@@ -20,8 +20,6 @@
import org.apache.hudi.client.transaction.ConflictResolutionStrategy;
import org.apache.hudi.client.transaction.SimpleConcurrentFileWritesConflictResolutionStrategy;
import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider;
-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.lock.LockProvider;
@@ -57,11 +55,6 @@
/**
* Hoodie Configs for Locks.
*/
@ConfigClassProperty(name = "Locks Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configs that control locking mechanisms required for concurrency control "
+ " between writers to a Hudi table. Concurrency between Hudi's own table services "
+ " are auto managed internally.")
public class HoodieLockConfig extends HoodieConfig {

public static final ConfigProperty<String> LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP = ConfigProperty

@@ -18,8 +18,6 @@

package org.apache.hudi.config;

-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;

@@ -34,10 +32,6 @@
* Memory related config.
*/
@Immutable
@ConfigClassProperty(name = "Memory Configurations",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Controls memory usage for compaction "
+ "and merges, performed internally by Hudi.")
public class HoodieMemoryConfig extends HoodieConfig {

// Default max memory fraction during hash-merge, excess spills to disk

@@ -18,8 +18,6 @@

package org.apache.hudi.config;

-import org.apache.hudi.common.config.ConfigClassProperty;
-import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.metrics.MetricsReporterType;
@@ -35,10 +33,6 @@
* Fetch the configurations used by the Metrics system.
*/
@Immutable
@ConfigClassProperty(name = "Metrics Configurations",
groupName = ConfigGroups.Names.METRICS,
description = "Enables reporting on Hudi metrics. Hudi publishes metrics on "
+ "every commit, clean, rollback etc. The following sections list the supported reporters.")
public class HoodieMetricsConfig extends HoodieConfig {

public static final String METRIC_PREFIX = "hoodie.metrics";