4 changes: 2 additions & 2 deletions hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java
@@ -48,7 +48,7 @@ public class HoodieCLI {
* Enum for CLI state.
*/
public enum CLIState {
INIT, DATASET, SYNC
INIT, TABLE, SYNC
}

public static void setConsistencyGuardConfig(ConsistencyGuardConfig config) {
@@ -100,7 +100,7 @@ public static void connectTo(String basePath, Integer layoutVersion) {
*/
public static HoodieTableMetaClient getTableMetaClient() {
if (tableMetadata == null) {
throw new NullPointerException("There is no hudi dataset. Please use connect command to set dataset first");
throw new NullPointerException("There is no hudi table. Please use connect command to set table first");
}
return tableMetadata;
}
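
For orientation, the flow the CLI enforces is connect first, then use the cached metadata. Below is a minimal sketch of that sequence, assembled only from calls that appear in this diff (initConf, connectTo, initFS, getTableMetaClient); the base path is a placeholder and this is a fragment, not a complete program:

// Connect before asking for table metadata; otherwise the NPE above is thrown.
HoodieCLI.initConf();
HoodieCLI.connectTo("/tmp/hoodie_trips", null);   // null -> default timeline layout version
HoodieCLI.initFS(true);
HoodieCLI.state = HoodieCLI.CLIState.TABLE;
String tableName = HoodieCLI.getTableMetaClient().getTableConfig().getTableName();
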
@@ -37,7 +37,7 @@ public String getPrompt() {
switch (HoodieCLI.state) {
case INIT:
return "hudi->";
case DATASET:
case TABLE:
return "hudi:" + tableName + "->";
case SYNC:
return "hudi:" + tableName + " <==> " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
@@ -206,8 +206,8 @@ public String showCommitFiles(@CliOption(key = {"commit"}, help = "Commit to sho
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}

@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset")
public String compareCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path)
@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie table")
public String compareCommits(@CliOption(key = {"path"}, help = "Path of the table to compare to") final String path)
throws Exception {

HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
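
For reference, a hedged sketch of what a compare between two tables boils down to: load the other table's metadata from its base path and diff the completed commit timestamps. The timeline accessors (getActiveTimeline, getCommitsTimeline, filterCompletedInstants, getInstants) are assumed from the usual HoodieTableMetaClient/HoodieTimeline API rather than taken from this diff, and the target path is a placeholder:

HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.conf, "/path/to/other/table");
Set<String> sourceCommits = source.getActiveTimeline().getCommitsTimeline()
    .filterCompletedInstants().getInstants()
    .map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
Set<String> targetCommits = target.getActiveTimeline().getCommitsTimeline()
    .filterCompletedInstants().getInstants()
    .map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
sourceCommits.removeAll(targetCommits);   // commits in source that the target has not caught up to
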
@@ -234,8 +234,8 @@ public String compareCommits(@CliOption(key = {"path"}, help = "Path of the data
}
}

@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset")
public String syncCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path) {
@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie table")
public String syncCommits(@CliOption(key = {"path"}, help = "Path of the table to compare to") final String path) {
HoodieCLI.syncTableMetadata = new HoodieTableMetaClient(HoodieCLI.conf, path);
HoodieCLI.state = HoodieCLI.CLIState.SYNC;
return "Load sync state between " + HoodieCLI.getTableMetaClient().getTableConfig().getTableName() + " and "
@@ -145,7 +145,7 @@ public String compactionsAll(
@CliCommand(value = "compaction show", help = "Shows compaction details for a specific compaction instant")
public String compactionShow(
@CliOption(key = "instant", mandatory = true,
help = "Base path for the target hoodie dataset") final String compactionInstantTime,
help = "Base path for the target hoodie table") final String compactionInstantTime,
@CliOption(key = {"limit"}, help = "Limit commits",
unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@@ -212,7 +212,7 @@ public String compact(
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
help = "Spark executor memory") final String sparkMemory,
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries") final String retry,
@CliOption(key = "compactionInstant", help = "Base path for the target hoodie dataset") String compactionInstantTime,
@CliOption(key = "compactionInstant", help = "Base path for the target hoodie table") String compactionInstantTime,
@CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
unspecifiedDefaultValue = "") final String propsFilePath,
@CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
@@ -471,7 +471,7 @@ private String getRenamesToBePrinted(List<RenameOpResult> res, Integer limit, St
if (result.get()) {
System.out.println("All renames successfully completed to " + operation + " done !!");
} else {
System.out.println("Some renames failed. DataSet could be in inconsistent-state. Try running compaction repair");
System.out.println("Some renames failed. table could be in inconsistent-state. Try running compaction repair");
}

List<Comparable[]> rows = new ArrayList<>();
@@ -35,18 +35,18 @@
import scala.collection.JavaConverters;

/**
* CLI command for importing parquet dataset to hudi dataset.
* CLI command for importing parquet table to hudi table.
*/
@Component
public class HDFSParquetImportCommand implements CommandMarker {

@CliCommand(value = "hdfsparquetimport", help = "Imports Parquet dataset to a hoodie dataset")
@CliCommand(value = "hdfsparquetimport", help = "Imports Parquet table to a hoodie table")
public String convert(
@CliOption(key = "upsert", unspecifiedDefaultValue = "false",
help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert,
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input table") final String srcPath,
@CliOption(key = "targetPath", mandatory = true,
help = "Base path for the target hoodie dataset") final String targetPath,
help = "Base path for the target hoodie table") final String targetPath,
@CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
@CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
@CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
@@ -85,8 +85,8 @@ public String convert(
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to import dataset to hoodie format";
return "Failed to import table to hoodie format";
}
return "Dataset imported to hoodie format";
return "Table imported to hoodie format";
}
}
@@ -65,7 +65,7 @@ public String deduplicate(
return "Deduplication failed ";
}

@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present")
@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a table, if not present")
public String addPartitionMeta(
@CliOption(key = {"dryrun"}, help = "Should we actually add or just print what would be done",
unspecifiedDefaultValue = "true") final boolean dryRun)
@@ -24,7 +24,7 @@
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.ConsistencyGuardConfig;
import org.apache.hudi.exception.DatasetNotFoundException;
import org.apache.hudi.exception.TableNotFoundException;

import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliCommand;
@@ -37,18 +37,18 @@
import java.util.List;

/**
* CLI command to display hudi dataset options.
* CLI command to display hudi table options.
*/
@Component
public class DatasetsCommand implements CommandMarker {
public class TableCommand implements CommandMarker {

static {
System.out.println("DatasetsCommand getting loaded");
System.out.println("Table command getting loaded");
}

@CliCommand(value = "connect", help = "Connect to a hoodie dataset")
@CliCommand(value = "connect", help = "Connect to a hoodie table")
public String connect(
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") final String path,
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the table") final String path,
@CliOption(key = {"layoutVersion"}, help = "Timeline Layout version") Integer layoutVersion,
@CliOption(key = {"eventuallyConsistent"}, unspecifiedDefaultValue = "false",
help = "Enable eventual consistency") final boolean eventuallyConsistent,
@@ -67,7 +67,7 @@ public String connect(
HoodieCLI.initConf();
HoodieCLI.connectTo(path, layoutVersion);
HoodieCLI.initFS(true);
HoodieCLI.state = HoodieCLI.CLIState.DATASET;
HoodieCLI.state = HoodieCLI.CLIState.TABLE;
return "Metadata for table " + HoodieCLI.getTableMetaClient().getTableConfig().getTableName() + " loaded";
}

@@ -81,7 +81,7 @@ public String connect(
*/
@CliCommand(value = "create", help = "Create a hoodie table if not present")
public String createTable(
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") final String path,
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the table") final String path,
@CliOption(key = {"tableName"}, mandatory = true, help = "Hoodie Table Name") final String name,
@CliOption(key = {"tableType"}, unspecifiedDefaultValue = "COPY_ON_WRITE",
help = "Hoodie Table Type. Must be one of : COPY_ON_WRITE or MERGE_ON_READ") final String tableTypeStr,
@@ -98,13 +98,13 @@ public String createTable(
try {
new HoodieTableMetaClient(HoodieCLI.conf, path);
existing = true;
} catch (DatasetNotFoundException dfe) {
} catch (TableNotFoundException dfe) {
// expected
}

// Do not touch table that already exist
if (existing) {
throw new IllegalStateException("Dataset already existing in path : " + path);
throw new IllegalStateException("Table already existing in path : " + path);
}

final HoodieTableType tableType = HoodieTableType.valueOf(tableTypeStr);
@@ -62,15 +62,15 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo

/**
* TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple
* basepath pointing to the dataset. Until, then just always assume a BloomIndex
* basepath pointing to the table. Until, then just always assume a BloomIndex
*/
private final transient HoodieIndex<T> index;
private final HoodieTimeline commitTimeline;
private HoodieTable hoodieTable;
private transient Option<SQLContext> sqlContextOpt;

/**
* @param basePath path to Hoodie dataset
* @param basePath path to Hoodie table
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath, Option<EmbeddedTimelineService> timelineService) {
this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
@@ -80,7 +80,7 @@ public HoodieReadClient(JavaSparkContext jsc, String basePath, Option<EmbeddedTi
}

/**
* @param basePath path to Hoodie dataset
* @param basePath path to Hoodie table
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
this(jsc, basePath, Option.empty());
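
A small usage sketch for the constructors above; the Spark setup and base path are placeholders, and, per the TODO, any key lookups done through this client go against an assumed BloomIndex until the index type is persisted in hoodie.properties:

JavaSparkContext jsc = new JavaSparkContext(new SparkConf().setAppName("hudi-read").setMaster("local[2]"));
HoodieReadClient<HoodieAvroPayload> readClient = new HoodieReadClient<>(jsc, "/tmp/hoodie_trips");
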
20 changes: 10 additions & 10 deletions hudi-client/src/main/java/org/apache/hudi/HoodieWriteClient.java
@@ -85,10 +85,10 @@
import scala.Tuple2;

/**
* Hoodie Write Client helps you build datasets on HDFS [insert()] and then perform efficient mutations on an HDFS
* dataset [upsert()]
* Hoodie Write Client helps you build tables on HDFS [insert()] and then perform efficient mutations on an HDFS
* table [upsert()]
* <p>
* Note that, at any given time, there can only be one Spark job performing these operations on a Hoodie dataset.
* Note that, at any given time, there can only be one Spark job performing these operations on a Hoodie table.
*/
public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHoodieWriteClient<T> {
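
A hedged end-to-end sketch of the build-then-mutate flow this Javadoc describes: insert or bulkInsert to build the table, upsert to mutate it. Only withPath and bulkInsert appear in this diff; forTable, startCommit, upsert and commit are assumed from the standard write-client API, jsc is an existing JavaSparkContext, the records RDD and path are placeholders, and a real config would also need a schema:

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hoodie_trips")
    .forTable("hoodie_trips")
    .build();
// Per the note above, only one Spark job should perform these operations on a table at a time.
HoodieWriteClient<HoodieAvroPayload> client = new HoodieWriteClient<>(jsc, cfg);

String commitTime = client.startCommit();
JavaRDD<WriteStatus> statuses = client.upsert(inputRecords, commitTime);   // inputRecords: JavaRDD<HoodieRecord<HoodieAvroPayload>>
client.commit(commitTime, statuses);
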

@@ -242,7 +242,7 @@ public JavaRDD<WriteStatus> insertPreppedRecords(JavaRDD<HoodieRecord<T>> preppe

/**
* Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie
* table for the very first time (e.g: converting an existing dataset to Hoodie).
* table for the very first time (e.g: converting an existing table to Hoodie).
* <p>
* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control
* the numbers of files with less memory compared to the {@link HoodieWriteClient#insert(JavaRDD, String)}
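
Continuing the previous sketch, the initial bulk load described here (e.g., converting an existing table to Hudi) swaps upsert for bulkInsert so the sort-based range partitioning can control file counts; the input RDD is again a placeholder:

String bootstrapCommit = client.startCommit();
JavaRDD<WriteStatus> bulkStatuses = client.bulkInsert(existingRecords, bootstrapCommit);
client.commit(bootstrapCommit, bulkStatuses);
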
@@ -257,7 +257,7 @@ public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final S

/**
* Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie
* table for the very first time (e.g: converting an existing dataset to Hoodie).
* table for the very first time (e.g: converting an existing table to Hoodie).
* <p>
* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control
* the numbers of files with less memory compared to the {@link HoodieWriteClient#insert(JavaRDD, String)}. Optionally
@@ -289,7 +289,7 @@ public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final S

/**
* Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie
* table for the very first time (e.g: converting an existing dataset to Hoodie). The input records should contain no
* table for the very first time (e.g: converting an existing table to Hoodie). The input records should contain no
* duplicates if needed.
* <p>
* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control
@@ -393,7 +393,7 @@ private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition, JavaRDD<H

/**
* Save the workload profile in an intermediate file (here re-using commit files) This is useful when performing
* rollback for MOR datasets. Only updates are recorded in the workload profile metadata since updates to log blocks
* rollback for MOR tables. Only updates are recorded in the workload profile metadata since updates to log blocks
* are unknown across batches Inserts (which are new parquet files) are rolled back based on commit time. // TODO :
* Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata
*/
@@ -691,7 +691,7 @@ public boolean rollback(final String commitTime) throws HoodieRollbackException
}

/**
* NOTE : This action requires all writers (ingest and compact) to a dataset to be stopped before proceeding. Revert
* NOTE : This action requires all writers (ingest and compact) to a table to be stopped before proceeding. Revert
* the (inflight/committed) record changes for all commits after the provided @param. Three steps: (1) Atomically
* unpublish this commit (2) clean indexing data, (3) clean new generated parquet/log files and/or append rollback to
* existing log files. (4) Finally delete .commit, .inflight, .compaction.inflight or .compaction.requested file
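
A brief usage sketch for the revert path: rollback(commitTime) is the signature shown in this hunk, the instant time is a placeholder, and, as the note says, all writers and compactors must be stopped before reverting commits:

// Roll back a single bad commit (placeholder instant time).
boolean rolledBack = client.rollback("20191225153000");
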
@@ -765,7 +765,7 @@ private void finishRestore(final Timer.Context context, Map<String, List<HoodieR
AvroUtils.convertRestoreMetadata(startRestoreTime, durationInMs, commitsToRollback, commitToStats);
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, startRestoreTime),
AvroUtils.serializeRestoreMetadata(restoreMetadata));
LOG.info("Commits " + commitsToRollback + " rollback is complete. Restored dataset to " + restoreToInstant);
LOG.info("Commits " + commitsToRollback + " rollback is complete. Restored table to " + restoreToInstant);

if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
LOG.info("Cleaning up older restore meta files");
@@ -1108,7 +1108,7 @@ private HoodieCommitMetadata doCompactionCommit(HoodieTable<T> table, JavaRDD<Wr
}

/**
* Performs a compaction operation on a dataset, serially before or after an insert/upsert action.
* Performs a compaction operation on a table, serially before or after an insert/upsert action.
*/
private Option<String> forceCompact(Option<Map<String, String>> extraMetadata) throws IOException {
Option<String> compactionInstantTimeOpt = scheduleCompaction(extraMetadata);
@@ -302,7 +302,7 @@ public boolean isImplicitWithStorage() {

/**
* For each incoming record, produce N output records, 1 each for each file against which the record's key needs to be
* checked. For datasets, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* checked. For tables, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* to be compared gets cut down a lot from range pruning.
*
* Sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on
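
The range-pruning idea above can be shown with a self-contained toy example: a record key only needs to be checked against files whose [minRecordKey, maxRecordKey] range can contain it, so timestamp-prefixed keys eliminate most file comparisons. FileRange here is a hypothetical stand-in for the per-file index info, not a Hudi class:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

class FileRange {
  final String fileId, minKey, maxKey;
  FileRange(String fileId, String minKey, String maxKey) {
    this.fileId = fileId; this.minKey = minKey; this.maxKey = maxKey;
  }
}

public class RangePruningExample {
  // Keep only files whose key range could contain the record key.
  static List<String> candidateFiles(String recordKey, List<FileRange> files) {
    return files.stream()
        .filter(f -> f.minKey.compareTo(recordKey) <= 0 && f.maxKey.compareTo(recordKey) >= 0)
        .map(f -> f.fileId)
        .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<FileRange> files = Arrays.asList(
        new FileRange("file-1", "20180101000000_0000", "20180131235959_9999"),
        new FileRange("file-2", "20180201000000_0000", "20180228235959_9999"));
    // With insert-ordered (timestamp-prefixed) keys, only file-2 needs a real lookup.
    System.out.println(candidateFiles("20180215120000_1234", files));   // [file-2]
  }
}
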
@@ -43,7 +43,7 @@
import scala.Tuple2;

/**
* This filter will only work with hoodie dataset since it will only load partitions with .hoodie_partition_metadata
* This filter will only work with hoodie table since it will only load partitions with .hoodie_partition_metadata
* file in it.
*/
public class HoodieGlobalBloomIndex<T extends HoodieRecordPayload> extends HoodieBloomIndex<T> {
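
A hedged sketch of the check implied by this comment, using plain Hadoop FileSystem calls (Configuration, FileSystem and Path from org.apache.hadoop); the partition path is a placeholder:

FileSystem fs = FileSystem.get(new Configuration());
Path partition = new Path("/tmp/hoodie_trips/2018/01/01");
// Only partitions carrying the marker file are treated as part of the hoodie table.
boolean isHoodiePartition = fs.exists(new Path(partition, ".hoodie_partition_metadata"));
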
@@ -71,7 +71,7 @@ List<Tuple2<String, BloomIndexFileInfo>> loadInvolvedFiles(List<String> partitio

/**
* For each incoming record, produce N output records, 1 each for each file against which the record's key needs to be
* checked. For datasets, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* checked. For tables, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* to be compared gets cut down a lot from range pruning.
* <p>
* Sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on
@@ -75,9 +75,9 @@
public class HoodieRealtimeTableCompactor implements HoodieCompactor {

private static final Logger LOG = LogManager.getLogger(HoodieRealtimeTableCompactor.class);
// Accumulator to keep track of total log files for a dataset
// Accumulator to keep track of total log files for a table
private AccumulatorV2<Long, Long> totalLogFiles;
// Accumulator to keep track of total log file slices for a dataset
// Accumulator to keep track of total log file slices for a table
private AccumulatorV2<Long, Long> totalFileSlices;

@Override
@@ -34,7 +34,7 @@

/**
* This strategy ensures that the last N partitions are picked up even if there are later partitions created for the
* dataset. lastNPartitions is defined as the N partitions before the currentDate. currentDay = 2018/01/01 The dataset
* table. lastNPartitions is defined as the N partitions before the currentDate. currentDay = 2018/01/01 The table
* has partitions for 2018/02/02 and 2018/03/03 beyond the currentDay This strategy will pick up the following
* partitions for compaction : (2018/01/01, allPartitionsInRange[(2018/01/01 - lastNPartitions) to 2018/01/01),
* 2018/02/02, 2018/03/03)
2 changes: 1 addition & 1 deletion hudi-client/src/test/java/HoodieClientExample.java
@@ -132,7 +132,7 @@ public void run() throws Exception {
client.delete(deleteRecords, newCommitTime);

/**
* Schedule a compaction and also perform compaction on a MOR dataset
* Schedule a compaction and also perform compaction on a MOR table
*/
if (HoodieTableType.valueOf(tableType) == HoodieTableType.MERGE_ON_READ) {
Option<String> instant = client.scheduleCompaction(Option.empty());
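
For completeness, a hedged continuation of this MERGE_ON_READ branch: run the compaction if one was actually scheduled. scheduleCompaction is taken from the line above; compact(instantTime) is assumed to be the matching write-client call:

Option<String> instant = client.scheduleCompaction(Option.empty());
if (instant.isPresent()) {
  JavaRDD<WriteStatus> compactionStatuses = client.compact(instant.get());   // run the scheduled compaction
}
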