4 changes: 2 additions & 2 deletions hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java
@@ -48,7 +48,7 @@ public class HoodieCLI {
* Enum for CLI state.
*/
public enum CLIState {
INIT, DATASET, SYNC
INIT, TABLE, SYNC
}

public static void setConsistencyGuardConfig(ConsistencyGuardConfig config) {
@@ -100,7 +100,7 @@ public static void connectTo(String basePath, Integer layoutVersion) {
*/
public static HoodieTableMetaClient getTableMetaClient() {
if (tableMetadata == null) {
throw new NullPointerException("There is no hudi dataset. Please use connect command to set dataset first");
throw new NullPointerException("There is no hudi table. Please use connect command to set table first");
}
return tableMetadata;
}
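
For orientation, the flow the CLI enforces is connect first, then use the cached metadata. Below is a minimal sketch of that sequence, assembled only from calls that appear in this diff (initConf, connectTo, initFS, getTableMetaClient); the base path is a placeholder and this is a fragment, not a complete program:

// Connect before asking for table metadata; otherwise the NPE above is thrown.
HoodieCLI.initConf();
HoodieCLI.connectTo("/tmp/hoodie_trips", null);   // null -> default timeline layout version
HoodieCLI.initFS(true);
HoodieCLI.state = HoodieCLI.CLIState.TABLE;
String tableName = HoodieCLI.getTableMetaClient().getTableConfig().getTableName();
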
@@ -37,7 +37,7 @@ public String getPrompt() {
switch (HoodieCLI.state) {
case INIT:
return "hudi->";
case DATASET:
case TABLE:
return "hudi:" + tableName + "->";
case SYNC:
return "hudi:" + tableName + " <==> " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->";
@@ -206,8 +206,8 @@ public String showCommitFiles(@CliOption(key = {"commit"}, help = "Commit to sho
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}

@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset")
public String compareCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path)
@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie table")
public String compareCommits(@CliOption(key = {"path"}, help = "Path of the table to compare to") final String path)
throws Exception {

HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
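
For reference, a hedged sketch of what a compare between two tables boils down to: load the other table's metadata from its base path and diff the completed commit timestamps. The timeline accessors (getActiveTimeline, getCommitsTimeline, filterCompletedInstants, getInstants) are assumed from the usual HoodieTableMetaClient/HoodieTimeline API rather than taken from this diff, and the target path is a placeholder:

HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.conf, "/path/to/other/table");
Set<String> sourceCommits = source.getActiveTimeline().getCommitsTimeline()
    .filterCompletedInstants().getInstants()
    .map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
Set<String> targetCommits = target.getActiveTimeline().getCommitsTimeline()
    .filterCompletedInstants().getInstants()
    .map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
sourceCommits.removeAll(targetCommits);   // commits in source that the target has not caught up to
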
@@ -234,8 +234,8 @@ public String compareCommits(@CliOption(key = {"path"}, help = "Path of the data
}
}

@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset")
public String syncCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path) {
@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie table")
public String syncCommits(@CliOption(key = {"path"}, help = "Path of the table to compare to") final String path) {
HoodieCLI.syncTableMetadata = new HoodieTableMetaClient(HoodieCLI.conf, path);
HoodieCLI.state = HoodieCLI.CLIState.SYNC;
return "Load sync state between " + HoodieCLI.getTableMetaClient().getTableConfig().getTableName() + " and "
@@ -145,7 +145,7 @@ public String compactionsAll(
@CliCommand(value = "compaction show", help = "Shows compaction details for a specific compaction instant")
public String compactionShow(
@CliOption(key = "instant", mandatory = true,
help = "Base path for the target hoodie dataset") final String compactionInstantTime,
help = "Base path for the target hoodie table") final String compactionInstantTime,
@CliOption(key = {"limit"}, help = "Limit commits",
unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@@ -212,7 +212,7 @@ public String compact(
@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
help = "Spark executor memory") final String sparkMemory,
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries") final String retry,
@CliOption(key = "compactionInstant", help = "Base path for the target hoodie dataset") String compactionInstantTime,
@CliOption(key = "compactionInstant", help = "Base path for the target hoodie table") String compactionInstantTime,
@CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting",
unspecifiedDefaultValue = "") final String propsFilePath,
@CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array",
@@ -471,7 +471,7 @@ private String getRenamesToBePrinted(List<RenameOpResult> res, Integer limit, St
if (result.get()) {
System.out.println("All renames successfully completed to " + operation + " done !!");
} else {
System.out.println("Some renames failed. DataSet could be in inconsistent-state. Try running compaction repair");
System.out.println("Some renames failed. table could be in inconsistent-state. Try running compaction repair");
}

List<Comparable[]> rows = new ArrayList<>();
@@ -35,18 +35,18 @@
import scala.collection.JavaConverters;

/**
* CLI command for importing parquet dataset to hudi dataset.
* CLI command for importing parquet table to hudi table.
*/
@Component
public class HDFSParquetImportCommand implements CommandMarker {

@CliCommand(value = "hdfsparquetimport", help = "Imports Parquet dataset to a hoodie dataset")
@CliCommand(value = "hdfsparquetimport", help = "Imports Parquet table to a hoodie table")
public String convert(
@CliOption(key = "upsert", unspecifiedDefaultValue = "false",
help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert,
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input table") final String srcPath,
@CliOption(key = "targetPath", mandatory = true,
help = "Base path for the target hoodie dataset") final String targetPath,
help = "Base path for the target hoodie table") final String targetPath,
@CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
@CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
@CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
@@ -85,8 +85,8 @@ public String convert(
InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor();
if (exitCode != 0) {
return "Failed to import dataset to hoodie format";
return "Failed to import table to hoodie format";
}
return "Dataset imported to hoodie format";
return "Table imported to hoodie format";
}
}
@@ -65,7 +65,7 @@ public String deduplicate(
return "Deduplication failed ";
}

@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present")
@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a table, if not present")
public String addPartitionMeta(
@CliOption(key = {"dryrun"}, help = "Should we actually add or just print what would be done",
unspecifiedDefaultValue = "true") final boolean dryRun)
@@ -24,7 +24,7 @@
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.ConsistencyGuardConfig;
import org.apache.hudi.exception.DatasetNotFoundException;
import org.apache.hudi.exception.TableNotFoundException;

import org.springframework.shell.core.CommandMarker;
import org.springframework.shell.core.annotation.CliCommand;
@@ -37,18 +37,18 @@
import java.util.List;

/**
* CLI command to display hudi dataset options.
* CLI command to display hudi table options.
*/
@Component
public class DatasetsCommand implements CommandMarker {
public class TableCommand implements CommandMarker {

static {
System.out.println("DatasetsCommand getting loaded");
System.out.println("Table command getting loaded");
}

@CliCommand(value = "connect", help = "Connect to a hoodie dataset")
@CliCommand(value = "connect", help = "Connect to a hoodie table")
public String connect(
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") final String path,
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the table") final String path,
@CliOption(key = {"layoutVersion"}, help = "Timeline Layout version") Integer layoutVersion,
@CliOption(key = {"eventuallyConsistent"}, unspecifiedDefaultValue = "false",
help = "Enable eventual consistency") final boolean eventuallyConsistent,
@@ -67,7 +67,7 @@ public String connect(
HoodieCLI.initConf();
HoodieCLI.connectTo(path, layoutVersion);
HoodieCLI.initFS(true);
HoodieCLI.state = HoodieCLI.CLIState.DATASET;
HoodieCLI.state = HoodieCLI.CLIState.TABLE;
return "Metadata for table " + HoodieCLI.getTableMetaClient().getTableConfig().getTableName() + " loaded";
}

@@ -81,7 +81,7 @@ public String connect(
*/
@CliCommand(value = "create", help = "Create a hoodie table if not present")
public String createTable(
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") final String path,
@CliOption(key = {"path"}, mandatory = true, help = "Base Path of the table") final String path,
@CliOption(key = {"tableName"}, mandatory = true, help = "Hoodie Table Name") final String name,
@CliOption(key = {"tableType"}, unspecifiedDefaultValue = "COPY_ON_WRITE",
help = "Hoodie Table Type. Must be one of : COPY_ON_WRITE or MERGE_ON_READ") final String tableTypeStr,
@@ -98,13 +98,13 @@ public String createTable(
try {
new HoodieTableMetaClient(HoodieCLI.conf, path);
existing = true;
} catch (DatasetNotFoundException dfe) {
} catch (TableNotFoundException dfe) {
// expected
}

// Do not touch table that already exist
if (existing) {
throw new IllegalStateException("Dataset already existing in path : " + path);
throw new IllegalStateException("Table already existing in path : " + path);
}

final HoodieTableType tableType = HoodieTableType.valueOf(tableTypeStr);
@@ -62,15 +62,15 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo

/**
* TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple
* basepath pointing to the dataset. Until, then just always assume a BloomIndex
* basepath pointing to the table. Until, then just always assume a BloomIndex
*/
private final transient HoodieIndex<T> index;
private final HoodieTimeline commitTimeline;
private HoodieTable hoodieTable;
private transient Option<SQLContext> sqlContextOpt;

/**
* @param basePath path to Hoodie dataset
* @param basePath path to Hoodie table
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath, Option<EmbeddedTimelineService> timelineService) {
this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
@@ -80,7 +80,7 @@ public HoodieReadClient(JavaSparkContext jsc, String basePath, Option<EmbeddedTi
}

/**
* @param basePath path to Hoodie dataset
* @param basePath path to Hoodie table
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
this(jsc, basePath, Option.empty());
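
A small usage sketch for the constructors above; the Spark setup and base path are placeholders, and, per the TODO, any key lookups done through this client go against an assumed BloomIndex until the index type is persisted in hoodie.properties:

JavaSparkContext jsc = new JavaSparkContext(new SparkConf().setAppName("hudi-read").setMaster("local[2]"));
HoodieReadClient<HoodieAvroPayload> readClient = new HoodieReadClient<>(jsc, "/tmp/hoodie_trips");
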
20 changes: 10 additions & 10 deletions hudi-client/src/main/java/org/apache/hudi/HoodieWriteClient.java
@@ -85,10 +85,10 @@
import scala.Tuple2;

/**
* Hoodie Write Client helps you build datasets on HDFS [insert()] and then perform efficient mutations on an HDFS
* dataset [upsert()]
* Hoodie Write Client helps you build tables on HDFS [insert()] and then perform efficient mutations on an HDFS
* table [upsert()]
* <p>
* Note that, at any given time, there can only be one Spark job performing these operations on a Hoodie dataset.
* Note that, at any given time, there can only be one Spark job performing these operations on a Hoodie table.
*/
public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHoodieWriteClient<T> {
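
A hedged end-to-end sketch of the build-then-mutate flow this Javadoc describes: insert or bulkInsert to build the table, upsert to mutate it. Only withPath and bulkInsert appear in this diff; forTable, startCommit, upsert and commit are assumed from the standard write-client API, jsc is an existing JavaSparkContext, the records RDD and path are placeholders, and a real config would also need a schema:

HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hoodie_trips")
    .forTable("hoodie_trips")
    .build();
// Per the note above, only one Spark job should perform these operations on a table at a time.
HoodieWriteClient<HoodieAvroPayload> client = new HoodieWriteClient<>(jsc, cfg);

String commitTime = client.startCommit();
JavaRDD<WriteStatus> statuses = client.upsert(inputRecords, commitTime);   // inputRecords: JavaRDD<HoodieRecord<HoodieAvroPayload>>
client.commit(commitTime, statuses);
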

@@ -242,7 +242,7 @@ public JavaRDD<WriteStatus> insertPreppedRecords(JavaRDD<HoodieRecord<T>> preppe

/**
* Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie
* table for the very first time (e.g: converting an existing dataset to Hoodie).
* table for the very first time (e.g: converting an existing table to Hoodie).
* <p>
* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control
* the numbers of files with less memory compared to the {@link HoodieWriteClient#insert(JavaRDD, String)}
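
Continuing the previous sketch, the initial bulk load described here (e.g., converting an existing table to Hudi) swaps upsert for bulkInsert so the sort-based range partitioning can control file counts; the input RDD is again a placeholder:

String bootstrapCommit = client.startCommit();
JavaRDD<WriteStatus> bulkStatuses = client.bulkInsert(existingRecords, bootstrapCommit);
client.commit(bootstrapCommit, bulkStatuses);
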
@@ -257,7 +257,7 @@ public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final S

/**
* Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie
* table for the very first time (e.g: converting an existing dataset to Hoodie).
* table for the very first time (e.g: converting an existing table to Hoodie).
* <p>
* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control
* the numbers of files with less memory compared to the {@link HoodieWriteClient#insert(JavaRDD, String)}. Optionally
@@ -289,7 +289,7 @@ public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final S

/**
* Loads the given HoodieRecords, as inserts into the table. This is suitable for doing big bulk loads into a Hoodie
* table for the very first time (e.g: converting an existing dataset to Hoodie). The input records should contain no
* table for the very first time (e.g: converting an existing table to Hoodie). The input records should contain no
* duplicates if needed.
* <p>
* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control
@@ -393,7 +393,7 @@ private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition, JavaRDD<H

/**
* Save the workload profile in an intermediate file (here re-using commit files) This is useful when performing
* rollback for MOR datasets. Only updates are recorded in the workload profile metadata since updates to log blocks
* rollback for MOR tables. Only updates are recorded in the workload profile metadata since updates to log blocks
* are unknown across batches Inserts (which are new parquet files) are rolled back based on commit time. // TODO :
* Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata
*/
@@ -691,7 +691,7 @@ public boolean rollback(final String commitTime) throws HoodieRollbackException
}

/**
* NOTE : This action requires all writers (ingest and compact) to a dataset to be stopped before proceeding. Revert
* NOTE : This action requires all writers (ingest and compact) to a table to be stopped before proceeding. Revert
* the (inflight/committed) record changes for all commits after the provided @param. Three steps: (1) Atomically
* unpublish this commit (2) clean indexing data, (3) clean new generated parquet/log files and/or append rollback to
* existing log files. (4) Finally delete .commit, .inflight, .compaction.inflight or .compaction.requested file
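
A brief usage sketch for the revert path: rollback(commitTime) is the signature shown in this hunk, the instant time is a placeholder, and, as the note says, all writers and compactors must be stopped before reverting commits:

// Roll back a single bad commit (placeholder instant time).
boolean rolledBack = client.rollback("20191225153000");
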
@@ -765,7 +765,7 @@ private void finishRestore(final Timer.Context context, Map<String, List<HoodieR
AvroUtils.convertRestoreMetadata(startRestoreTime, durationInMs, commitsToRollback, commitToStats);
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, startRestoreTime),
AvroUtils.serializeRestoreMetadata(restoreMetadata));
LOG.info("Commits " + commitsToRollback + " rollback is complete. Restored dataset to " + restoreToInstant);
LOG.info("Commits " + commitsToRollback + " rollback is complete. Restored table to " + restoreToInstant);

if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
LOG.info("Cleaning up older restore meta files");
@@ -1108,7 +1108,7 @@ private HoodieCommitMetadata doCompactionCommit(HoodieTable<T> table, JavaRDD<Wr
}

/**
* Performs a compaction operation on a dataset, serially before or after an insert/upsert action.
* Performs a compaction operation on a table, serially before or after an insert/upsert action.
*/
private Option<String> forceCompact(Option<Map<String, String>> extraMetadata) throws IOException {
Option<String> compactionInstantTimeOpt = scheduleCompaction(extraMetadata);
@@ -302,7 +302,7 @@ public boolean isImplicitWithStorage() {

/**
* For each incoming record, produce N output records, 1 each for each file against which the record's key needs to be
* checked. For datasets, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* checked. For tables, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* to be compared gets cut down a lot from range pruning.
*
* Sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on
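
The range-pruning idea above can be shown with a self-contained toy example: a record key only needs to be checked against files whose [minRecordKey, maxRecordKey] range can contain it, so timestamp-prefixed keys eliminate most file comparisons. FileRange here is a hypothetical stand-in for the per-file index info, not a Hudi class:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

class FileRange {
  final String fileId, minKey, maxKey;
  FileRange(String fileId, String minKey, String maxKey) {
    this.fileId = fileId; this.minKey = minKey; this.maxKey = maxKey;
  }
}

public class RangePruningExample {
  // Keep only files whose key range could contain the record key.
  static List<String> candidateFiles(String recordKey, List<FileRange> files) {
    return files.stream()
        .filter(f -> f.minKey.compareTo(recordKey) <= 0 && f.maxKey.compareTo(recordKey) >= 0)
        .map(f -> f.fileId)
        .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<FileRange> files = Arrays.asList(
        new FileRange("file-1", "20180101000000_0000", "20180131235959_9999"),
        new FileRange("file-2", "20180201000000_0000", "20180228235959_9999"));
    // With insert-ordered (timestamp-prefixed) keys, only file-2 needs a real lookup.
    System.out.println(candidateFiles("20180215120000_1234", files));   // [file-2]
  }
}
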
@@ -43,7 +43,7 @@
import scala.Tuple2;

/**
* This filter will only work with hoodie dataset since it will only load partitions with .hoodie_partition_metadata
* This filter will only work with hoodie table since it will only load partitions with .hoodie_partition_metadata
* file in it.
*/
public class HoodieGlobalBloomIndex<T extends HoodieRecordPayload> extends HoodieBloomIndex<T> {
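
A hedged sketch of the check implied by this comment, using plain Hadoop FileSystem calls (Configuration, FileSystem and Path from org.apache.hadoop); the partition path is a placeholder:

FileSystem fs = FileSystem.get(new Configuration());
Path partition = new Path("/tmp/hoodie_trips/2018/01/01");
// Only partitions carrying the marker file are treated as part of the hoodie table.
boolean isHoodiePartition = fs.exists(new Path(partition, ".hoodie_partition_metadata"));
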
@@ -71,7 +71,7 @@ List<Tuple2<String, BloomIndexFileInfo>> loadInvolvedFiles(List<String> partitio

/**
* For each incoming record, produce N output records, 1 each for each file against which the record's key needs to be
* checked. For datasets, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* checked. For tables, where the keys have a definite insert order (e.g: timestamp as prefix), the number of files
* to be compared gets cut down a lot from range pruning.
* <p>
* Sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on
@@ -75,9 +75,9 @@
public class HoodieRealtimeTableCompactor implements HoodieCompactor {

private static final Logger LOG = LogManager.getLogger(HoodieRealtimeTableCompactor.class);
// Accumulator to keep track of total log files for a dataset
// Accumulator to keep track of total log files for a table
private AccumulatorV2<Long, Long> totalLogFiles;
// Accumulator to keep track of total log file slices for a dataset
// Accumulator to keep track of total log file slices for a table
private AccumulatorV2<Long, Long> totalFileSlices;

@Override
@@ -34,7 +34,7 @@

/**
* This strategy ensures that the last N partitions are picked up even if there are later partitions created for the
* dataset. lastNPartitions is defined as the N partitions before the currentDate. currentDay = 2018/01/01 The dataset
* table. lastNPartitions is defined as the N partitions before the currentDate. currentDay = 2018/01/01 The table
* has partitions for 2018/02/02 and 2018/03/03 beyond the currentDay This strategy will pick up the following
* partitions for compaction : (2018/01/01, allPartitionsInRange[(2018/01/01 - lastNPartitions) to 2018/01/01),
* 2018/02/02, 2018/03/03)
2 changes: 1 addition & 1 deletion hudi-client/src/test/java/HoodieClientExample.java
@@ -132,7 +132,7 @@ public void run() throws Exception {
client.delete(deleteRecords, newCommitTime);

/**
* Schedule a compaction and also perform compaction on a MOR dataset
* Schedule a compaction and also perform compaction on a MOR table
*/
if (HoodieTableType.valueOf(tableType) == HoodieTableType.MERGE_ON_READ) {
Option<String> instant = client.scheduleCompaction(Option.empty());
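
For completeness, a hedged continuation of this MERGE_ON_READ branch: run the compaction if one was actually scheduled. scheduleCompaction is taken from the line above; compact(instantTime) is assumed to be the matching write-client call:

Option<String> instant = client.scheduleCompaction(Option.empty());
if (instant.isPresent()) {
  JavaRDD<WriteStatus> compactionStatuses = client.compact(instant.get());   // run the scheduled compaction
}
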