diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java
index 53c6529b5c699..e4fa695ecef5b 100644
--- a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java
+++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/DataHubSyncConfig.java
@@ -30,6 +30,9 @@
 
 import java.util.Properties;
 
+import static org.apache.hudi.sync.datahub.config.HoodieDataHubDatasetIdentifier.DEFAULT_DATAHUB_ENV;
+import static org.apache.hudi.sync.datahub.config.HoodieDataHubDatasetIdentifier.DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME;
+
 public class DataHubSyncConfig extends HoodieSyncConfig {
 
   public static final ConfigProperty META_SYNC_DATAHUB_DATASET_IDENTIFIER_CLASS = ConfigProperty
@@ -52,6 +55,17 @@ public class DataHubSyncConfig extends HoodieSyncConfig {
       .noDefaultValue()
       .withDocumentation("Pluggable class to supply a DataHub REST emitter to connect to the DataHub instance. This overwrites other emitter configs.");
 
+  public static final ConfigProperty META_SYNC_DATAHUB_DATAPLATFORM_NAME = ConfigProperty
+      .key("hoodie.meta.sync.datahub.dataplatform.name")
+      .defaultValue(DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME)
+      .withDocumentation("String used to represent Hudi when creating its corresponding DataPlatform entity "
+          + "within Datahub");
+
+  public static final ConfigProperty META_SYNC_DATAHUB_DATASET_ENV = ConfigProperty
+      .key("hoodie.meta.sync.datahub.dataset.env")
+      .defaultValue(DEFAULT_DATAHUB_ENV.name())
+      .withDocumentation("Environment to use when pushing entities to Datahub");
+
   public final HoodieDataHubDatasetIdentifier datasetIdentifier;
 
   public DataHubSyncConfig(Properties props) {
@@ -87,6 +101,13 @@ public static class DataHubSyncConfigParams {
     @Parameter(names = {"--emitter-supplier-class"}, description = "Pluggable class to supply a DataHub REST emitter to connect to the DataHub instance. This overwrites other emitter configs.")
     public String emitterSupplierClass;
 
+    @Parameter(names = {"--data-platform-name"}, description = "String used to represent Hudi when creating its "
+        + "corresponding DataPlatform entity within Datahub")
+    public String dataPlatformName;
+
+    @Parameter(names = {"--dataset-env"}, description = "Which Datahub Environment to use when pushing entities")
+    public String datasetEnv;
+
     public boolean isHelp() {
       return hoodieSyncConfigParams.isHelp();
     }
@@ -97,6 +118,8 @@ public Properties toProps() {
       props.setPropertyIfNonNull(META_SYNC_DATAHUB_EMITTER_SERVER.key(), emitterServer);
       props.setPropertyIfNonNull(META_SYNC_DATAHUB_EMITTER_TOKEN.key(), emitterToken);
       props.setPropertyIfNonNull(META_SYNC_DATAHUB_EMITTER_SUPPLIER_CLASS.key(), emitterSupplierClass);
+      props.setPropertyIfNonNull(META_SYNC_DATAHUB_DATAPLATFORM_NAME.key(), dataPlatformName);
+      props.setPropertyIfNonNull(META_SYNC_DATAHUB_DATASET_ENV.key(), datasetEnv);
       return props;
     }
   }
diff --git a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java
index c7e121ea0f001..6c8ea076ffc0d 100644
--- a/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java
+++ b/hudi-sync/hudi-datahub-sync/src/main/java/org/apache/hudi/sync/datahub/config/HoodieDataHubDatasetIdentifier.java
@@ -27,6 +27,8 @@
 
 import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
 import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
+import static org.apache.hudi.sync.datahub.config.DataHubSyncConfig.META_SYNC_DATAHUB_DATAPLATFORM_NAME;
+import static org.apache.hudi.sync.datahub.config.DataHubSyncConfig.META_SYNC_DATAHUB_DATASET_ENV;
 
 /**
  * Construct and provide the default {@link DatasetUrn} to identify the Dataset on DataHub.
@@ -36,6 +38,8 @@
 public class HoodieDataHubDatasetIdentifier {
 
   public static final String DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME = "hudi";
+  /** Environment used when no dataset environment is configured. */
+  public static final FabricType DEFAULT_DATAHUB_ENV = FabricType.DEV;
 
   protected final Properties props;
 
@@ -44,8 +48,48 @@ public HoodieDataHubDatasetIdentifier(Properties props) {
   }
 
+  /**
+   * Builds the {@link DatasetUrn} for the table described by the sync properties.
+   * <p>
+   * The platform name is read from {@code hoodie.meta.sync.datahub.dataplatform.name}, the dataset name is
+   * {@code <database>.<table>}, and the environment is read from {@code hoodie.meta.sync.datahub.dataset.env}.
+   *
+   * @return the URN identifying this dataset on DataHub
+   * @throws IllegalArgumentException if the configured environment does not name a {@link FabricType} constant
+   */
   public DatasetUrn getDatasetUrn() {
-    DataPlatformUrn dataPlatformUrn = new DataPlatformUrn(DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME);
     DataHubSyncConfig config = new DataHubSyncConfig(props);
-    return new DatasetUrn(dataPlatformUrn, String.format("%s.%s", config.getString(META_SYNC_DATABASE_NAME), config.getString(META_SYNC_TABLE_NAME)), FabricType.DEV);
+
+    return new DatasetUrn(
+        createDataPlatformUrn(config.getStringOrDefault(META_SYNC_DATAHUB_DATAPLATFORM_NAME)),
+        createDatasetName(config.getString(META_SYNC_DATABASE_NAME), config.getString(META_SYNC_TABLE_NAME)),
+        parseFabricType(config.getStringOrDefault(META_SYNC_DATAHUB_DATASET_ENV))
+    );
+  }
+
+  /** Wraps the configured platform name in a {@link DataPlatformUrn}. */
+  private static DataPlatformUrn createDataPlatformUrn(String platformUrn) {
+    return new DataPlatformUrn(platformUrn);
+  }
+
+  /** Joins the database and table names as {@code <database>.<table>}. */
+  private static String createDatasetName(String databaseName, String tableName) {
+    return String.format("%s.%s", databaseName, tableName);
+  }
+
+  /**
+   * Resolves the configured environment to a {@link FabricType}, ignoring case and surrounding whitespace.
+   * A plain {@code FabricType.valueOf} would reject a value such as {@code "dev"} with a message that does
+   * not say which config is wrong, so the lookup is done by hand and the failure names the offending key.
+   */
+  private static FabricType parseFabricType(String env) {
+    String normalized = env == null ? null : env.trim();
+    for (FabricType candidate : FabricType.values()) {
+      if (candidate.name().equalsIgnoreCase(normalized)) {
+        return candidate;
+      }
+    }
+    throw new IllegalArgumentException(String.format(
+        "Invalid value '%s' for %s; expected a FabricType name such as %s",
+        env, META_SYNC_DATAHUB_DATASET_ENV.key(), DEFAULT_DATAHUB_ENV.name()));
   }
 }