diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
index 4e477b873339..bada6fd5bb52 100644
--- a/core/src/main/java/org/apache/iceberg/CatalogUtil.java
+++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
@@ -220,6 +220,11 @@ public static Catalog buildIcebergCatalog(String name, Map<String, String> optio
         default:
           throw new UnsupportedOperationException("Unknown catalog type: " + catalogType);
       }
+    } else {
+      String catalogType = options.get(ICEBERG_CATALOG_TYPE);
+      Preconditions.checkArgument(catalogType == null,
+          "Cannot create catalog %s, both type and catalog-impl are set: type=%s, catalog-impl=%s",
+          name, catalogType, catalogImpl);
     }
 
     return CatalogUtil.loadCatalog(catalogImpl, name, options, conf);
diff --git a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
index a008f9ba7df4..eb393cf33589 100644
--- a/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
+++ b/core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
@@ -159,6 +159,18 @@ public void loadCustomFileIO_badClass() {
         () -> CatalogUtil.loadFileIO(TestFileIONotImpl.class.getName(), Maps.newHashMap(), null));
   }
 
+  @Test
+  public void buildCustomCatalog_withTypeSet() {
+    Map<String, String> options = new HashMap<>();
+    options.put(CatalogProperties.CATALOG_IMPL, "CustomCatalog");
+    options.put(CatalogUtil.ICEBERG_CATALOG_TYPE, "hive");
+    Configuration hadoopConf = new Configuration();
+    String name = "custom";
+
+    AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class,
+        "both type and catalog-impl are set", () -> CatalogUtil.buildIcebergCatalog(name, options, hadoopConf));
+  }
+
   public static class TestCatalog extends BaseMetastoreCatalog {
 
     private String catalogName;
diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
index 2e54cf584475..e02a07038f92 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
@@ -202,7 +202,11 @@ public static boolean dropTable(Configuration conf, Properties props) {
    */
   public static boolean hiveCatalog(Configuration conf, Properties props) {
     String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);
-    return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(getCatalogType(conf, catalogName));
+    String catalogType = getCatalogType(conf, catalogName);
+    if (catalogType != null) {
+      return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(catalogType);
+    }
+    return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(getCatalogType(conf, ICEBERG_DEFAULT_CATALOG_NAME));
   }
 
   @VisibleForTesting
@@ -279,9 +283,7 @@ private static String getCatalogType(Configuration conf, String catalogName) {
       }
     } else {
       String catalogType = conf.get(InputFormatConfig.CATALOG);
-      if (catalogType == null) {
-        return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE;
-      } else if (catalogType.equals(LOCATION)) {
+      if (catalogType != null && catalogType.equals(LOCATION)) {
         return NO_CATALOG_TYPE;
       } else {
         return catalogType;
diff --git a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
index 0d5b1d63bb23..6ac9808e87f8 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java
@@ -256,6 +256,17 @@ public void testLoadCatalogHive() {
     Assertions.assertThat(hiveCatalog.get()).isInstanceOf(HiveCatalog.class);
   }
 
+  @Test
+  public void testLegacyLoadCustomCatalogWithHiveCatalogTypeSet() {
+    String catalogName = "barCatalog";
+    conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE),
+        CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE);
+    conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName());
+    conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation");
+    AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class,
+        "both type and catalog-impl are set", () -> Catalogs.loadCatalog(conf, catalogName));
+  }
+
   @Test
   public void testLoadCatalogHadoop() {
     String catalogName = "barCatalog";
diff --git a/site/docs/hive.md b/site/docs/hive.md
index ea4b90a1335e..b05f389410e1 100644
--- a/site/docs/hive.md
+++ b/site/docs/hive.md
@@ -98,8 +98,8 @@ To globally register different catalogs, set the following Hadoop configurations
 
 | Config Key                                    | Description                                              |
 | --------------------------------------------- | ------------------------------------------------------ |
-| iceberg.catalog.<catalog_name>.type           | type of catalog: `hive` or `hadoop`                      |
-| iceberg.catalog.<catalog_name>.catalog-impl   | catalog implementation, must not be null if type is null |
+| iceberg.catalog.<catalog_name>.type           | type of catalog: `hive`, `hadoop`, or left unset if using a custom catalog |
+| iceberg.catalog.<catalog_name>.catalog-impl   | catalog implementation, must not be null if type is empty |
 | iceberg.catalog.<catalog_name>.<key>          | any config key and value pairs for the catalog           |
 
 Here are some examples using Hive CLI:
diff --git a/site/docs/spark-configuration.md b/site/docs/spark-configuration.md
index 93a87597b38a..67befeab574e 100644
--- a/site/docs/spark-configuration.md
+++ b/site/docs/spark-configuration.md
@@ -54,8 +54,8 @@ Both catalogs are configured using properties nested under the catalog name. Com
 
 | Property                                            | Values                        | Description                                                            |
 | -------------------------------------------------- | ----------------------------- | -------------------------------------------------------------------- |
-| spark.sql.catalog._catalog-name_.type               | `hive` or `hadoop`            | The underlying Iceberg catalog implementation, `HiveCatalog` or `HadoopCatalog` |
-| spark.sql.catalog._catalog-name_.catalog-impl       |                               | The underlying Iceberg catalog implementation. When set, the value of `type` property is ignored |
+| spark.sql.catalog._catalog-name_.type               | `hive` or `hadoop`            | The underlying Iceberg catalog implementation, `HiveCatalog`, `HadoopCatalog` or left unset if using a custom catalog |
+| spark.sql.catalog._catalog-name_.catalog-impl       |                               | The underlying Iceberg catalog implementation.|
 | spark.sql.catalog._catalog-name_.default-namespace  | default                       | The default current namespace for the catalog |
 | spark.sql.catalog._catalog-name_.uri                | thrift://host:port            | Metastore connect URI; default from `hive-site.xml` |
 | spark.sql.catalog._catalog-name_.warehouse          | hdfs://nn:8020/warehouse/path | Base path for the warehouse directory |
@@ -104,8 +104,7 @@ spark.sql.catalog.hadoop_prod.hadoop.fs.s3a.endpoint = http://aws-local:9000
 
 ### Loading a custom catalog
 
-Spark supports loading a custom Iceberg `Catalog` implementation by specifying the `catalog-impl` property.
-When `catalog-impl` is set, the value of `type` is ignored. Here is an example:
+Spark supports loading a custom Iceberg `Catalog` implementation by specifying the `catalog-impl` property. Here is an example:
 
 ```plain
 spark.sql.catalog.custom_prod = org.apache.iceberg.spark.SparkCatalog
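
For context, a minimal sketch (not part of the patch) of how the reworked `Catalogs.hiveCatalog(...)` resolves the catalog type after this change. The catalog name `barCatalog` and the class name `HiveCatalogTypeResolutionSketch` are made up for illustration; only constants and helpers already referenced in the diff are assumed.

```java
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;

public class HiveCatalogTypeResolutionSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    Properties props = new Properties();

    // The table refers to a named catalog ("barCatalog" is an example name).
    props.setProperty(InputFormatConfig.CATALOG_NAME, "barCatalog");

    // Case 1: the named catalog declares its type explicitly, so that type is used.
    conf.set(
        InputFormatConfig.catalogPropertyConfigKey("barCatalog", CatalogUtil.ICEBERG_CATALOG_TYPE),
        CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE);
    System.out.println(Catalogs.hiveCatalog(conf, props)); // expected: true

    // Case 2 (not exercised here): if the named catalog resolves to no type at all,
    // the patched hiveCatalog() falls back to the type configured for the default
    // catalog (ICEBERG_DEFAULT_CATALOG_NAME) rather than assuming Hive unconditionally.
  }
}
```

Per the `getCatalogType` hunk above, the behavioral change is that a missing type on the legacy configuration path no longer silently defaults to Hive; the default catalog's configuration decides instead.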