diff --git a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
index 66fa8e80c515..e485f2619ca8 100644
--- a/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
+++ b/spark3-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestSnapshotTableProcedure.java
@@ -96,6 +96,40 @@ public void testSnapshotWithProperties() throws IOException {
         sql("SELECT * FROM %s ORDER BY id", tableName));
   }
 
+  @Test
+  public void testSnapshotWithPathOverrides() throws IOException {
+    String location = temp.newFolder().toString();
+    sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'" +
+        " TBLPROPERTIES ('%s'='true', '%s'='%s', '%s'='%s', '%s'='%s')",
+        sourceName, location,
+        TableProperties.OBJECT_STORE_ENABLED,
+        TableProperties.WRITE_METADATA_LOCATION, location + "/metadata-folder",
+        TableProperties.WRITE_NEW_DATA_LOCATION, location + "/folder-location",
+        TableProperties.OBJECT_STORE_PATH, location + "/object-location");
+
+    sql("INSERT INTO TABLE %s VALUES (1, 'a')", sourceName);
+    Object result = scalarSql(
+        "CALL %s.system.snapshot(source_table => '%s', table => '%s')",
+        catalogName, sourceName, tableName);
+
+    Assert.assertEquals("Should have added one file", 1L, result);
+
+    Table createdTable = validationCatalog.loadTable(tableIdent);
+
+    String tableLocation = createdTable.location();
+    Assert.assertNotEquals("Table should not have the original location", location, tableLocation);
+
+    Map<String, String> props = createdTable.properties();
+    Assert.assertFalse("Table should not have metadata path override",
+        props.containsKey(TableProperties.WRITE_METADATA_LOCATION));
+    Assert.assertFalse("Table should not have folder storage path override",
+        props.containsKey(TableProperties.WRITE_NEW_DATA_LOCATION));
+    Assert.assertFalse("Table should not have object storage path override",
+        props.containsKey(TableProperties.OBJECT_STORE_PATH));
+    Assert.assertEquals("Table should still have object storage mode enabled",
+        "true", props.get(TableProperties.OBJECT_STORE_ENABLED));
+  }
+
   @Test
   public void testSnapshotWithAlternateLocation() throws IOException {
     Assume.assumeTrue("No Snapshoting with Alternate locations with Hadoop Catalogs", !catalogName.contains("hadoop"));
diff --git a/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java b/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java
index 73cfa59feff6..3e227c34e0d4 100644
--- a/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java
+++ b/spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java
@@ -168,6 +168,7 @@ protected Map<String, String> destTableProps() {
     properties.remove(LOCATION);
     properties.remove(TableProperties.WRITE_METADATA_LOCATION);
     properties.remove(TableProperties.WRITE_NEW_DATA_LOCATION);
+    properties.remove(TableProperties.OBJECT_STORE_PATH);
 
     // set default and user-provided props
     properties.put(TableCatalog.PROP_PROVIDER, "iceberg");
diff --git a/spark3/src/test/java/org/apache/iceberg/spark/sql/TestAlterTable.java b/spark3/src/test/java/org/apache/iceberg/spark/sql/TestAlterTable.java
index 6b3ce437d83a..4535bcf3e7e6 100644
--- a/spark3/src/test/java/org/apache/iceberg/spark/sql/TestAlterTable.java
+++ b/spark3/src/test/java/org/apache/iceberg/spark/sql/TestAlterTable.java
@@ -21,9 +21,12 @@
 
 import java.util.Map;
 import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.catalog.Namespace;
 import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.iceberg.io.LocationProvider;
 import org.apache.iceberg.spark.SparkCatalogTestBase;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.types.Types.NestedField;
@@ -225,4 +228,44 @@ public void testSetTableProperties() {
         UnsupportedOperationException.class,
         () -> sql("ALTER TABLE %s SET TBLPROPERTIES ('sort-order'='value')", tableName));
   }
+
+  @Test
+  public void testUpdateDataStoragePath() {
+    String objectStoragePath = "/object/storage/path";
+    sql("ALTER TABLE %s SET TBLPROPERTIES ('%s'='true', '%s'='%s')",
+        tableName, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_PATH, objectStoragePath);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+    LocationProvider locationProvider = table.locationProvider();
+    Assert.assertEquals("should use object storage location provider",
+        "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
+        locationProvider.getClass().getName());
+    Assert.assertTrue("should use table object storage path",
+        locationProvider.newDataLocation("file").contains(objectStoragePath));
+
+    String folderStoragePath = "/folder/storage/path";
+    sql("ALTER TABLE %s UNSET TBLPROPERTIES ('%s')",
+        tableName, TableProperties.OBJECT_STORE_PATH);
+    sql("ALTER TABLE %s SET TBLPROPERTIES ('%s'='%s')",
+        tableName, TableProperties.WRITE_NEW_DATA_LOCATION, folderStoragePath);
+
+    table.refresh();
+    locationProvider = table.locationProvider();
+    Assert.assertEquals("should use object storage location provider",
+        "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
+        locationProvider.getClass().getName());
+    Assert.assertTrue("should use table folder storage path",
+        locationProvider.newDataLocation("file").contains(folderStoragePath));
+
+    sql("ALTER TABLE %s UNSET TBLPROPERTIES ('%s')",
+        tableName, TableProperties.WRITE_NEW_DATA_LOCATION);
+
+    table.refresh();
+    locationProvider = table.locationProvider();
+    Assert.assertEquals("should use object storage location provider",
+        "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
+        locationProvider.getClass().getName());
+    Assert.assertTrue("should use table default data path",
+        locationProvider.newDataLocation("file").contains(table.location() + "/data/"));
+  }
 }
diff --git a/spark3/src/test/java/org/apache/iceberg/spark/sql/TestCreateTable.java b/spark3/src/test/java/org/apache/iceberg/spark/sql/TestCreateTable.java
index 303cbb5f932b..9055a7a103ec 100644
--- a/spark3/src/test/java/org/apache/iceberg/spark/sql/TestCreateTable.java
+++ b/spark3/src/test/java/org/apache/iceberg/spark/sql/TestCreateTable.java
@@ -29,6 +29,7 @@
 import org.apache.iceberg.TableOperations;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.iceberg.io.LocationProvider;
 import org.apache.iceberg.spark.SparkCatalogTestBase;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.types.Types.NestedField;
@@ -279,4 +280,63 @@ public void testDowngradeTableToFormatV1ThroughTablePropertyFails() {
         "Cannot downgrade v2 table to v1",
         () -> sql("ALTER TABLE %s SET TBLPROPERTIES ('format-version'='1')", tableName));
   }
+
+  @Test
+  public void testCreateTableWithObjectStorageModeDefault() {
Assert.assertFalse("Table should not already exist", validationCatalog.tableExists(tableIdent)); + + sql("CREATE TABLE %s " + + "(id BIGINT NOT NULL, data STRING) " + + "USING iceberg " + + "TBLPROPERTIES ('%s'='true')", + tableName, TableProperties.OBJECT_STORE_ENABLED); + + Table table = validationCatalog.loadTable(tableIdent); + LocationProvider locationProvider = table.locationProvider(); + Assert.assertEquals("should use object storage location provider", + "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider", + locationProvider.getClass().getName()); + Assert.assertTrue("should use table default data location", + locationProvider.newDataLocation("file").contains(table.location() + "/data/")); + } + + @Test + public void testCreateTableWithObjectStorageModeFolderStoragePath() { + Assert.assertFalse("Table should not already exist", validationCatalog.tableExists(tableIdent)); + + String folderStoragePath = "/folder/storage/path"; + sql("CREATE TABLE %s " + + "(id BIGINT NOT NULL, data STRING) " + + "USING iceberg " + + "TBLPROPERTIES ('%s'='true', '%s'='%s')", + tableName, TableProperties.OBJECT_STORE_ENABLED, TableProperties.WRITE_NEW_DATA_LOCATION, folderStoragePath); + + Table table = validationCatalog.loadTable(tableIdent); + LocationProvider locationProvider = table.locationProvider(); + Assert.assertEquals("should use object storage location provider", + "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider", + locationProvider.getClass().getName()); + Assert.assertTrue("should use table folder storage path", + locationProvider.newDataLocation("file").contains(folderStoragePath)); + } + + @Test + public void testCreateTableWithObjectStorageModeObjectStoragePath() { + Assert.assertFalse("Table should not already exist", validationCatalog.tableExists(tableIdent)); + + String objectStoragePath = "/object/storage/path"; + sql("CREATE TABLE %s " + + "(id BIGINT NOT NULL, data STRING) " + + "USING iceberg " + + "TBLPROPERTIES ('%s'='true', '%s'='%s')", + tableName, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_PATH, objectStoragePath); + + Table table = validationCatalog.loadTable(tableIdent); + LocationProvider locationProvider = table.locationProvider(); + Assert.assertEquals("should use object storage location provider", + "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider", + locationProvider.getClass().getName()); + Assert.assertTrue("should use table object storage path", + locationProvider.newDataLocation("file").contains(objectStoragePath)); + } }