Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,40 @@ public void testSnapshotWithProperties() throws IOException {
sql("SELECT * FROM %s ORDER BY id", tableName));
}

@Test
public void testSnapshotWithPathOverrides() throws IOException {
String location = temp.newFolder().toString();
sql("CREATE TABLE %s (id bigint NOT NULL, data string) USING parquet LOCATION '%s'" +
" TBLPROPERTIES ('%s'='true', '%s'='%s', '%s'='%s', '%s'='%s')",
sourceName, location,
TableProperties.OBJECT_STORE_ENABLED,
TableProperties.WRITE_METADATA_LOCATION, location + "/metadata-folder",
TableProperties.WRITE_NEW_DATA_LOCATION, location + "/folder-location",
TableProperties.OBJECT_STORE_PATH, location + "/object-location");

sql("INSERT INTO TABLE %s VALUES (1, 'a')", sourceName);
Object result = scalarSql(
"CALL %s.system.snapshot(source_table => '%s', table => '%s')",
catalogName, sourceName, tableName);

Assert.assertEquals("Should have added one file", 1L, result);

Table createdTable = validationCatalog.loadTable(tableIdent);

String tableLocation = createdTable.location();
Assert.assertNotEquals("Table should not have the original location", location, tableLocation);

Map<String, String> props = createdTable.properties();
Assert.assertFalse("Table should not have metadata path override",
props.containsKey(TableProperties.WRITE_METADATA_LOCATION));
Assert.assertFalse("Table should not have folder storage path override",
props.containsKey(TableProperties.WRITE_NEW_DATA_LOCATION));
Assert.assertFalse("Table should not have object storage path override",
props.containsKey(TableProperties.OBJECT_STORE_PATH));
Assert.assertEquals("Table should still have object storage mode enabled",
"true", props.get(TableProperties.OBJECT_STORE_ENABLED));
}

@Test
public void testSnapshotWithAlternateLocation() throws IOException {
Assume.assumeTrue("No Snapshoting with Alternate locations with Hadoop Catalogs", !catalogName.contains("hadoop"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ protected Map<String, String> destTableProps() {
properties.remove(LOCATION);
properties.remove(TableProperties.WRITE_METADATA_LOCATION);
properties.remove(TableProperties.WRITE_NEW_DATA_LOCATION);
properties.remove(TableProperties.OBJECT_STORE_PATH);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-blocking: it might make sense to add a comment here that we’re explicitly choosing not to bring along OBJECT_STORE_PATH in the snapshot?

Either a comment, or possibly updating the ObjectStorageLocationProvider docs / snapshot docs with this detail would be great 🙂. Documentation updates can be done in a separate PR of course (and happy to assist there if you’d like).


// set default and user-provided props
properties.put(TableCatalog.PROP_PROVIDER, "iceberg");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@

import java.util.Map;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.io.LocationProvider;
import org.apache.iceberg.spark.SparkCatalogTestBase;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.NestedField;
Expand Down Expand Up @@ -225,4 +228,45 @@ public void testSetTableProperties() {
UnsupportedOperationException.class,
() -> sql("ALTER TABLE %s SET TBLPROPERTIES ('sort-order'='value')", tableName));
}

@Test
public void testUpdateDataStoragePath() {
String objectStoragePath = "/folder/storage/path";
sql("ALTER TABLE %s SET TBLPROPERTIES ('%s'='true', '%s'='%s')",
tableName, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_PATH, objectStoragePath);

Table table = validationCatalog.loadTable(tableIdent);
LocationProvider locationProvider = table.locationProvider();
Assert.assertEquals("should use object storage location provider",
"org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
locationProvider.getClass().getName());
Assert.assertTrue("should use table object storage path",
locationProvider.newDataLocation("file").contains(objectStoragePath));

String folderStoragePath = "/folder/storage/path";
sql("ALTER TABLE %s UNSET TBLPROPERTIES ('%s')",
tableName, TableProperties.OBJECT_STORE_PATH);
sql("ALTER TABLE %s SET TBLPROPERTIES ('%s'='%s')",
tableName, TableProperties.WRITE_NEW_DATA_LOCATION, folderStoragePath);

table.refresh();
locationProvider = table.locationProvider();
Assert.assertEquals("should use object storage location provider",
"org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
locationProvider.getClass().getName());
Assert.assertTrue("should use table folder storage path",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: we might want to further clarify what we’re testing for in the assertion.

Something like Should use table folder storage path after unsetting the object storage location path or `should use table folder storage path if present when object storage path is not present”.

One could argue that these assertions could be subject to the same problems as comment rot if tests get changed, so I’ll defer to your judgement.

Also: Given that the names of the constants and their string representations are a little funky (particularly folder storage path / WRITE_NEW_DATA_LOCATION), it might make sense to refer to both at some point? Again, will leave that to your discretion but I think it might help clarify for readers. 🙂

locationProvider.newDataLocation("file").contains(folderStoragePath));


sql("ALTER TABLE %s UNSET TBLPROPERTIES ('%s')",
tableName, TableProperties.WRITE_NEW_DATA_LOCATION, folderStoragePath);

table.refresh();
locationProvider = table.locationProvider();
Assert.assertEquals("should use object storage location provider",
"org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
locationProvider.getClass().getName());
Assert.assertTrue("should use table default data path",
locationProvider.newDataLocation("file").contains(table.location() + "/data/"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.io.LocationProvider;
import org.apache.iceberg.spark.SparkCatalogTestBase;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.NestedField;
Expand Down Expand Up @@ -279,4 +280,63 @@ public void testDowngradeTableToFormatV1ThroughTablePropertyFails() {
"Cannot downgrade v2 table to v1",
() -> sql("ALTER TABLE %s SET TBLPROPERTIES ('format-version'='1')", tableName));
}

@Test
public void testCreateTableWithObjectStorageModeDefault() {
Assert.assertFalse("Table should not already exist", validationCatalog.tableExists(tableIdent));

sql("CREATE TABLE %s " +
"(id BIGINT NOT NULL, data STRING) " +
"USING iceberg " +
"TBLPROPERTIES ('%s'='true')",
tableName, TableProperties.OBJECT_STORE_ENABLED);

Table table = validationCatalog.loadTable(tableIdent);
LocationProvider locationProvider = table.locationProvider();
Assert.assertEquals("should use object storage location provider",
"org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
locationProvider.getClass().getName());
Assert.assertTrue("should use table default data location",
locationProvider.newDataLocation("file").contains(table.location() + "/data/"));
}

@Test
public void testCreateTableWithObjectStorageModeFolderStoragePath() {
Assert.assertFalse("Table should not already exist", validationCatalog.tableExists(tableIdent));

String folderStoragePath = "/folder/storage/path";
sql("CREATE TABLE %s " +
"(id BIGINT NOT NULL, data STRING) " +
"USING iceberg " +
"TBLPROPERTIES ('%s'='true', '%s'='%s')",
tableName, TableProperties.OBJECT_STORE_ENABLED, TableProperties.WRITE_NEW_DATA_LOCATION, folderStoragePath);

Table table = validationCatalog.loadTable(tableIdent);
LocationProvider locationProvider = table.locationProvider();
Assert.assertEquals("should use object storage location provider",
"org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
locationProvider.getClass().getName());
Assert.assertTrue("should use table folder storage path",
locationProvider.newDataLocation("file").contains(folderStoragePath));
}

@Test
public void testCreateTableWithObjectStorageModeObjectStoragePath() {
Assert.assertFalse("Table should not already exist", validationCatalog.tableExists(tableIdent));

String objectStoragePath = "/object/storage/path";
sql("CREATE TABLE %s " +
"(id BIGINT NOT NULL, data STRING) " +
"USING iceberg " +
"TBLPROPERTIES ('%s'='true', '%s'='%s')",
tableName, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_PATH, objectStoragePath);

Table table = validationCatalog.loadTable(tableIdent);
LocationProvider locationProvider = table.locationProvider();
Assert.assertEquals("should use object storage location provider",
"org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider",
locationProvider.getClass().getName());
Assert.assertTrue("should use table object storage path",
locationProvider.newDataLocation("file").contains(objectStoragePath));
}
}