-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Spark: remove object storage data path in destination table for snapshot table action #2966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,9 +21,12 @@ | |
|
|
||
| import java.util.Map; | ||
| import org.apache.iceberg.AssertHelpers; | ||
| import org.apache.iceberg.Table; | ||
| import org.apache.iceberg.TableProperties; | ||
| import org.apache.iceberg.catalog.Namespace; | ||
| import org.apache.iceberg.catalog.TableIdentifier; | ||
| import org.apache.iceberg.hadoop.HadoopCatalog; | ||
| import org.apache.iceberg.io.LocationProvider; | ||
| import org.apache.iceberg.spark.SparkCatalogTestBase; | ||
| import org.apache.iceberg.types.Types; | ||
| import org.apache.iceberg.types.Types.NestedField; | ||
|
|
@@ -225,4 +228,45 @@ public void testSetTableProperties() { | |
| UnsupportedOperationException.class, | ||
| () -> sql("ALTER TABLE %s SET TBLPROPERTIES ('sort-order'='value')", tableName)); | ||
| } | ||
|
|
||
| @Test | ||
| public void testUpdateDataStoragePath() { | ||
| String objectStoragePath = "/folder/storage/path"; | ||
| sql("ALTER TABLE %s SET TBLPROPERTIES ('%s'='true', '%s'='%s')", | ||
| tableName, TableProperties.OBJECT_STORE_ENABLED, TableProperties.OBJECT_STORE_PATH, objectStoragePath); | ||
|
|
||
| Table table = validationCatalog.loadTable(tableIdent); | ||
| LocationProvider locationProvider = table.locationProvider(); | ||
| Assert.assertEquals("should use object storage location provider", | ||
| "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider", | ||
| locationProvider.getClass().getName()); | ||
| Assert.assertTrue("should use table object storage path", | ||
| locationProvider.newDataLocation("file").contains(objectStoragePath)); | ||
|
|
||
| String folderStoragePath = "/folder/storage/path"; | ||
| sql("ALTER TABLE %s UNSET TBLPROPERTIES ('%s')", | ||
| tableName, TableProperties.OBJECT_STORE_PATH); | ||
| sql("ALTER TABLE %s SET TBLPROPERTIES ('%s'='%s')", | ||
| tableName, TableProperties.WRITE_NEW_DATA_LOCATION, folderStoragePath); | ||
|
|
||
| table.refresh(); | ||
| locationProvider = table.locationProvider(); | ||
| Assert.assertEquals("should use object storage location provider", | ||
| "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider", | ||
| locationProvider.getClass().getName()); | ||
| Assert.assertTrue("should use table folder storage path", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: we might want to further clarify what we’re testing for in the assertion, for example with a more specific assertion message. One could argue that these assertion messages could be subject to the same problems as comment rot if tests get changed, so I’ll defer to your judgement. Also: given that the names of the constants and their string representations are a little funky (particularly folder storage path / `WRITE_NEW_DATA_LOCATION`), it might make sense to refer to both at some point? Again, I’ll leave that to your discretion, but I think it might help clarify things for readers. 🙂 |
||
| locationProvider.newDataLocation("file").contains(folderStoragePath)); | ||
|
|
||
|
|
||
| sql("ALTER TABLE %s UNSET TBLPROPERTIES ('%s')", | ||
| tableName, TableProperties.WRITE_NEW_DATA_LOCATION, folderStoragePath); | ||
|
|
||
| table.refresh(); | ||
| locationProvider = table.locationProvider(); | ||
| Assert.assertEquals("should use object storage location provider", | ||
| "org.apache.iceberg.LocationProviders$ObjectStoreLocationProvider", | ||
| locationProvider.getClass().getName()); | ||
| Assert.assertTrue("should use table default data path", | ||
| locationProvider.newDataLocation("file").contains(table.location() + "/data/")); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Non-blocking: it might make sense to add a comment here that we’re explicitly choosing not to bring along
`OBJECT_STORE_PATH` in the snapshot? Either a comment, or possibly updating the `ObjectStoreLocationProvider` docs / snapshot docs with this detail, would be great 🙂. Documentation updates can be done in a separate PR of course (and I’m happy to assist there if you’d like).