Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,7 @@ public class HiveTableOperations extends BaseMetastoreTableOperations
private String tableKeyId;
private int encryptionDekLength;

// keys loaded from the latest metadata
private Optional<List<EncryptedKey>> encryptedKeysFromMetadata = Optional.empty();

// keys added to EM (e.g. as a result of a FileAppend) but not committed into the latest metadata
// yet
private Optional<List<EncryptedKey>> encryptedKeysPending = Optional.empty();
private List<EncryptedKey> encryptedKeys = List.of();

protected HiveTableOperations(
Configuration conf,
Expand Down Expand Up @@ -156,12 +151,9 @@ public EncryptionManager encryption() {
encryptionProperties.put(
TableProperties.ENCRYPTION_DEK_LENGTH, String.valueOf(encryptionDekLength));

List<EncryptedKey> keys = Lists.newLinkedList();
encryptedKeysFromMetadata.ifPresent(keys::addAll);
encryptedKeysPending.ifPresent(keys::addAll);

encryptionManager =
EncryptionUtil.createEncryptionManager(keys, encryptionProperties, keyManagementClient);
EncryptionUtil.createEncryptionManager(
encryptedKeys, encryptionProperties, keyManagementClient);
} else {
return PlaintextEncryptionManager.instance();
}
Expand Down Expand Up @@ -215,24 +207,20 @@ the table key parameter (along with existing snapshots) in the file, making the
? Integer.parseInt(dekLengthFromHMS)
: TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT;

encryptedKeysFromMetadata = Optional.ofNullable(current().encryptionKeys());
encryptedKeys =
Copy link
Contributor Author

@smaheshwar-pltr smaheshwar-pltr Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we can assume that encryption properties are unchanging (aside from keys) and therefore do not need to be updated on refresh, there's actually a simpler approach that just adds metadata keys to the current EM's transient state: https://github.com/apache/iceberg/compare/main...smaheshwar-pltr:iceberg:sm/add-keys?expand=1.

We could also go with that approach, and introduce setters in case we can't make that assumption. But then I suspect just re-initialising the EM on refresh makes more sense.

Curious though for thoughts on this assumption.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I prefer the idea of re-initializing the EM on refresh, so I'd rather stick to it. I suspect the transient state was never intended to be accessible from outside with set/put methods.

@ggershinsky let me know if you agree.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @smaheshwar-pltr and @szlta, I also think it's cleaner to re-init the EM upon refresh.

Optional.ofNullable(current().encryptionKeys())
.map(Lists::newLinkedList)
.orElseGet(Lists::newLinkedList);

if (encryptionManager != null) {
encryptedKeysPending = Optional.of(Lists.newLinkedList());

Set<String> keyIdsFromMetadata =
encryptedKeysFromMetadata.orElseGet(Lists::newLinkedList).stream()
.map(EncryptedKey::keyId)
.collect(Collectors.toSet());
encryptedKeys.stream().map(EncryptedKey::keyId).collect(Collectors.toSet());

for (EncryptedKey keyFromEM : EncryptionUtil.encryptionKeys(encryptionManager).values()) {
if (!keyIdsFromMetadata.contains(keyFromEM.keyId())) {
encryptedKeysPending.get().add(keyFromEM);
encryptedKeys.add(keyFromEM);
}
}

} else {
encryptedKeysPending = Optional.empty();
}

// Force re-creation of encryption manager with updated keys
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ private static List<DataFile> currentDataFiles(Table table) {

@TestTemplate
public void testRefresh() {
catalog.initialize(catalogName, catalogConfig);
Table table = catalog.loadTable(tableIdent);
validationCatalog.initialize(catalogName, catalogConfig);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, the catalog -> validationCatalog change isn't needed here, but I figure it doesn't hurt to set things up for #13225.

Table table = validationCatalog.loadTable(tableIdent);

assertThat(currentDataFiles(table)).isNotEmpty();

Expand All @@ -117,21 +117,73 @@ public void testRefresh() {
}

@TestTemplate
public void testTransaction() {
catalog.initialize(catalogName, catalogConfig);
public void testAppendTransaction() {
validationCatalog.initialize(catalogName, catalogConfig);
Table table = validationCatalog.loadTable(tableIdent);

Table table = catalog.loadTable(tableIdent);
List<DataFile> dataFiles = currentDataFiles(table);
Transaction transaction = table.newTransaction();
AppendFiles append = transaction.newAppend();

// add an arbitrary datafile
append.appendFile(dataFiles.get(0));
append.commit();
transaction.commitTransaction();

assertThat(currentDataFiles(table)).hasSize(dataFiles.size() + 1);
}

@TestTemplate
public void testConcurrentAppendTransactions() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This tests an append retry

validationCatalog.initialize(catalogName, catalogConfig);
Table table = validationCatalog.loadTable(tableIdent);

List<DataFile> dataFiles = currentDataFiles(table);
Transaction transaction = table.newTransaction();
AppendFiles append = transaction.newAppend();

// add an arbitrary datafile
append.appendFile(dataFiles.get(0));

// append to the table in the meantime. use a separate load to avoid shared operations
validationCatalog.loadTable(tableIdent).newFastAppend().appendFile(dataFiles.get(0)).commit();

append.commit();
transaction.commitTransaction();

assertThat(currentDataFiles(table).size()).isEqualTo(dataFiles.size() + 1);
assertThat(currentDataFiles(table)).hasSize(dataFiles.size() + 2);
}

// See CatalogTests#testConcurrentReplaceTransactions
@TestTemplate
public void testConcurrentReplaceTransactions() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This tests a replace retry - note that replace retries don't re-apply updates to refreshed metadata, because they'll replace the whole table anyway. #13225 didn't handle this case at one point, because it requires keys to be maintained when refreshing to different metadata, so I think it's valuable to test.

validationCatalog.initialize(catalogName, catalogConfig);

Table table = validationCatalog.loadTable(tableIdent);
DataFile file = currentDataFiles(table).get(0);
Schema schema = table.schema();

// Write data for a replace transaction that will be committed later
Transaction secondReplace =
validationCatalog
.buildTable(tableIdent, schema)
.withProperty("encryption.key-id", UnitestKMS.MASTER_KEY_NAME1)
.replaceTransaction();
secondReplace.newFastAppend().appendFile(file).commit();

// Commit another replace transaction first
Transaction firstReplace =
validationCatalog
.buildTable(tableIdent, schema)
.withProperty("encryption.key-id", UnitestKMS.MASTER_KEY_NAME1)
.replaceTransaction();
firstReplace.newFastAppend().appendFile(file).commit();
firstReplace.commitTransaction();

secondReplace.commitTransaction();

Table afterSecondReplace = validationCatalog.loadTable(tableIdent);
assertThat(currentDataFiles(afterSecondReplace)).hasSize(1);
}

@TestTemplate
Expand Down