Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.exceptions;

import com.google.errorprone.annotations.FormatMethod;

/**
* REST exception thrown when a request is well-formed but cannot be applied.
* <p>
* This corresponds to the HTTP 422 (Unprocessable Entity) response status.
* <p>
* For example, this is used when a property update requests that properties are simultaneously set and removed.
*/
public class UnprocessableEntityException extends RESTException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might make sense to mention that this is HTTP 422. Somewhat implied by the name as well as extending RESTException though.

/**
* Creates the exception from a format string and its arguments.
* <p>
* Both values are handed unchanged to the {@code RESTException} constructor; {@code @FormatMethod} lets Error Prone
* check the format string against the arguments at compile time.
*
* @param message a format string describing why the request could not be applied
* @param args arguments referenced by the format string
*/
@FormatMethod
public UnprocessableEntityException(String message, Object... args) {
super(message, args);
}
}
10 changes: 9 additions & 1 deletion core/src/main/java/org/apache/iceberg/BaseTransaction.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS;
import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT;

class BaseTransaction implements Transaction {
public class BaseTransaction implements Transaction {
private static final Logger LOG = LoggerFactory.getLogger(BaseTransaction.class);

enum TransactionType {
Expand Down Expand Up @@ -90,6 +90,14 @@ public Table table() {
return transactionTable;
}

/**
 * Returns the table metadata held in this transaction's {@code current} field.
 * <p>
 * NOTE(review): presumably the metadata that pending changes are applied on top of; confirm against the rest of
 * the class, which is not visible here.
 *
 * @return the value of the {@code current} field
 */
public TableMetadata startMetadata() {
  return this.current;
}

/**
 * Returns the {@link TableOperations} stored in this transaction's {@code ops} field.
 *
 * @return the value of the {@code ops} field
 */
public TableOperations underlyingOps() {
  return this.ops;
}

private void checkLastOperationCommitted(String operation) {
Preconditions.checkState(hasLastOpCommitted,
"Cannot create new %s: last operation has not committed", operation);
Expand Down
314 changes: 314 additions & 0 deletions core/src/main/java/org/apache/iceberg/rest/CatalogHandlers.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.rest;

import java.time.OffsetDateTime;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.BaseTransaction;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.rest.requests.CreateNamespaceRequest;
import org.apache.iceberg.rest.requests.CreateTableRequest;
import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest;
import org.apache.iceberg.rest.requests.UpdateTableRequest;
import org.apache.iceberg.rest.responses.CreateNamespaceResponse;
import org.apache.iceberg.rest.responses.DropNamespaceResponse;
import org.apache.iceberg.rest.responses.DropTableResponse;
import org.apache.iceberg.rest.responses.GetNamespaceResponse;
import org.apache.iceberg.rest.responses.ListNamespacesResponse;
import org.apache.iceberg.rest.responses.ListTablesResponse;
import org.apache.iceberg.rest.responses.LoadTableResponse;
import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse;
import org.apache.iceberg.util.Tasks;

import static org.apache.iceberg.TableProperties.COMMIT_MAX_RETRY_WAIT_MS_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_MIN_RETRY_WAIT_MS_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES_DEFAULT;
import static org.apache.iceberg.TableProperties.COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT;

public class CatalogHandlers {
// placeholder schema built with the no-arg Schema constructor; updateTable passes it to Catalog#buildTable only to
// obtain a transaction (and its TableOperations) for a table that does not exist yet
private static final Schema EMPTY_SCHEMA = new Schema();

// holder for static handler methods only; the private constructor prevents instantiation
private CatalogHandlers() {
}

/**
 * Carrier that moves a failed requirement check out of the commit retry loop.
 * <p>
 * A failed REST requirement is reported to the client as a CommitFailedException. The requirement checks run
 * inside the task that is retried whenever {@link TableOperations#commit(TableMetadata, TableMetadata)} throws
 * CommitFailedException, so letting that exception escape directly would trigger pointless retries. The failure
 * is wrapped in this unchecked type instead, then unwrapped and rethrown once the retry loop has exited.
 */
private static class ValidationFailureException extends RuntimeException {
  private final CommitFailedException failure;

  private ValidationFailureException(CommitFailedException failure) {
    super(failure);
    this.failure = failure;
  }

  /**
   * Returns the original requirement failure so that it can be rethrown to the caller.
   */
  public CommitFailedException wrapped() {
    return failure;
  }
}

/**
 * Lists namespaces and wraps them in a REST response.
 * <p>
 * An empty {@code parent} selects the catalog's no-argument listing; otherwise the namespaces under
 * {@code parent} are listed.
 *
 * @param catalog a catalog that supports namespaces
 * @param parent the namespace to list under, or an empty namespace
 * @return a response containing every namespace returned by the catalog
 */
public static ListNamespacesResponse listNamespaces(SupportsNamespaces catalog, Namespace parent) {
  List<Namespace> namespaces = parent.isEmpty() ? catalog.listNamespaces() : catalog.listNamespaces(parent);
  return ListNamespacesResponse.builder().addAll(namespaces).build();
}

/**
 * Creates the namespace described by the request and reports the properties the catalog stored for it.
 *
 * @param catalog a catalog that supports namespaces
 * @param request the namespace to create and its initial properties
 * @return a response holding the namespace and the properties loaded back from the catalog after creation
 */
public static CreateNamespaceResponse createNamespace(SupportsNamespaces catalog, CreateNamespaceRequest request) {
  Namespace created = request.namespace();
  catalog.createNamespace(created, request.properties());

  // read the metadata back from the catalog rather than echoing the request properties
  Map<String, String> storedProperties = catalog.loadNamespaceMetadata(created);
  return CreateNamespaceResponse.builder()
      .withNamespace(created)
      .setProperties(storedProperties)
      .build();
}

/**
 * Loads a namespace's metadata and wraps it in a REST response.
 *
 * @param catalog a catalog that supports namespaces
 * @param namespace the namespace to load
 * @return a response holding the namespace and its properties
 */
public static GetNamespaceResponse loadNamespace(SupportsNamespaces catalog, Namespace namespace) {
  return GetNamespaceResponse.builder()
      .withNamespace(namespace)
      .setProperties(catalog.loadNamespaceMetadata(namespace))
      .build();
}

/**
 * Drops a namespace and reports the catalog's result.
 *
 * @param catalog a catalog that supports namespaces
 * @param namespace the namespace to drop
 * @return a response carrying the boolean returned by {@code dropNamespace}
 */
public static DropNamespaceResponse dropNamespace(SupportsNamespaces catalog, Namespace namespace) {
  return DropNamespaceResponse.builder().dropped(catalog.dropNamespace(namespace)).build();
}

/**
* Applies the property updates and removals in a request to a namespace and reports the outcome.
* <p>
* The request is validated first. The namespace's properties are then loaded once, before any change is applied,
* to work out which requested removals name a property that is not set ("missing"). Updates are applied before
* removals, and each catalog call is skipped when there is nothing to do.
* <p>
* The "removed" part of the response is derived from the properties loaded at the start, not from what the catalog
* actually removed, so it can be inaccurate if the namespace is modified concurrently.
*
* @param catalog a catalog that supports namespaces
* @param namespace the namespace whose properties are changed
* @param request the properties to set and the property keys to remove
* @return a response listing missing, updated, and removed property keys
*/
public static UpdateNamespacePropertiesResponse updateNamespaceProperties(
SupportsNamespaces catalog, Namespace namespace, UpdateNamespacePropertiesRequest request) {
request.validate();

Set<String> removals = Sets.newHashSet(request.removals());
Map<String, String> updates = request.updates();

// requested removals that were not set when the properties were loaded; removing them is not an error
// NOTE(review): Sets.difference returns a view, so this assumes startProperties is a snapshot that is not
// affected by the setProperties/removeProperties calls below -- confirm for the catalog implementation in use
Map<String, String> startProperties = catalog.loadNamespaceMetadata(namespace);
Set<String> missing = Sets.difference(removals, startProperties.keySet());

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another potential validation here, we are allowed to request the removal of a property that is not set, is that ok? Seems fine to me since I think a lot of property structures let you remove a non-existing key without error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is for these to be idempotent, so it is fine to request removing a property that is not set.

If we didn't allow that, then you'd get incorrect errors when a user does something like SHOW TBLPROPERTIES followed by concurrent UNSET commands. Retries would also cause problems.

if (!updates.isEmpty()) {
catalog.setProperties(namespace, updates);
}

if (!removals.isEmpty()) {
// remove the original set just in case there was an update just after loading properties
catalog.removeProperties(namespace, removals);
}

// "removed" is every requested removal that was present when the properties were loaded
return UpdateNamespacePropertiesResponse.builder()
.addMissing(missing)
.addUpdated(updates.keySet())
.addRemoved(Sets.difference(removals, missing))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may not be true at this point since (as your comment alludes to above) we may have removed properties that were set in between our determination of missing and our execution of catalog.removeProperties. Not sure if we can make a better response though unless we change catalog.removeProperties to return which properties actually got removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is something for implementations to improve on. Right now there's no way to do this through the catalog API.

.build();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: This line and the ones above it are over indented.

}

/**
 * Lists the tables in a namespace and wraps the identifiers in a REST response.
 *
 * @param catalog the catalog to query
 * @param namespace the namespace whose tables are listed
 * @return a response containing every identifier returned by the catalog
 */
public static ListTablesResponse listTables(Catalog catalog, Namespace namespace) {
  return ListTablesResponse.builder()
      .addAll(catalog.listTables(namespace))
      .build();
}

/**
* Builds the initial metadata for a table that is being staged, without committing anything to the catalog.
* <p>
* The request is validated and the identifier is checked against existing tables. The returned metadata is created
* with {@code TableMetadata.newTableMetadata}; this method performs no commit.
*
* @param catalog the catalog used to check whether the table already exists
* @param namespace the namespace the table will be created in
* @param request the table name, schema, and optional spec, sort order, location, and properties
* @return a response wrapping the newly built table metadata
* @throws AlreadyExistsException if a table with the requested identifier already exists
*/
public static LoadTableResponse stageTableCreate(Catalog catalog, Namespace namespace, CreateTableRequest request) {
request.validate();

TableIdentifier ident = TableIdentifier.of(namespace, request.name());
if (catalog.tableExists(ident)) {
throw new AlreadyExistsException("Table already exists: %s", ident);
}

// "created-at" is set first, so a value for the same key in the request properties overrides it
Map<String, String> properties = Maps.newHashMap();
properties.put("created-at", OffsetDateTime.now().toString());
properties.putAll(request.properties());

// a missing spec or write order falls back to unpartitioned / unsorted
TableMetadata metadata = TableMetadata.newTableMetadata(
request.schema(),
request.spec() != null ? request.spec() : PartitionSpec.unpartitioned(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should just add these to the "spec() and writeOrder()" methods rather than null checking here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know. I'm reluctant to make the request return defaults instead of null. I'd probably leave this as it is here and let the implementation decide how to handle requests without these. That, or update newTableMetadata to do it.

request.writeOrder() != null ? request.writeOrder() : SortOrder.unsorted(),
request.location(),
properties);

return LoadTableResponse.builder()
.withTableMetadata(metadata)
.build();
}

/**
 * Creates a table through the catalog's table builder and returns its current metadata.
 *
 * @param catalog the catalog that creates the table
 * @param namespace the namespace the table is created in
 * @param request the table name, schema, location, partition spec, write order, and properties
 * @return a response wrapping the created table's current metadata
 * @throws IllegalStateException if the catalog returns a table that is not a {@link BaseTable}
 */
public static LoadTableResponse createTable(Catalog catalog, Namespace namespace, CreateTableRequest request) {
  request.validate();

  TableIdentifier ident = TableIdentifier.of(namespace, request.name());
  Table created = catalog.buildTable(ident, request.schema())
      .withLocation(request.location())
      .withPartitionSpec(request.spec())
      .withSortOrder(request.writeOrder())
      .withProperties(request.properties())
      .create();

  // metadata is only reachable through BaseTable's TableOperations
  if (!(created instanceof BaseTable)) {
    throw new IllegalStateException("Cannot wrap catalog that does not produce BaseTable");
  }

  TableMetadata metadata = ((BaseTable) created).operations().current();
  return LoadTableResponse.builder().withTableMetadata(metadata).build();
}

/**
 * Drops a table and reports the catalog's result.
 *
 * @param catalog the catalog to drop the table from
 * @param ident the identifier of the table to drop
 * @return a response carrying the boolean returned by {@code dropTable}
 */
public static DropTableResponse dropTable(Catalog catalog, TableIdentifier ident) {
  return DropTableResponse.builder()
      .dropped(catalog.dropTable(ident))
      .build();
}

/**
 * Loads a table from the catalog and returns its current metadata.
 *
 * @param catalog the catalog to load from
 * @param ident the identifier of the table to load
 * @return a response wrapping the table's current metadata
 * @throws IllegalStateException if the catalog returns a table that is not a {@link BaseTable}
 */
public static LoadTableResponse loadTable(Catalog catalog, TableIdentifier ident) {
  Table loaded = catalog.loadTable(ident);

  // metadata is only reachable through BaseTable's TableOperations
  if (!(loaded instanceof BaseTable)) {
    throw new IllegalStateException("Cannot wrap catalog that does not produce BaseTable");
  }

  TableMetadata metadata = ((BaseTable) loaded).operations().current();
  return LoadTableResponse.builder().withTableMetadata(metadata).build();
}

/**
* Applies an update request to a table, either creating the table or committing changes to an existing one.
* <p>
* When the request is a create (see {@code isCreate}), a create-or-replace transaction is started only to obtain
* {@link TableOperations} and starting metadata for a table that does not exist yet, and the changes are committed
* with {@code create}. Otherwise the table is loaded and the changes are committed with {@code commit}.
*
* @param catalog the catalog that owns the table
* @param ident the identifier of the table to create or update
* @param request the requirements to validate and the metadata updates to apply
* @return a response wrapping the table metadata after the operation
* @throws IllegalStateException if the catalog does not produce a {@link BaseTransaction} (create path) or a
* {@link BaseTable} (update path)
*/
public static LoadTableResponse updateTable(Catalog catalog, TableIdentifier ident, UpdateTableRequest request) {
TableMetadata finalMetadata;
if (isCreate(request)) {
// this is a hacky way to get TableOperations for an uncommitted table
Transaction transaction = catalog.buildTable(ident, EMPTY_SCHEMA).createOrReplaceTransaction();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the plan here that we end up making a replace table transaction here which replaces the newly created table with the updates? I'm not quite sure I follow this branch

Copy link
Contributor Author

@rdblue rdblue Mar 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace transactions are already working (using assertions and the normal commit path).

What's happening here is that the existing Catalog implementations don't allow you to get TableOperations. For all other commits, we load the table and grab TableOperations from BaseTable (see line 236). But for a create transaction, there is no table to load. The way I found to create TableOperations for a table that doesn't exist yet is to start a create transaction and get TableOperations from there.

As the comment says, this is a hacky way to get TableOperations, but I don't have a better one. Other options are: (1) don't support create transactions when wrapping Catalog, or (2) expose TableOperations in Catalog and force implementations to add a factory method. I think this is the cleanest way.

// the transaction itself is never committed; only its operations and starting metadata are used
if (transaction instanceof BaseTransaction) {
BaseTransaction baseTransaction = (BaseTransaction) transaction;
finalMetadata = create(baseTransaction.underlyingOps(), baseTransaction.startMetadata(), request);
} else {
throw new IllegalStateException("Cannot wrap catalog that does not produce BaseTransaction");
}

} else {
// existing table: commit through the TableOperations exposed by BaseTable
Table table = catalog.loadTable(ident);
if (table instanceof BaseTable) {
TableOperations ops = ((BaseTable) table).operations();
finalMetadata = commit(ops, request);
} else {
throw new IllegalStateException("Cannot wrap catalog that does not produce BaseTable");
}
}

return LoadTableResponse.builder()
.withTableMetadata(finalMetadata)
.build();
}

/**
* Returns whether an update request is a table creation.
* <p>
* A request is a create when at least one of its requirements is an {@code AssertTableDoesNotExist}. In that case
* every requirement must be of that type; any other requirement fails the argument check.
*
* @param request the update request to inspect
* @return true if the request asserts that the table does not exist
* @throws IllegalArgumentException if a create request also carries other kinds of requirements
*/
private static boolean isCreate(UpdateTableRequest request) {
boolean isCreate = request.requirements().stream()
.anyMatch(UpdateTableRequest.UpdateRequirement.AssertTableDoesNotExist.class::isInstance);

if (isCreate) {
// collect every requirement that cannot be combined with "table does not exist" for the error message
List<UpdateTableRequest.UpdateRequirement> invalidRequirements = request.requirements().stream()
.filter(req -> !(req instanceof UpdateTableRequest.UpdateRequirement.AssertTableDoesNotExist))
.collect(Collectors.toList());
Preconditions.checkArgument(invalidRequirements.isEmpty(),
"Invalid create requirements: %s", invalidRequirements);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So either all requirements assert that the table does not exist, or none do? If only some do we have an invalid Create request?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's correct.

If you're asserting that the table does not exist, then it makes no sense to assert that it has a certain current schema ID, for example.

}

return isCreate;
}

/**
 * Commits a new table by applying the request's updates on top of the starting metadata.
 * <p>
 * Requirements are not validated here: the only requirement accepted for a create is that the table does not
 * exist. The commit is attempted once, with a null base; if the table already exists, retrying cannot help.
 *
 * @param ops the table operations used to commit
 * @param start the metadata the updates are applied to
 * @param request the request whose updates are applied
 * @return the metadata reported by {@code ops.current()} after the commit
 */
private static TableMetadata create(TableOperations ops, TableMetadata start, UpdateTableRequest request) {
  TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(start);
  request.updates().forEach(change -> change.applyTo(metadataBuilder));

  // single attempt with no base metadata
  TableMetadata created = metadataBuilder.build();
  ops.commit(null, created);

  return ops.current();
}

/**
 * Validates the request's requirements and commits its updates to an existing table, retrying on conflicts.
 * <p>
 * Each attempt validates the requirements against the base metadata, applies the updates, and commits unless the
 * updates produced no changes. The first attempt uses {@code current()} as its base; later attempts call
 * {@code refresh()}. Only CommitFailedException triggers a retry, using the default commit retry settings from
 * TableProperties. A requirement failure is wrapped in ValidationFailureException so that it is not retried, and
 * is rethrown as the original CommitFailedException once the retry loop has exited.
 *
 * @param ops the table operations used to read and commit metadata
 * @param request the requirements to validate and the updates to apply
 * @return the metadata reported by {@code ops.current()} after the loop completes
 * @throws CommitFailedException if a requirement fails validation or the commit fails after retries
 */
private static TableMetadata commit(TableOperations ops, UpdateTableRequest request) {
  // false only until the first attempt has picked its base metadata
  AtomicBoolean attempted = new AtomicBoolean(false);
  try {
    Tasks.foreach(ops)
        .retry(COMMIT_NUM_RETRIES_DEFAULT)
        .exponentialBackoff(
            COMMIT_MIN_RETRY_WAIT_MS_DEFAULT,
            COMMIT_MAX_RETRY_WAIT_MS_DEFAULT,
            COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT,
            2.0 /* exponential */)
        .onlyRetryOn(CommitFailedException.class)
        .run(taskOps -> {
          // a retry must see the latest metadata, so only the first attempt uses current()
          TableMetadata base;
          if (attempted.get()) {
            base = taskOps.refresh();
          } else {
            base = taskOps.current();
          }
          attempted.set(true);

          // validate requirements
          try {
            for (UpdateTableRequest.UpdateRequirement requirement : request.requirements()) {
              requirement.validate(base);
            }
          } catch (CommitFailedException e) {
            // wrap and rethrow outside of tasks to avoid unnecessary retry
            throw new ValidationFailureException(e);
          }

          // apply changes
          TableMetadata.Builder metadataBuilder = TableMetadata.buildFrom(base);
          request.updates().forEach(change -> change.applyTo(metadataBuilder));
          TableMetadata updated = metadataBuilder.build();

          // commit only if the metadata has changed
          if (!updated.changes().isEmpty()) {
            taskOps.commit(base, updated);
          }
        });

  } catch (ValidationFailureException e) {
    throw e.wrapped();
  }

  return ops.current();
}
}
Loading