-
Notifications
You must be signed in to change notification settings - Fork 9.2k
HADOOP-18679. Add API for bulk/paged object deletion #6494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0787a7c
89826f8
593b791
ea19f43
1420c99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.hadoop.fs; | ||
|
|
||
| import java.io.Closeable; | ||
| import java.io.IOException; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
|
|
||
| import org.apache.hadoop.classification.InterfaceAudience; | ||
| import org.apache.hadoop.classification.InterfaceStability; | ||
| import org.apache.hadoop.fs.statistics.IOStatisticsSource; | ||
|
|
||
| import static java.util.Objects.requireNonNull; | ||
|
|
||
| /** | ||
| * API for bulk deletion of objects/files, | ||
| * <i>but not directories</i>. | ||
| * After use, call {@code close()} to release any resources and | ||
| * to guarantee store IOStatistics are updated. | ||
| * <p> | ||
| * Callers MUST have no expectation that parent directories will exist after the | ||
| * operation completes; if an object store needs to explicitly look for and create | ||
| * directory markers, that step will be omitted. | ||
| * <p> | ||
| * Be aware that on some stores (AWS S3) each object listed in a bulk delete counts | ||
| * against the write IOPS limit; large page sizes are counterproductive here, as | ||
| * are attempts at parallel submissions across multiple threads. | ||
| * @see <a href="https://issues.apache.org/jira/browse/HADOOP-16823">HADOOP-16823. | ||
| * Large DeleteObject requests are their own Thundering Herd</a> | ||
| */ | ||
| @InterfaceAudience.Public | ||
| @InterfaceStability.Unstable | ||
| public interface BulkDelete extends IOStatisticsSource, Closeable { | ||
|
|
||
| /** | ||
| * The maximum number of objects/files to delete in a single request. | ||
| * @return a number greater than zero. | ||
| */ | ||
| int pageSize(); | ||
|
|
||
| /** | ||
| * Base path of a bulk delete operation. | ||
| * All paths submitted in {@link #bulkDelete(List)} must be under this path. | ||
| * @return the base path of this operation. | ||
| */ | ||
| Path basePath(); | ||
|
|
||
| /** | ||
| * Delete a list of files/objects. | ||
| * <ul> | ||
| * <li>Files must be under the path provided in {@link #basePath()}.</li> | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. writing contract tests for this locally., can't find the implementation of this in S3A. |
||
| * <li>The size of the list must be equal to or less than the page size | ||
| * declared in {@link #pageSize()}.</li> | ||
| * <li>Directories are not supported; the outcome of attempting to delete | ||
| * directories is undefined (ignored, undetected, listed as failures...).</li> | ||
| * <li>The operation is not atomic.</li> | ||
| * <li>The operation is treated as idempotent: network failures may | ||
| * trigger resubmission of the request; any new objects created under a | ||
| * path in the list may then be deleted.</li> | ||
| * <li>There is no guarantee that any parent directories exist after this call. | ||
| * </li> | ||
| * </ul> | ||
| * @param paths list of paths which must be absolute and under the base path | ||
| * provided in {@link #basePath()}. | ||
| * @return a list of (path, text) entries. NOTE(review): presumably the paths which | ||
| * could not be deleted, each paired with an error message; confirm against | ||
| * the implementations. | ||
| * @throws IOException IO problems including networking, authentication and more. | ||
| * @throws IllegalArgumentException if a path argument is invalid. | ||
| */ | ||
| List<Map.Entry<Path, String>> bulkDelete(List<Path> paths) | ||
| throws IOException, IllegalArgumentException; | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.hadoop.fs; | ||
|
|
||
| import java.io.IOException; | ||
|
|
||
| import org.apache.hadoop.classification.InterfaceAudience; | ||
| import org.apache.hadoop.classification.InterfaceStability; | ||
|
|
||
| /** | ||
| * Interface for bulk deletion. | ||
| * Filesystems which support bulk deletion should implement this interface | ||
| * and MUST also declare their support in the path capability | ||
| * {@link CommonPathCapabilities#BULK_DELETE}. | ||
| * Implementing the interface does not guarantee that the operation is supported; | ||
| * only the return of a {@link BulkDelete} object from the call | ||
| * {@link #createBulkDelete(Path)} does. | ||
| */ | ||
| @InterfaceAudience.Public | ||
| @InterfaceStability.Unstable | ||
| public interface BulkDeleteSource { | ||
|
|
||
| /** | ||
| * Create a bulk delete operation. | ||
| * There is no network IO at this point, simply the creation of | ||
| * a bulk delete object. | ||
| * A path must be supplied to assist in link resolution. | ||
| * <p> | ||
| * The default implementation always throws | ||
| * {@code UnsupportedOperationException}; implementations which support | ||
| * bulk deletion must override it. | ||
| * @param path path to delete under. | ||
| * @return the bulk delete. | ||
| * @throws UnsupportedOperationException bulk delete under that path is not supported. | ||
| * @throws IllegalArgumentException path not valid. | ||
| * @throws IOException problems resolving paths. | ||
| */ | ||
| default BulkDelete createBulkDelete(Path path) | ||
| throws UnsupportedOperationException, IllegalArgumentException, IOException { | ||
| throw new UnsupportedOperationException("Bulk delete not supported"); | ||
| } | ||
|
|
||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,6 +46,9 @@ public final class StoreStatisticNames { | |
| /** {@value}. */ | ||
| public static final String OP_APPEND = "op_append"; | ||
|
|
||
| /** {@value}. */ | ||
| public static final String OP_BULK_DELETE = "op_bulk-delete"; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. super nit: change to |
||
|
|
||
| /** {@value}. */ | ||
| public static final String OP_COPY_FROM_LOCAL_FILE = | ||
| "op_copy_from_local_file"; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.hadoop.util.functional; | ||
|
|
||
| import java.util.Map; | ||
|
|
||
| import org.apache.hadoop.classification.InterfaceStability; | ||
|
|
||
| /** | ||
| * Tuple support. | ||
| * This allows for tuples to be passed around as part of the public API without | ||
| * committing to a third-party library tuple implementation. | ||
| */ | ||
| @InterfaceStability.Unstable | ||
public final class Tuples {

  /** Utility class: not to be instantiated. */
  private Tuples() {
  }

  /**
   * Create a 2-tuple.
   * The returned entry rejects {@code setValue()} and compares equal to
   * any other {@link Map.Entry} holding an equal key and an equal value.
   * @param key element 1
   * @param value element 2
   * @param <K> element 1 type
   * @param <V> element 2 type
   * @return a tuple.
   */
  public static <K, V> Map.Entry<K, V> pair(final K key, final V value) {
    return new Tuple<>(key, value);
  }

  /**
   * Simple tuple class: uses the Map.Entry interface as other
   * implementations have done, so the API is available across
   * all java versions.
   * Equality and hash code follow the rules laid down in the
   * {@link Map.Entry} javadocs, so a tuple is interchangeable with
   * entries from other implementations in sets, maps and comparisons.
   * @param <K> key
   * @param <V> value
   */
  private static final class Tuple<K, V> implements Map.Entry<K, V> {

    /** Element 1; may be null. */
    private final K key;

    /** Element 2; may be null. */
    private final V value;

    private Tuple(final K key, final V value) {
      this.key = key;
      this.value = value;
    }

    @Override
    public K getKey() {
      return key;
    }

    @Override
    public V getValue() {
      return value;
    }

    /**
     * Always fails: the tuple cannot have its value replaced.
     * @param value ignored.
     * @return never returns normally.
     * @throws UnsupportedOperationException always.
     */
    @Override
    public V setValue(final V value) {
      throw new UnsupportedOperationException("Tuple is immutable");
    }

    /**
     * Equality as required by {@link Map.Entry#equals(Object)}:
     * the other object must be a map entry whose key and value are
     * both equal to (or null along with) those of this tuple.
     * @param o object to compare with.
     * @return true if the other object is an entry with the same mapping.
     */
    @Override
    public boolean equals(final Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof Map.Entry)) {
        return false;
      }
      final Map.Entry<?, ?> that = (Map.Entry<?, ?>) o;
      final boolean sameKey = key == null
          ? that.getKey() == null
          : key.equals(that.getKey());
      final boolean sameValue = value == null
          ? that.getValue() == null
          : value.equals(that.getValue());
      return sameKey && sameValue;
    }

    /**
     * Hash code as required by {@link Map.Entry#hashCode()}:
     * the XOR of the key and value hash codes, with null hashing to 0.
     * @return the entry hash code.
     */
    @Override
    public int hashCode() {
      final int keyHash = key == null ? 0 : key.hashCode();
      final int valueHash = value == null ? 0 : value.hashCode();
      return keyHash ^ valueHash;
    }

    @Override
    public String toString() {
      return "(" + key + ", " + value + ')';
    }

  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this be greater than 0?
Equal to 0 doesn't make sense. Also, we have the check in the S3A impl.