Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs;

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.statistics.IOStatisticsSource;

import static java.util.Objects.requireNonNull;

/**
* API for bulk deletion of objects/files,
* <i>but not directories</i>.
* After use, call {@code close()} to release any resources and
* to guarantee store IOStatistics are updated.
* <p>
* Callers MUST have no expectation that parent directories will exist after the
* operation completes; if an object store needs to explicitly look for and create
* directory markers, that step will be omitted.
* <p>
* Be aware that on some stores (AWS S3) each object listed in a bulk delete counts
* against the write IOPS limit; large page sizes are counterproductive here, as
* are attempts at parallel submissions across multiple threads.
* @see <a href="https://issues.apache.org/jira/browse/HADOOP-16823">HADOOP-16823.
* Large DeleteObject requests are their own Thundering Herd</a>
*/
@InterfaceAudience.Public
@InterfaceStability.Unstable
public interface BulkDelete extends IOStatisticsSource, Closeable {

// NOTE(review): a page size of zero would permit no deletions at all --
// confirm whether the contract should require a value strictly greater than zero.
/**
* The maximum number of objects/files to delete in a single request.
* This is the upper bound on the size of the list which may be passed
* to {@link #bulkDelete(List)}.
* @return a number greater than or equal to zero.
*/
int pageSize();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be greater than 0?
A page size equal to 0 doesn't make sense. Also, we already have this check in the S3A implementation.


/**
* Base path of a bulk delete operation.
* All paths submitted in {@link #bulkDelete(List)} must be under this path.
* @return the base path of this bulk delete operation.
*/
Path basePath();

/**
* Delete a list of files/objects.
* <ul>
* <li>Files must be under the path provided in {@link #basePath()}.</li>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

writing contract tests for this locally., can't find the implementation of this in S3A.

* <li>The size of the list must be equal to or less than the page size
* declared in {@link #pageSize()}.</li>
* <li>Directories are not supported; the outcome of attempting to delete
* directories is undefined (ignored; undetected, listed as failures...).</li>
* <li>The operation is not atomic.</li>
* <li>The operation is treated as idempotent: network failures may
* trigger resubmission of the request -any new objects created under a
* path in the list may then be deleted.</li>
* <li>There is no guarantee that any parent directories exist after this call.
* </li>
* </ul>
* @param paths list of paths which must be absolute and under the base path
* provided in {@link #basePath()}.
* @return a list of all the paths which couldn't be deleted for a reason other
* than "not found", each paired with any associated error message.
* @throws IOException IO problems including networking, authentication and more.
* @throws IllegalArgumentException if a path argument is invalid.
*/
List<Map.Entry<Path, String>> bulkDelete(List<Path> paths)
throws IOException, IllegalArgumentException;

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Source of {@link BulkDelete} operations.
 * <p>
 * A filesystem which supports bulk deletion should implement this interface
 * and MUST also declare that support through the path capability
 * {@link CommonPathCapabilities#BULK_DELETE}.
 * <p>
 * Implementing the interface is not, on its own, a guarantee that the
 * operation is supported: only a {@link BulkDelete} instance returned from
 * {@link #createBulkDelete(Path)} is.
 */
@InterfaceAudience.Public
@InterfaceStability.Unstable
public interface BulkDeleteSource {

  /**
   * Create a bulk delete operation.
   * No network IO takes place here; the call only builds the
   * bulk delete object.
   * The path is required so that links can be resolved.
   * <p>
   * The default implementation always rejects the request with an
   * {@code UnsupportedOperationException}.
   * @param path path to delete under.
   * @return the bulk delete.
   * @throws UnsupportedOperationException bulk delete under that path is not supported.
   * @throws IllegalArgumentException path not valid.
   * @throws IOException problems resolving paths
   */
  default BulkDelete createBulkDelete(Path path)
      throws UnsupportedOperationException, IllegalArgumentException, IOException {
    final String message = "Bulk delete not supported";
    throw new UnsupportedOperationException(message);
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,10 @@ private CommonPathCapabilities() {
*/
public static final String DIRECTORY_LISTING_INCONSISTENT =
"fs.capability.directory.listing.inconsistent";

/**
* Capability string to probe for bulk delete: {@value}.
* Filesystems which support bulk deletion through {@link BulkDeleteSource}
* MUST declare that support through this path capability.
*/
public static final String BULK_DELETE = "fs.capability.bulk.delete";

}
Original file line number Diff line number Diff line change
Expand Up @@ -2108,4 +2108,62 @@ public static void maybeIgnoreMissingDirectory(FileSystem fs,
LOG.info("Ignoring missing directory {}", path);
LOG.debug("Directory missing", e);
}

/**
 * Look up the page size of bulk delete operations under a path: the maximum
 * number of objects/files which may be deleted in a single request.
 * A bulk delete operation is created for the probe and closed again
 * before this method returns.
 * @param fs filesystem
 * @param path path to delete under.
 * @return a number greater than or equal to zero.
 * @throws UnsupportedOperationException bulk delete under that path is not supported.
 * @throws IllegalArgumentException path not valid.
 * @throws IOException problems resolving paths
 */
public static int bulkDeletePageSize(FileSystem fs, Path path) throws IOException {
  final BulkDeleteSource source = toBulkDeleteSource(fs);
  try (BulkDelete operation = source.createBulkDelete(path)) {
    final int size = operation.pageSize();
    return size;
  }
}

/**
 * View a filesystem as a bulk delete source.
 * The filesystem is returned unchanged, cast to the interface, when it
 * implements {@link BulkDeleteSource}; every other instance is rejected.
 * @param fs filesystem
 * @return cast fs.
 * @throws UnsupportedOperationException FS doesn't implement the interface.
 */
private static BulkDeleteSource toBulkDeleteSource(final FileSystem fs) {
  if (fs instanceof BulkDeleteSource) {
    return (BulkDeleteSource) fs;
  }
  // anything else (including null) cannot offer bulk delete
  throw new UnsupportedOperationException("Bulk delete not supported");
}

/**
 * Delete a list of files/objects in one bulk delete operation, which is
 * created under {@code base} and closed again before this method returns.
 * <ul>
 *   <li>Files must be under the path provided in {@code base}.</li>
 *   <li>The size of the list must be equal to or less than the page size.</li>
 *   <li>Directories are not supported; the outcome of attempting to delete
 *   directories is undefined (ignored; undetected, listed as failures...).</li>
 *   <li>The operation is not atomic.</li>
 *   <li>The operation is treated as idempotent: network failures may
 *   trigger resubmission of the request -any new objects created under a
 *   path in the list may then be deleted.</li>
 *   <li>There is no guarantee that any parent directories exist after this call.
 *   </li>
 * </ul>
 * @param fs filesystem
 * @param base path to delete under.
 * @param paths list of paths which must be absolute and under the base path.
 * @return a list of all the paths which couldn't be deleted for a reason other
 * than "not found" and any associated error message.
 * @throws UnsupportedOperationException bulk delete under that path is not supported.
 * @throws IOException IO problems including networking, authentication and more.
 * @throws IllegalArgumentException if a path argument is invalid.
 */
public static List<Map.Entry<Path, String>> bulkDelete(FileSystem fs, Path base, List<Path> paths)
    throws IOException {
  final BulkDeleteSource source = toBulkDeleteSource(fs);
  try (BulkDelete operation = source.createBulkDelete(base)) {
    final List<Map.Entry<Path, String>> failures = operation.bulkDelete(paths);
    return failures;
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ public final class StoreStatisticNames {
/** {@value}. */
public static final String OP_APPEND = "op_append";

/**
 * Name of the bulk delete operation statistic: {@value}.
 * Words are separated by underscores only, matching the other
 * {@code op_} statistic names in this class.
 */
public static final String OP_BULK_DELETE = "op_bulk_delete";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super nit: change to op_bulk_delete to match how other OPs are named


/** {@value}. */
public static final String OP_COPY_FROM_LOCAL_FILE =
"op_copy_from_local_file";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.util.functional;

import java.util.Map;
import java.util.Objects;

import org.apache.hadoop.classification.InterfaceStability;

/**
 * Tuple support.
 * This allows for tuples to be passed around as part of the public API without
 * committing to a third-party library tuple implementation.
 */
@InterfaceStability.Unstable
public final class Tuples {

  private Tuples() {
  }

  /**
   * Create a 2-tuple.
   * The returned entry is immutable: {@code setValue()} always fails.
   * It follows the {@code Map.Entry} contract for {@code equals()} and
   * {@code hashCode()}, so it compares equal to any other entry with
   * the same key and value.
   * @param key element 1
   * @param value element 2
   * @return a tuple.
   * @param <K> element 1 type
   * @param <V> element 2 type
   */
  public static <K, V> Map.Entry<K, V> pair(final K key, final V value) {
    return new Tuple<>(key, value);
  }

  /**
   * Simple tuple class: uses the Map.Entry interface as other
   * implementations have done, so the API is available across
   * all java versions.
   * @param <K> key
   * @param <V> value
   */
  private static final class Tuple<K, V> implements Map.Entry<K, V> {

    private final K key;

    private final V value;

    private Tuple(final K key, final V value) {
      this.key = key;
      this.value = value;
    }

    @Override
    public K getKey() {
      return key;
    }

    @Override
    public V getValue() {
      return value;
    }

    /**
     * Always rejected: the tuple is immutable.
     * @param value ignored
     * @return never returns normally.
     * @throws UnsupportedOperationException always.
     */
    @Override
    public V setValue(final V value) {
      throw new UnsupportedOperationException("Tuple is immutable");
    }

    /**
     * Equality as defined by {@code Map.Entry}: any entry with an equal key
     * and an equal value is equal, whatever its implementation class.
     * Comparing against the interface rather than this class keeps the
     * relation symmetric with the JDK entry implementations.
     * @param o object to compare with
     * @return true if {@code o} is an entry with the same key and value.
     */
    @Override
    public boolean equals(final Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof Map.Entry)) {
        return false;
      }
      final Map.Entry<?, ?> that = (Map.Entry<?, ?>) o;
      return Objects.equals(key, that.getKey())
          && Objects.equals(value, that.getValue());
    }

    /**
     * Hash code as defined by {@code Map.Entry}: the XOR of the key and
     * value hash codes, with null hashing to zero.
     * @return the hash code.
     */
    @Override
    public int hashCode() {
      return Objects.hashCode(key) ^ Objects.hashCode(value);
    }

    @Override
    public String toString() {
      return "(" + key + ", " + value + ')';
    }

  }
}
Loading