-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Introduce PathFilter support in DirectoryLister #13590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,12 +15,16 @@ | |
|
|
||
| import org.apache.hadoop.fs.FileSystem; | ||
| import org.apache.hadoop.fs.Path; | ||
| import org.apache.hadoop.fs.PathFilter; | ||
|
|
||
| import java.util.Iterator; | ||
| import java.util.Optional; | ||
|
|
||
| import static com.facebook.presto.hive.util.HiveFileIterator.NestedDirectoryPolicy; | ||
|
|
||
| public interface DirectoryLister | ||
| { | ||
| Iterator<HiveFileInfo> list(FileSystem fileSystem, Path path, NamenodeStats namenodeStats, NestedDirectoryPolicy nestedDirectoryPolicy); | ||
|
|
||
| Iterator<HiveFileInfo> list(FileSystem fileSystem, Path path, NamenodeStats namenodeStats, NestedDirectoryPolicy nestedDirectoryPolicy, Optional<PathFilter> pathFilter); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's leave only a single method in the interface (remove the old one)
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| import com.facebook.presto.spi.PrestoException; | ||
| import com.google.common.collect.AbstractIterator; | ||
| import org.apache.hadoop.fs.Path; | ||
| import org.apache.hadoop.fs.PathFilter; | ||
| import org.apache.hadoop.fs.RemoteIterator; | ||
|
|
||
| import java.io.FileNotFoundException; | ||
|
|
@@ -27,6 +28,7 @@ | |
| import java.util.Collections; | ||
| import java.util.Deque; | ||
| import java.util.Iterator; | ||
| import java.util.Optional; | ||
|
|
||
| import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR; | ||
| import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILE_NOT_FOUND; | ||
|
|
@@ -46,19 +48,22 @@ public enum NestedDirectoryPolicy | |
| private final ListDirectoryOperation listDirectoryOperation; | ||
| private final NamenodeStats namenodeStats; | ||
| private final NestedDirectoryPolicy nestedDirectoryPolicy; | ||
| private final Optional<PathFilter> pathFilter; | ||
|
|
||
| private Iterator<HiveFileInfo> remoteIterator = Collections.emptyIterator(); | ||
|
|
||
| public HiveFileIterator( | ||
| Path path, | ||
| ListDirectoryOperation listDirectoryOperation, | ||
| NamenodeStats namenodeStats, | ||
| NestedDirectoryPolicy nestedDirectoryPolicy) | ||
| NestedDirectoryPolicy nestedDirectoryPolicy, | ||
| Optional<PathFilter> pathFilter) | ||
| { | ||
| paths.addLast(requireNonNull(path, "path is null")); | ||
| this.listDirectoryOperation = requireNonNull(listDirectoryOperation, "listDirectoryOperation is null"); | ||
| this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null"); | ||
| this.nestedDirectoryPolicy = requireNonNull(nestedDirectoryPolicy, "nestedDirectoryPolicy is null"); | ||
| this.pathFilter = requireNonNull(pathFilter, "path filter is null"); | ||
|
bhasudha marked this conversation as resolved.
|
||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -92,14 +97,14 @@ protected HiveFileInfo computeNext() | |
| if (paths.isEmpty()) { | ||
| return endOfData(); | ||
| } | ||
| remoteIterator = getLocatedFileStatusRemoteIterator(paths.removeFirst()); | ||
| remoteIterator = getLocatedFileStatusRemoteIterator(paths.removeFirst(), pathFilter); | ||
| } | ||
| } | ||
|
|
||
| private Iterator<HiveFileInfo> getLocatedFileStatusRemoteIterator(Path path) | ||
| private Iterator<HiveFileInfo> getLocatedFileStatusRemoteIterator(Path path, Optional<PathFilter> pathFilter) | ||
| { | ||
| try (TimeStat.BlockTimer ignored = namenodeStats.getListLocatedStatus().time()) { | ||
| return new FileStatusIterator(path, listDirectoryOperation, namenodeStats); | ||
| return new FileStatusIterator(path, listDirectoryOperation, namenodeStats, pathFilter); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -116,11 +121,14 @@ private static class FileStatusIterator | |
| private final Path path; | ||
| private final NamenodeStats namenodeStats; | ||
| private final RemoteIterator<HiveFileInfo> fileStatusIterator; | ||
| private final Optional<PathFilter> pathFilter; | ||
| private HiveFileInfo next; | ||
|
|
||
| private FileStatusIterator(Path path, ListDirectoryOperation listDirectoryOperation, NamenodeStats namenodeStats) | ||
| private FileStatusIterator(Path path, ListDirectoryOperation listDirectoryOperation, NamenodeStats namenodeStats, Optional<PathFilter> pathFilter) | ||
| { | ||
| this.path = path; | ||
| this.namenodeStats = namenodeStats; | ||
| this.pathFilter = pathFilter; | ||
| try { | ||
| this.fileStatusIterator = listDirectoryOperation.list(path); | ||
| } | ||
|
|
@@ -129,26 +137,39 @@ private FileStatusIterator(Path path, ListDirectoryOperation listDirectoryOperat | |
| } | ||
| } | ||
|
|
||
| @Override | ||
| public boolean hasNext() | ||
| private void computeNext() | ||
| { | ||
| if (next != null) { | ||
| return; | ||
| } | ||
| try { | ||
| return fileStatusIterator.hasNext(); | ||
| while (fileStatusIterator.hasNext()) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about wrapping the
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm. I tried doing this. It looks like this accepts only Iterator as the input parameter - https://github.com/google/guava/blob/49f5a6332a63737dff70cf77472f9867bc7ca6eb/guava/src/com/google/common/collect/Iterators.java#L629 However the ListDirectoryOperation.list(path) return RemoteIterator. |
||
| HiveFileInfo hiveFileInfo = fileStatusIterator.next(); | ||
| if (!pathFilter.isPresent() || pathFilter.get().accept(hiveFileInfo.getPath())) { | ||
| next = hiveFileInfo; | ||
| return; | ||
| } | ||
| } | ||
| } | ||
| catch (IOException e) { | ||
| throw processException(e); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public boolean hasNext() | ||
| { | ||
| computeNext(); | ||
| return (next != null); | ||
| } | ||
|
|
||
| @Override | ||
| public HiveFileInfo next() | ||
| { | ||
| try { | ||
| return fileStatusIterator.next(); | ||
| } | ||
| catch (IOException e) { | ||
| throw processException(e); | ||
| } | ||
| computeNext(); | ||
| HiveFileInfo result = next; | ||
| next = null; | ||
| return result; | ||
| } | ||
|
|
||
| private PrestoException processException(IOException exception) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.