@@ -85,7 +85,7 @@ public static Resource getResource(String urlOrPath, long offset)
         try {
             if(urlOrPath.startsWith("http://")) {
                 return getResource(new URL(urlOrPath), offset);
-            } else if(urlOrPath.startsWith("hdfs://")) {
+            } else if(urlOrPath.startsWith("hdfs://") || urlOrPath.startsWith("s3://")) {
                 try {
                     return getResource(new URI(urlOrPath), offset);
 
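With this change the string-based entry point treats s3:// locations the same way as hdfs:// ones, handing both off to the URI overload in the next hunk. A hypothetical caller might look like the sketch below; the enclosing ResourceFactory class and its package are assumed from the openwayback codebase (the diff itself only shows the method bodies), and the bucket and collection paths are made up:

    // Hypothetical usage sketch. ResourceFactory and the imports are assumed
    // from the openwayback codebase; the locations and offset are illustrative.
    import org.archive.wayback.core.Resource;
    import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;

    public class S3ResourceDemo {
        public static void main(String[] args) throws Exception {
            long offset = 0L; // byte offset of the record inside the (W)ARC file

            // Existing behaviour: resolved through the cached HDFS FileSystem.
            Resource fromHdfs = ResourceFactory.getResource(
                    "hdfs://namenode:6100/collections/foo/some.arc.gz", offset);
            fromHdfs.close();

            // New behaviour: same entry point, now backed by the S3 FileSystem.
            Resource fromS3 = ResourceFactory.getResource(
                    "s3://example-bucket/collections/foo/some.arc.gz", offset);
            fromS3.close();
        }
    }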
@@ -108,33 +108,51 @@ public static Resource getResource(String urlOrPath, long offset)
     }
 
     protected static FileSystem hdfsSys = null;
+    protected static FileSystem s3Sys = null;
 
     public static Resource getResource( URI uri, long offset)
         throws IOException, ResourceNotAvailableException, URISyntaxException {
 
         Resource r = null;
 
+        FSDataInputStream is = null;
+        Path path = null;
         // FIXME: Put this into static initialization? or require
         // explicit init during startup? Or just create it each
         // time?
         //
 
         // Attempt at fix: Only initializing file system once
-        if (hdfsSys == null)
-        {
-            Configuration conf = new Configuration();
-
-            // Assume that the URL is a fully-qualified HDFS url, like:
-            // hdfs://namenode:6100/collections/foo/some.arc.gz
-            // create fs with just the default URL
-
-            URI defaultURI = new URI(uri.getScheme() + "://" + uri.getHost() + ":"+ uri.getPort() + "/");
-            hdfsSys = FileSystem.get(defaultURI, conf);
-        }
-
-        Path path = new Path( uri.getPath() );
-
-        FSDataInputStream is = hdfsSys.open( path );
+        if (uri.toString().startsWith("s3://")) {
+            path = new Path(uri.toString());
+
+            if (s3Sys == null)
+            {
+                Configuration conf = new Configuration();
+                s3Sys = path.getFileSystem(conf);
+            }
+
+            // Assume that keys for Amazon S3 are already set in
+            // $HADOOP_CONF/core-site.xml
+            // Refer to https://wiki.apache.org/hadoop/AmazonS3 for more details
+
+            is = s3Sys.open(path);
+        } else {
+            if (hdfsSys == null)
+            {
+                Configuration conf = new Configuration();
+
+                // Assume that the URL is a fully-qualified HDFS url, like:
+                // hdfs://namenode:6100/collections/foo/some.arc.gz
+                // create fs with just the default URL
+
+                URI defaultURI = new URI(uri.getScheme() + "://" + uri.getHost() + ":"+ uri.getPort() + "/");
+                hdfsSys = FileSystem.get(defaultURI, conf);
+            }
+
+            path = new Path( uri.getPath() );
+            is = hdfsSys.open( path );
+        }
         is.seek( offset );
 
         if (isArc(path.getName()))
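The added comments defer S3 credential handling to the Hadoop configuration rather than the code. A minimal core-site.xml sketch for the block-based s3:// scheme used here, with property names taken from the wiki page cited in the diff and placeholder values only:

    <!-- $HADOOP_CONF/core-site.xml (sketch): credentials for the s3:// scheme.
         Property names per https://wiki.apache.org/hadoop/AmazonS3; the values
         are placeholders, and real keys should never be committed. -->
    <configuration>
      <property>
        <name>fs.s3.awsAccessKeyId</name>
        <value>YOUR_ACCESS_KEY_ID</value>
      </property>
      <property>
        <name>fs.s3.awsSecretAccessKey</name>
        <value>YOUR_SECRET_ACCESS_KEY</value>
      </property>
    </configuration>

Because s3Sys is obtained through path.getFileSystem(conf), the URI scheme selects the FileSystem implementation, so supporting s3n:// as well would mainly mean extending the startsWith checks and adding the analogous fs.s3n.* keys.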