Skip to content

Commit c59a98e

Browse files
author
Roger G. Coram
committed
Merge pull request #189 from akshaydixi/s3-feature
Adding functionality for loading resources from S3 buckets
2 parents c51fe79 + dee814b commit c59a98e

File tree

1 file changed

+34
-16
lines changed

1 file changed

+34
-16
lines changed

wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ public static Resource getResource(String urlOrPath, long offset)
8585
try {
8686
if(urlOrPath.startsWith("http://")) {
8787
return getResource(new URL(urlOrPath), offset);
88-
} else if(urlOrPath.startsWith("hdfs://")) {
88+
} else if(urlOrPath.startsWith("hdfs://") || urlOrPath.startsWith("s3://")) {
8989
try {
9090
return getResource(new URI(urlOrPath), offset);
9191

@@ -108,33 +108,51 @@ public static Resource getResource(String urlOrPath, long offset)
108108
}
109109

110110
protected static FileSystem hdfsSys = null;
111+
protected static FileSystem s3Sys = null;
111112

112113
public static Resource getResource( URI uri, long offset)
113114
throws IOException, ResourceNotAvailableException, URISyntaxException {
114115

115116
Resource r = null;
116-
117+
FSDataInputStream is = null;
118+
Path path = null;
117119
// FIXME: Put this into static initialization? or require
118120
// explicit init during startup? Or just create it each
119121
// time?
120122
//
121123

122124
// Attempt at fix: Only initializing file system once
123-
if (hdfsSys == null)
124-
{
125-
Configuration conf = new Configuration();
126-
127-
// Assume that the URL is a fully-qualified HDFS url, like:
128-
// hdfs://namenode:6100/collections/foo/some.arc.gz
129-
// create fs with just the default URL
130-
131-
URI defaultURI = new URI(uri.getScheme() + "://" + uri.getHost() + ":"+ uri.getPort() + "/");
132-
hdfsSys = FileSystem.get(defaultURI, conf);
133-
}
134-
135-
Path path = new Path( uri.getPath() );
125+
if (uri.toString().startsWith("s3://")) {
126+
path = new Path(uri.toString());
127+
128+
if (s3Sys == null)
129+
{
130+
Configuration conf = new Configuration();
131+
s3Sys = path.getFileSystem(conf);
132+
}
133+
134+
// Assume that keys for Amazon S3 are already set in
135+
// $HADOOP_CONF/core-site.xml
136+
// Refer to https://wiki.apache.org/hadoop/AmazonS3 for more details
137+
138+
is = s3Sys.open(path);
139+
} else {
140+
if (hdfsSys == null)
141+
{
142+
Configuration conf = new Configuration();
143+
144+
// Assume that the URL is a fully-qualified HDFS url, like:
145+
// hdfs://namenode:6100/collections/foo/some.arc.gz
146+
// create fs with just the default URL
147+
148+
URI defaultURI = new URI(uri.getScheme() + "://" + uri.getHost() + ":"+ uri.getPort() + "/");
149+
hdfsSys = FileSystem.get(defaultURI, conf);
150+
}
151+
152+
path = new Path( uri.getPath() );
136153

137-
FSDataInputStream is = hdfsSys.open( path );
154+
is = hdfsSys.open( path );
155+
}
138156
is.seek( offset );
139157

140158
if (isArc(path.getName()))

0 commit comments

Comments
 (0)