@@ -85,7 +85,7 @@ public static Resource getResource(String urlOrPath, long offset)
8585 try {
8686 if (urlOrPath .startsWith ("http://" )) {
8787 return getResource (new URL (urlOrPath ), offset );
88- } else if (urlOrPath .startsWith ("hdfs://" )) {
88+ } else if (urlOrPath .startsWith ("hdfs://" ) || urlOrPath . startsWith ( "s3://" ) ) {
8989 try {
9090 return getResource (new URI (urlOrPath ), offset );
9191
@@ -108,33 +108,51 @@ public static Resource getResource(String urlOrPath, long offset)
108108 }
109109
110110 protected static FileSystem hdfsSys = null ;
111+ protected static FileSystem s3Sys = null ;
111112
112113 public static Resource getResource ( URI uri , long offset )
113114 throws IOException , ResourceNotAvailableException , URISyntaxException {
114115
115116 Resource r = null ;
116-
117+ FSDataInputStream is = null ;
118+ Path path = null ;
117119 // FIXME: Put this into static initialization? or require
118120 // explicit init during startup? Or just create it each
119121 // time?
120122 //
121123
122124 // Attempt at fix: Only initializing file system once
123- if (hdfsSys == null )
124- {
125- Configuration conf = new Configuration ();
126-
127- // Assume that the URL is a fully-qualified HDFS url, like:
128- // hdfs://namenode:6100/collections/foo/some.arc.gz
129- // create fs with just the default URL
130-
131- URI defaultURI = new URI (uri .getScheme () + "://" + uri .getHost () + ":" + uri .getPort () + "/" );
132- hdfsSys = FileSystem .get (defaultURI , conf );
133- }
134-
135- Path path = new Path ( uri .getPath () );
125+ if (uri .toString ().startsWith ("s3://" )) {
126+ path = new Path (uri .toString ());
127+
128+ if (s3Sys == null )
129+ {
130+ Configuration conf = new Configuration ();
131+ s3Sys = path .getFileSystem (conf );
132+ }
133+
134+ // Assume that keys for Amazon S3 are already set in
135+ // $HADOOP_CONF/core-site.xml
136+ // Refer to https://wiki.apache.org/hadoop/AmazonS3 for more details
137+
138+ is = s3Sys .open (path );
139+ } else {
140+ if (hdfsSys == null )
141+ {
142+ Configuration conf = new Configuration ();
143+
144+ // Assume that the URL is a fully-qualified HDFS url, like:
145+ // hdfs://namenode:6100/collections/foo/some.arc.gz
146+ // create fs with just the default URL
147+
148+ URI defaultURI = new URI (uri .getScheme () + "://" + uri .getHost () + ":" + uri .getPort () + "/" );
149+ hdfsSys = FileSystem .get (defaultURI , conf );
150+ }
151+
152+ path = new Path ( uri .getPath () );
136153
137- FSDataInputStream is = hdfsSys .open ( path );
154+ is = hdfsSys .open ( path );
155+ }
138156 is .seek ( offset );
139157
140158 if (isArc (path .getName ()))
0 commit comments