@@ -85,7 +85,7 @@ public static Resource getResource(String urlOrPath, long offset)
8585		try  {
8686			if (urlOrPath .startsWith ("http://" )) {
8787				return  getResource (new  URL (urlOrPath ), offset );
88-             } else  if (urlOrPath .startsWith ("hdfs://" )) {           	
88+             } else  if (urlOrPath .startsWith ("hdfs://" ) ||  urlOrPath . startsWith ( "s3://" ) ) {           	
8989                try  {
9090                  return  getResource (new  URI (urlOrPath ), offset );
9191
@@ -108,33 +108,51 @@ public static Resource getResource(String urlOrPath, long offset)
108108	}
109109
110110	protected  static  FileSystem  hdfsSys  = null ; // Cached HDFS handle: built in getResource(URI, long) from the URI's scheme/host/port when null, then reused. NOTE(review): later URIs with a different authority would reuse it - confirm intended.
111+     protected  static  FileSystem  s3Sys  = null ; // Cached S3 handle: built in getResource(URI, long) via Path.getFileSystem for an s3:// URI when null, then reused. NOTE(review): no synchronization visible here - confirm thread-safety.
111112
112113  public  static  Resource  getResource ( URI  uri , long  offset )
113114    throws  IOException , ResourceNotAvailableException , URISyntaxException  {
114115
115116    Resource  r  = null ;
116-     
117+     FSDataInputStream  is  = null ;
118+     Path  path  = null ; 
117119    // FIXME: Put this into static initialization?  or require 
118120    //        explicit init during startup?  Or just create it each 
119121    //        time? 
120122    //  
121123
122124    // Attempt at fix: Only initializing file system once     
123-     if  (hdfsSys  == null )
124-     {
125-         Configuration  conf  = new  Configuration ();
126- 
127-         // Assume that the URL is a fully-qualified HDFS url, like: 
128-         //   hdfs://namenode:6100/collections/foo/some.arc.gz 
129-         // create fs with just the default URL 
130-         
131-         URI  defaultURI  = new  URI (uri .getScheme () + "://"  + uri .getHost () + ":" + uri .getPort () + "/" );
132-         hdfsSys  = FileSystem .get (defaultURI , conf );
133-     }
134-         
135-     Path  path  = new  Path ( uri .getPath () );
125+     if  (uri .toString ().startsWith ("s3://" )) {
126+       path  = new  Path (uri .toString ());
127+       
128+       if  (s3Sys  == null )
129+       {
130+           Configuration  conf  = new  Configuration ();
131+           s3Sys  = path .getFileSystem (conf );
132+       }
133+       
134+       // Assume that keys for Amazon S3 are already set in  
135+       // $HADOOP_CONF/core-site.xml 
136+       // Refer to https://wiki.apache.org/hadoop/AmazonS3 for more details 
137+        
138+       is  = s3Sys .open (path );
139+     } else  {
140+       if  (hdfsSys  == null )
141+       {
142+           Configuration  conf  = new  Configuration ();
143+ 
144+           // Assume that the URL is a fully-qualified HDFS url, like: 
145+           //   hdfs://namenode:6100/collections/foo/some.arc.gz 
146+           // create fs with just the default URL 
147+           
148+           URI  defaultURI  = new  URI (uri .getScheme () + "://"  + uri .getHost () + ":" + uri .getPort () + "/" );
149+           hdfsSys  = FileSystem .get (defaultURI , conf );
150+       }
151+           
152+       path  = new  Path ( uri .getPath () );
136153
137-     FSDataInputStream  is  = hdfsSys .open ( path  );
154+       is  = hdfsSys .open ( path  );
155+     }
138156    is .seek ( offset  );
139157
140158    if  (isArc (path .getName ()))
0 commit comments