|  | 
|  | 1 | +/* | 
|  | 2 | + *  This file is part of the Wayback archival access software | 
|  | 3 | + *   (http://archive-access.sourceforge.net/projects/wayback/). | 
|  | 4 | + * | 
|  | 5 | + *  Licensed to the Internet Archive (IA) by one or more individual  | 
|  | 6 | + *  contributors.  | 
|  | 7 | + * | 
|  | 8 | + *  The IA licenses this file to You under the Apache License, Version 2.0 | 
|  | 9 | + *  (the "License"); you may not use this file except in compliance with | 
|  | 10 | + *  the License.  You may obtain a copy of the License at | 
|  | 11 | + * | 
|  | 12 | + *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 13 | + * | 
|  | 14 | + *  Unless required by applicable law or agreed to in writing, software | 
|  | 15 | + *  distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 16 | + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 17 | + *  See the License for the specific language governing permissions and | 
|  | 18 | + *  limitations under the License. | 
|  | 19 | + */ | 
|  | 20 | +package org.archive.wayback.liveweb; | 
|  | 21 | + | 
|  | 22 | +import java.io.ByteArrayInputStream; | 
|  | 23 | +import java.io.IOException; | 
|  | 24 | +import java.net.ConnectException; | 
|  | 25 | +import java.net.SocketException; | 
|  | 26 | +import java.net.SocketTimeoutException; | 
|  | 27 | +import java.net.URL; | 
|  | 28 | +import java.util.logging.Logger; | 
|  | 29 | +import java.util.zip.GZIPInputStream; | 
|  | 30 | + | 
|  | 31 | +import org.apache.commons.httpclient.ConnectTimeoutException; | 
|  | 32 | +import org.apache.commons.httpclient.HostConfiguration; | 
|  | 33 | +import org.apache.commons.httpclient.HttpClient; | 
|  | 34 | +import org.apache.commons.httpclient.HttpMethod; | 
|  | 35 | +import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; | 
|  | 36 | +import org.apache.commons.httpclient.NoHttpResponseException; | 
|  | 37 | +import org.apache.commons.httpclient.methods.GetMethod; | 
|  | 38 | +import org.apache.commons.httpclient.params.HttpClientParams; | 
|  | 39 | +import org.archive.io.arc.ARCRecord; | 
|  | 40 | +import org.archive.wayback.core.Resource; | 
|  | 41 | +import org.archive.wayback.exception.LiveDocumentNotAvailableException; | 
|  | 42 | +import org.archive.wayback.exception.LiveWebCacheUnavailableException; | 
|  | 43 | +import org.archive.wayback.exception.LiveWebTimeoutException; | 
|  | 44 | +import org.archive.wayback.exception.ResourceNotAvailableException; | 
|  | 45 | +import org.archive.wayback.resourcestore.resourcefile.ArcResource; | 
|  | 46 | +import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; | 
|  | 47 | + | 
|  | 48 | +/** | 
|  | 49 | + * This class fetches resource from live web.  | 
|  | 50 | + * It works with {@link ARCRecordingProxy} not standard proxy servers. | 
|  | 51 | + *  | 
|  | 52 | + * @author brad | 
|  | 53 | + * @see LiveWebCache | 
|  | 54 | + * @see StdRemoteLiveWebCache | 
|  | 55 | + * | 
|  | 56 | + */ | 
|  | 57 | +public class ArcRemoteLiveWebCache implements LiveWebCache { | 
|  | 58 | +	private static final Logger LOGGER = Logger.getLogger( | 
|  | 59 | +			ArcRemoteLiveWebCache.class.getName()); | 
|  | 60 | + | 
|  | 61 | +    protected MultiThreadedHttpConnectionManager connectionManager = null; | 
|  | 62 | +    protected HostConfiguration hostConfiguration = null; | 
|  | 63 | +    protected HttpClient http = null;  | 
|  | 64 | +    protected String requestPrefix = null; | 
|  | 65 | + | 
|  | 66 | +    /** | 
|  | 67 | +     *  | 
|  | 68 | +     */ | 
|  | 69 | +    public ArcRemoteLiveWebCache() { | 
|  | 70 | +    	connectionManager = new MultiThreadedHttpConnectionManager(); | 
|  | 71 | +    	hostConfiguration = new HostConfiguration(); | 
|  | 72 | +		HttpClientParams params = new HttpClientParams(); | 
|  | 73 | +        params.setParameter(HttpClientParams.RETRY_HANDLER, new NoRetryHandler()); | 
|  | 74 | +    	http = new HttpClient(params,connectionManager); | 
|  | 75 | +    	http.setHostConfiguration(hostConfiguration); | 
|  | 76 | +    } | 
|  | 77 | + | 
|  | 78 | +    /* (non-Javadoc) | 
|  | 79 | +	 * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean) | 
|  | 80 | +	 */ | 
|  | 81 | +	public Resource getCachedResource(URL url, long maxCacheMS, | 
|  | 82 | +			boolean bUseOlder) throws LiveDocumentNotAvailableException, | 
|  | 83 | +			LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException { | 
|  | 84 | +		String urlString = url.toExternalForm(); | 
|  | 85 | +		 | 
|  | 86 | +		if (requestPrefix != null) { | 
|  | 87 | +			urlString = requestPrefix + urlString; | 
|  | 88 | +		} | 
|  | 89 | +		 | 
|  | 90 | +		HttpMethod method = null; | 
|  | 91 | +		try { | 
|  | 92 | +			method = new GetMethod(urlString); | 
|  | 93 | +		} catch(IllegalArgumentException e) { | 
|  | 94 | +			LOGGER.warning("Bad URL for live web fetch:" + urlString); | 
|  | 95 | +			throw new LiveDocumentNotAvailableException("Url:" + urlString + | 
|  | 96 | +					"does not look like an URL?"); | 
|  | 97 | +		} | 
|  | 98 | +		boolean success = false; | 
|  | 99 | +	    try { | 
|  | 100 | +	    	int status = http.executeMethod(method); | 
|  | 101 | +	    	if(status == 200) { | 
|  | 102 | + | 
|  | 103 | +	    		ByteArrayInputStream bais = new ByteArrayInputStream(method.getResponseBody()); | 
|  | 104 | +	    		ARCRecord r = new ARCRecord( | 
|  | 105 | +	    				new GZIPInputStream(bais), | 
|  | 106 | +	    				"id",0L,false,false,true); | 
|  | 107 | +	    		ArcResource ar = (ArcResource)  | 
|  | 108 | +	    			ResourceFactory.ARCArchiveRecordToResource(r, null); | 
|  | 109 | +	    		if(ar.getStatusCode() == 502) { | 
|  | 110 | +	    			throw new LiveDocumentNotAvailableException(urlString); | 
|  | 111 | +	    		} else if(ar.getStatusCode() == 504) { | 
|  | 112 | +	    			throw new LiveWebTimeoutException("Timeout:" + urlString); | 
|  | 113 | +	    		} | 
|  | 114 | +	    		success = true; | 
|  | 115 | +	    		return ar; | 
|  | 116 | +	    		 | 
|  | 117 | +	    	} else { | 
|  | 118 | +	    		throw new LiveWebCacheUnavailableException(urlString); | 
|  | 119 | +	    	} | 
|  | 120 | + | 
|  | 121 | +	    } catch (ResourceNotAvailableException e) { | 
|  | 122 | +    		throw new LiveDocumentNotAvailableException(urlString); | 
|  | 123 | + | 
|  | 124 | +	    } catch (NoHttpResponseException e) { | 
|  | 125 | + | 
|  | 126 | +	    	throw new LiveWebCacheUnavailableException("No Http Response for " | 
|  | 127 | +	    			+ urlString); | 
|  | 128 | + | 
|  | 129 | +	    } catch (ConnectException e) { | 
|  | 130 | +    		throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()  | 
|  | 131 | +    				+ " : " + urlString); | 
|  | 132 | +	    } catch (SocketException e) { | 
|  | 133 | +    		throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()  | 
|  | 134 | +    				+ " : " + urlString); | 
|  | 135 | +	    } catch (SocketTimeoutException e) { | 
|  | 136 | +    		throw new LiveWebTimeoutException(e.getLocalizedMessage()  | 
|  | 137 | +    				+ " : " + urlString); | 
|  | 138 | +	    } catch(ConnectTimeoutException e) { | 
|  | 139 | +    		throw new LiveWebTimeoutException(e.getLocalizedMessage()  | 
|  | 140 | +    				+ " : " + urlString);	    	 | 
|  | 141 | +		} finally { | 
|  | 142 | +			if (!success) { | 
|  | 143 | +				method.abort(); | 
|  | 144 | +			} | 
|  | 145 | +	    	method.releaseConnection(); | 
|  | 146 | +	    } | 
|  | 147 | +	} | 
|  | 148 | + | 
|  | 149 | +	/* (non-Javadoc) | 
|  | 150 | +	 * @see org.archive.wayback.liveweb.LiveWebCache#shutdown() | 
|  | 151 | +	 */ | 
|  | 152 | +	public void shutdown() { | 
|  | 153 | +		// TODO Auto-generated method stub | 
|  | 154 | +	} | 
|  | 155 | +     | 
|  | 156 | + | 
|  | 157 | +    /** | 
|  | 158 | +     * @param hostPort to proxy requests through - ex. "localhost:3128" | 
|  | 159 | +     */ | 
|  | 160 | +    public void setProxyHostPort(String hostPort) { | 
|  | 161 | +    	int colonIdx = hostPort.indexOf(':'); | 
|  | 162 | +    	if(colonIdx > 0) { | 
|  | 163 | +    		String host = hostPort.substring(0,colonIdx); | 
|  | 164 | +    		int port = Integer.valueOf(hostPort.substring(colonIdx+1)); | 
|  | 165 | +    		 | 
|  | 166 | +//            http.getHostConfiguration().setProxy(host, port); | 
|  | 167 | +    		hostConfiguration.setProxy(host, port); | 
|  | 168 | +    	} | 
|  | 169 | +    } | 
|  | 170 | +    /** | 
|  | 171 | +     * @param maxTotalConnections the HttpConnectionManagerParams config | 
|  | 172 | +     */ | 
|  | 173 | +    public void setMaxTotalConnections(int maxTotalConnections) { | 
|  | 174 | +    	connectionManager.getParams(). | 
|  | 175 | +    		setMaxTotalConnections(maxTotalConnections); | 
|  | 176 | +    } | 
|  | 177 | +    /** | 
|  | 178 | +     * @return the HttpConnectionManagerParams maxTotalConnections config | 
|  | 179 | +     */ | 
|  | 180 | +    public int getMaxTotalConnections() { | 
|  | 181 | +    	return connectionManager.getParams().getMaxTotalConnections(); | 
|  | 182 | +    } | 
|  | 183 | +  | 
|  | 184 | +    /** | 
|  | 185 | +     * @param maxHostConnections the HttpConnectionManagerParams config  | 
|  | 186 | +     */ | 
|  | 187 | +    public void setMaxHostConnections(int maxHostConnections) { | 
|  | 188 | +    	connectionManager.getParams(). | 
|  | 189 | +    		setMaxConnectionsPerHost(hostConfiguration, maxHostConnections); | 
|  | 190 | +    } | 
|  | 191 | + | 
|  | 192 | +    /** | 
|  | 193 | +     * @return the HttpConnectionManagerParams maxHostConnections config  | 
|  | 194 | +     */ | 
|  | 195 | +    public int getMaxHostConnections() { | 
|  | 196 | +    	return connectionManager.getParams(). | 
|  | 197 | +    		getMaxConnectionsPerHost(hostConfiguration); | 
|  | 198 | +    } | 
|  | 199 | + | 
|  | 200 | +    /** | 
|  | 201 | +	 * @return the connectionTimeoutMS | 
|  | 202 | +	 */ | 
|  | 203 | +	public int getConnectionTimeoutMS() { | 
|  | 204 | +		return connectionManager.getParams().getConnectionTimeout(); | 
|  | 205 | +	} | 
|  | 206 | + | 
|  | 207 | +	/** | 
|  | 208 | +	 * @param connectionTimeoutMS the connectionTimeoutMS to set | 
|  | 209 | +	 */ | 
|  | 210 | +	public void setConnectionTimeoutMS(int connectionTimeoutMS) { | 
|  | 211 | +    	connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS); | 
|  | 212 | +	} | 
|  | 213 | + | 
|  | 214 | +	/** | 
|  | 215 | +	 * @return the socketTimeoutMS | 
|  | 216 | +	 */ | 
|  | 217 | +	public int getSocketTimeoutMS() { | 
|  | 218 | +		return connectionManager.getParams().getSoTimeout(); | 
|  | 219 | +	} | 
|  | 220 | + | 
|  | 221 | +	/** | 
|  | 222 | +	 * @param socketTimeoutMS the socketTimeoutMS to set | 
|  | 223 | +	 */ | 
|  | 224 | +	public void setSocketTimeoutMS(int socketTimeoutMS) { | 
|  | 225 | +    	connectionManager.getParams().setSoTimeout(socketTimeoutMS); | 
|  | 226 | +	} | 
|  | 227 | + | 
|  | 228 | +	public String getRequestPrefix() { | 
|  | 229 | +		return requestPrefix; | 
|  | 230 | +	} | 
|  | 231 | + | 
|  | 232 | +	public void setRequestPrefix(String requestPrefix) { | 
|  | 233 | +		this.requestPrefix = requestPrefix; | 
|  | 234 | +	} | 
|  | 235 | +	 | 
|  | 236 | +	public HttpClient getHttpClient() | 
|  | 237 | +	{ | 
|  | 238 | +		return http; | 
|  | 239 | +	} | 
|  | 240 | +} | 
0 commit comments