diff --git a/pom.xml b/pom.xml
index 1b9a5013cb..d7044ca170 100644
--- a/pom.xml
+++ b/pom.xml
@@ -388,6 +388,12 @@
+
+ Mohamed Elsayed
+ https://github.com/MohammedElsayyed
+ The New Library of Alexandria
+ http://bibalex.org/
+
diff --git a/src/site/xdoc/release_notes.xml b/src/site/xdoc/release_notes.xml
index 7faad0a502..6dfeb315f6 100644
Binary files a/src/site/xdoc/release_notes.xml and b/src/site/xdoc/release_notes.xml differ
diff --git a/wayback-core/pom.xml b/wayback-core/pom.xml
index b2c9bca49c..a02a04fa8a 100644
--- a/wayback-core/pom.xml
+++ b/wayback-core/pom.xml
@@ -125,6 +125,12 @@
 2.5.1
 test
+
+ org.apache.httpcomponents
+ httpclient
+ 4.3.5
+ jar
+
diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/ArcRemoteLiveWebCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/ArcRemoteLiveWebCache.java
new file mode 100644
index 0000000000..3e54782048
--- /dev/null
+++ b/wayback-core/src/main/java/org/archive/wayback/liveweb/ArcRemoteLiveWebCache.java
@@ -0,0 +1,265 @@
+/*
+ * This file is part of the Wayback archival access software
+ * (http://archive-access.sourceforge.net/projects/wayback/).
+ *
+ * Licensed to the Internet Archive (IA) by one or more individual
+ * contributors.
+ *
+ * The IA licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.archive.wayback.liveweb;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.net.ConnectException;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+import java.net.URL;
+import java.util.logging.Logger;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.commons.httpclient.ConnectTimeoutException;
+import org.apache.commons.httpclient.HostConfiguration;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpMethod;
+import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
+import org.apache.commons.httpclient.NoHttpResponseException;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.archive.io.arc.ARCRecord;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.exception.LiveDocumentNotAvailableException;
+import org.archive.wayback.exception.LiveWebCacheUnavailableException;
+import org.archive.wayback.exception.LiveWebTimeoutException;
+import org.archive.wayback.exception.ResourceNotAvailableException;
+import org.archive.wayback.resourcestore.resourcefile.ArcResource;
+import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;
+
+/**
+ * Fetches resources from the live web through an {@link ARCRecordingProxy}.
+ * Every successful proxy response is read as a gzipped ARC record, so this
+ * class does not work with standard proxy servers.
+ *
+ * @author brad
+ * @see LiveWebCache
+ * @see StdRemoteLiveWebCache
+ */
+public class ArcRemoteLiveWebCache implements LiveWebCache {
+    private static final Logger LOGGER = Logger.getLogger(
+            ArcRemoteLiveWebCache.class.getName());
+
+    protected MultiThreadedHttpConnectionManager connectionManager = null;
+    protected HostConfiguration hostConfiguration = null;
+    protected HttpClient http = null;
+    protected String requestPrefix = null;
+
+    /**
+     * Creates the HTTP client on a multi-threaded connection manager and
+     * installs {@link NoRetryHandler} as its retry handler.
+     */
+    public ArcRemoteLiveWebCache() {
+        connectionManager = new MultiThreadedHttpConnectionManager();
+        hostConfiguration = new HostConfiguration();
+        HttpClientParams params = new HttpClientParams();
+        params.setParameter(HttpClientParams.RETRY_HANDLER, new NoRetryHandler());
+        http = new HttpClient(params, connectionManager);
+        http.setHostConfiguration(hostConfiguration);
+    }
+
+    /**
+     * Fetches {@code url} through the proxy and unwraps the gzipped ARC record
+     * it returns.
+     *
+     * @param url the live web URL to fetch
+     * @param maxCacheMS not consulted by this implementation
+     * @param bUseOlder not consulted by this implementation
+     * @return the resource built from the returned ARC record
+     * @throws LiveDocumentNotAvailableException if the URL is rejected by the
+     *         HTTP client, the record cannot be converted to a resource, or
+     *         the recorded status code is 502
+     * @throws LiveWebCacheUnavailableException if the proxy answers with a
+     *         status other than 200, sends no response, or the connection fails
+     * @throws LiveWebTimeoutException if the recorded status code is 504 or
+     *         the connection or socket times out
+     * @throws IOException for any other I/O failure
+     * @see LiveWebCache#getCachedResource(java.net.URL, long, boolean)
+     */
+    public Resource getCachedResource(URL url, long maxCacheMS,
+            boolean bUseOlder) throws LiveDocumentNotAvailableException,
+            LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException {
+        String urlString = url.toExternalForm();
+
+        if (requestPrefix != null) {
+            urlString = requestPrefix + urlString;
+        }
+
+        HttpMethod method = null;
+        try {
+            method = new GetMethod(urlString);
+        } catch (IllegalArgumentException e) {
+            LOGGER.warning("Bad URL for live web fetch:" + urlString);
+            throw new LiveDocumentNotAvailableException("Url:" + urlString
+                    + " does not look like an URL?");
+        }
+        boolean success = false;
+        try {
+            int status = http.executeMethod(method);
+            if (status != 200) {
+                throw new LiveWebCacheUnavailableException(urlString);
+            }
+
+            ByteArrayInputStream bais =
+                    new ByteArrayInputStream(method.getResponseBody());
+            ARCRecord r = new ARCRecord(
+                    new GZIPInputStream(bais),
+                    "id", 0L, false, false, true);
+            ArcResource ar = (ArcResource)
+                    ResourceFactory.ARCArchiveRecordToResource(r, null);
+            if (ar.getStatusCode() == 502) {
+                throw new LiveDocumentNotAvailableException(urlString);
+            } else if (ar.getStatusCode() == 504) {
+                throw new LiveWebTimeoutException("Timeout:" + urlString);
+            }
+            success = true;
+            return ar;
+        } catch (ResourceNotAvailableException e) {
+            throw new LiveDocumentNotAvailableException(urlString);
+        } catch (NoHttpResponseException e) {
+            throw new LiveWebCacheUnavailableException("No Http Response for "
+                    + urlString);
+        } catch (ConnectException e) {
+            throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } catch (SocketException e) {
+            throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } catch (SocketTimeoutException e) {
+            throw new LiveWebTimeoutException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } catch (ConnectTimeoutException e) {
+            throw new LiveWebTimeoutException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } finally {
+            // On any failure, abort the request before releasing its connection.
+            if (!success) {
+                method.abort();
+            }
+            method.releaseConnection();
+        }
+    }
+
+    /**
+     * Shuts down the connection manager created by the constructor.
+     *
+     * @see LiveWebCache#shutdown()
+     */
+    public void shutdown() {
+        connectionManager.shutdown();
+    }
+
+    /**
+     * Sets the proxy all requests go through. Values without a
+     * {@code host:port} separator are ignored.
+     *
+     * @param hostPort to proxy requests through - ex. "localhost:3128"
+     * @throws NumberFormatException if the port part is not an integer
+     */
+    public void setProxyHostPort(String hostPort) {
+        int colonIdx = hostPort.indexOf(':');
+        if (colonIdx > 0) {
+            String host = hostPort.substring(0, colonIdx);
+            int port = Integer.parseInt(hostPort.substring(colonIdx + 1));
+            hostConfiguration.setProxy(host, port);
+        }
+    }
+
+    /**
+     * @param maxTotalConnections the HttpConnectionManagerParams config
+     */
+    public void setMaxTotalConnections(int maxTotalConnections) {
+        connectionManager.getParams().
+                setMaxTotalConnections(maxTotalConnections);
+    }
+
+    /**
+     * @return the HttpConnectionManagerParams maxTotalConnections config
+     */
+    public int getMaxTotalConnections() {
+        return connectionManager.getParams().getMaxTotalConnections();
+    }
+
+    /**
+     * @param maxHostConnections the HttpConnectionManagerParams config
+     */
+    public void setMaxHostConnections(int maxHostConnections) {
+        connectionManager.getParams().
+                setMaxConnectionsPerHost(hostConfiguration, maxHostConnections);
+    }
+
+    /**
+     * @return the HttpConnectionManagerParams maxHostConnections config
+     */
+    public int getMaxHostConnections() {
+        return connectionManager.getParams().
+                getMaxConnectionsPerHost(hostConfiguration);
+    }
+
+    /**
+     * @return the connectionTimeoutMS
+     */
+    public int getConnectionTimeoutMS() {
+        return connectionManager.getParams().getConnectionTimeout();
+    }
+
+    /**
+     * @param connectionTimeoutMS the connectionTimeoutMS to set
+     */
+    public void setConnectionTimeoutMS(int connectionTimeoutMS) {
+        connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS);
+    }
+
+    /**
+     * @return the socketTimeoutMS
+     */
+    public int getSocketTimeoutMS() {
+        return connectionManager.getParams().getSoTimeout();
+    }
+
+    /**
+     * @param socketTimeoutMS the socketTimeoutMS to set
+     */
+    public void setSocketTimeoutMS(int socketTimeoutMS) {
+        connectionManager.getParams().setSoTimeout(socketTimeoutMS);
+    }
+
+    /**
+     * @return the prefix prepended to every requested URL, or null if none
+     */
+    public String getRequestPrefix() {
+        return requestPrefix;
+    }
+
+    /**
+     * @param requestPrefix prefix to prepend to every requested URL
+     */
+    public void setRequestPrefix(String requestPrefix) {
+        this.requestPrefix = requestPrefix;
+    }
+
+    /**
+     * @return the underlying HTTP client
+     */
+    public HttpClient getHttpClient() {
+        return http;
+    }
+}
diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java
index dd7d54a247..389823ea89 100644
--- a/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java
+++
b/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java @@ -16,7 +16,7 @@ import com.google.common.io.ByteStreams; -public class LiveRobotsNoCache extends RemoteLiveWebCache { +public class LiveRobotsNoCache extends ArcRemoteLiveWebCache { protected int maxRobotsSize = 512000; diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java index 629572c697..c20171f10f 100644 --- a/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java +++ b/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java @@ -1,235 +1,11 @@ -/* - * This file is part of the Wayback archival access software - * (http://archive-access.sourceforge.net/projects/wayback/). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.wayback.liveweb; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.net.ConnectException; -import java.net.SocketException; -import java.net.SocketTimeoutException; -import java.net.URL; -import java.util.logging.Logger; -import java.util.zip.GZIPInputStream; - -import org.apache.commons.httpclient.ConnectTimeoutException; -import org.apache.commons.httpclient.HostConfiguration; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; -import org.apache.commons.httpclient.NoHttpResponseException; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.params.HttpClientParams; -import org.archive.io.arc.ARCRecord; -import org.archive.wayback.core.Resource; -import org.archive.wayback.exception.LiveDocumentNotAvailableException; -import org.archive.wayback.exception.LiveWebCacheUnavailableException; -import org.archive.wayback.exception.LiveWebTimeoutException; -import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.resourcestore.resourcefile.ArcResource; -import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; - -/** - * @author brad - * - */ -public class RemoteLiveWebCache implements LiveWebCache { - private static final Logger LOGGER = Logger.getLogger( - RemoteLiveWebCache.class.getName()); - - protected MultiThreadedHttpConnectionManager connectionManager = null; - protected HostConfiguration hostConfiguration = null; - protected HttpClient http = null; - protected String requestPrefix = null; - - /** - * - */ - public RemoteLiveWebCache() { - connectionManager = new MultiThreadedHttpConnectionManager(); - hostConfiguration = new HostConfiguration(); - HttpClientParams params = new HttpClientParams(); - params.setParameter(HttpClientParams.RETRY_HANDLER, new NoRetryHandler()); - http = new 
HttpClient(params,connectionManager); - http.setHostConfiguration(hostConfiguration); - } - - /* (non-Javadoc) - * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean) - */ - public Resource getCachedResource(URL url, long maxCacheMS, - boolean bUseOlder) throws LiveDocumentNotAvailableException, - LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException { - String urlString = url.toExternalForm(); - - if (requestPrefix != null) { - urlString = requestPrefix + urlString; - } - - HttpMethod method = null; - try { - method = new GetMethod(urlString); - } catch(IllegalArgumentException e) { - LOGGER.warning("Bad URL for live web fetch:" + urlString); - throw new LiveDocumentNotAvailableException("Url:" + urlString + - "does not look like an URL?"); - } - boolean success = false; - try { - int status = http.executeMethod(method); - if(status == 200) { - - ByteArrayInputStream bais = new ByteArrayInputStream(method.getResponseBody()); - ARCRecord r = new ARCRecord( - new GZIPInputStream(bais), - "id",0L,false,false,true); - ArcResource ar = (ArcResource) - ResourceFactory.ARCArchiveRecordToResource(r, null); - if(ar.getStatusCode() == 502) { - throw new LiveDocumentNotAvailableException(urlString); - } else if(ar.getStatusCode() == 504) { - throw new LiveWebTimeoutException("Timeout:" + urlString); - } - success = true; - return ar; - - } else { - throw new LiveWebCacheUnavailableException(urlString); - } - - } catch (ResourceNotAvailableException e) { - throw new LiveDocumentNotAvailableException(urlString); - - } catch (NoHttpResponseException e) { - - throw new LiveWebCacheUnavailableException("No Http Response for " - + urlString); - - } catch (ConnectException e) { - throw new LiveWebCacheUnavailableException(e.getLocalizedMessage() - + " : " + urlString); - } catch (SocketException e) { - throw new LiveWebCacheUnavailableException(e.getLocalizedMessage() - + " : " + urlString); - } catch (SocketTimeoutException 
e) { - throw new LiveWebTimeoutException(e.getLocalizedMessage() - + " : " + urlString); - } catch(ConnectTimeoutException e) { - throw new LiveWebTimeoutException(e.getLocalizedMessage() - + " : " + urlString); - } finally { - if (!success) { - method.abort(); - } - method.releaseConnection(); - } - } - - /* (non-Javadoc) - * @see org.archive.wayback.liveweb.LiveWebCache#shutdown() - */ - public void shutdown() { - // TODO Auto-generated method stub - } - - - /** - * @param hostPort to proxy requests through - ex. "localhost:3128" - */ - public void setProxyHostPort(String hostPort) { - int colonIdx = hostPort.indexOf(':'); - if(colonIdx > 0) { - String host = hostPort.substring(0,colonIdx); - int port = Integer.valueOf(hostPort.substring(colonIdx+1)); - -// http.getHostConfiguration().setProxy(host, port); - hostConfiguration.setProxy(host, port); - } - } - /** - * @param maxTotalConnections the HttpConnectionManagerParams config - */ - public void setMaxTotalConnections(int maxTotalConnections) { - connectionManager.getParams(). - setMaxTotalConnections(maxTotalConnections); - } - /** - * @return the HttpConnectionManagerParams maxTotalConnections config - */ - public int getMaxTotalConnections() { - return connectionManager.getParams().getMaxTotalConnections(); - } - - /** - * @param maxHostConnections the HttpConnectionManagerParams config - */ - public void setMaxHostConnections(int maxHostConnections) { - connectionManager.getParams(). - setMaxConnectionsPerHost(hostConfiguration, maxHostConnections); - } - - /** - * @return the HttpConnectionManagerParams maxHostConnections config - */ - public int getMaxHostConnections() { - return connectionManager.getParams(). 
- getMaxConnectionsPerHost(hostConfiguration);
- }
-
- /**
- * @return the connectionTimeoutMS
- */
- public int getConnectionTimeoutMS() {
- return connectionManager.getParams().getConnectionTimeout();
- }
-
- /**
- * @param connectionTimeoutMS the connectionTimeoutMS to set
- */
- public void setConnectionTimeoutMS(int connectionTimeoutMS) {
- connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS);
- }
-
- /**
- * @return the socketTimeoutMS
- */
- public int getSocketTimeoutMS() {
- return connectionManager.getParams().getSoTimeout();
- }
-
- /**
- * @param socketTimeoutMS the socketTimeoutMS to set
- */
- public void setSocketTimeoutMS(int socketTimeoutMS) {
- connectionManager.getParams().setSoTimeout(socketTimeoutMS);
- }
-
- public String getRequestPrefix() {
- return requestPrefix;
- }
-
- public void setRequestPrefix(String requestPrefix) {
- this.requestPrefix = requestPrefix;
- }
-
- public HttpClient getHttpClient()
- {
- return http;
- }
-}
+package org.archive.wayback.liveweb;
+
+/**
+ * This class is provided to maintain backwards compatibility.
+ *
+ * @deprecated As of release 2.2.0, replaced by {@link ArcRemoteLiveWebCache}. This class will be removed in 3.0.0
+ */
+@Deprecated
+public class RemoteLiveWebCache extends ArcRemoteLiveWebCache {
+
+}
diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/StdRemoteLiveWebCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/StdRemoteLiveWebCache.java
new file mode 100644
index 0000000000..645e7ebecb
--- /dev/null
+++ b/wayback-core/src/main/java/org/archive/wayback/liveweb/StdRemoteLiveWebCache.java
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2014 Bibliotheca Alexandrina.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.archive.wayback.liveweb;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.ConnectException;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+import java.net.URL;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.apache.commons.httpclient.HostConfiguration;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.apache.http.Consts;
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpHost;
+import org.apache.http.NoHttpResponseException;
+import org.apache.http.StatusLine;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.conn.ConnectTimeoutException;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.archive.io.arc.ARCRecord;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.exception.LiveDocumentNotAvailableException;
+import org.archive.wayback.exception.LiveWebCacheUnavailableException;
+import org.archive.wayback.exception.LiveWebTimeoutException;
+import org.archive.wayback.exception.ResourceNotAvailableException;
+import org.archive.wayback.resourcestore.resourcefile.ArcResource;
+import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;
+
+/**
+ * Fetches resources from the live web through a standard proxy server
+ * (e.g. Squid) and wraps each response in an in-memory ARC record.
+ *
+ * @author Mohamed Elsayed
+ * @see LiveWebCache
+ * @see ArcRemoteLiveWebCache
+ */
+public class StdRemoteLiveWebCache implements LiveWebCache
+{
+    private static final Logger LOGGER = Logger.getLogger(
+        StdRemoteLiveWebCache.class.getName() );
+
+    /** Connect and socket timeout used unless reconfigured. */
+    private static final int DEFAULT_TIMEOUT_MS = 10000;
+
+    /** Placeholder IP, date and MIME type fields of the ARC record line. */
+    private static final String ARC_RECORD_META =
+        "0.0.0.0 10000000000000 text/plain";
+
+    protected MultiThreadedHttpConnectionManager connectionManager;
+    protected HostConfiguration hostConfiguration;
+    protected HttpClient httpClient;
+    protected String requestPrefix;
+    private int connectionTimeoutMS = DEFAULT_TIMEOUT_MS;
+    private int socketTimeoutMS = DEFAULT_TIMEOUT_MS;
+
+    /**
+     * Initializes and configures the connection objects.
+     */
+    public StdRemoteLiveWebCache()
+    {
+        connectionManager = new MultiThreadedHttpConnectionManager();
+        hostConfiguration = new HostConfiguration();
+        HttpClientParams params = new HttpClientParams();
+        params.setParameter( HttpClientParams.RETRY_HANDLER,
+            new NoRetryHandler() );
+        httpClient = new HttpClient( params, connectionManager );
+        httpClient.setHostConfiguration( hostConfiguration );
+    }
+
+    /**
+     * Gets a resource from the live web. A 404 response is answered with a
+     * synthetic allow-all robots.txt instead of an error.
+     *
+     * @param url to fetch from the live web.
+     * @param maxCacheMS maximum age of resource to return - not consulted here
+     * @param bUseOlder if true, return documents older than maxCacheMS if
+     *        a more recent copy is not available - not consulted here
+     * @return Resource for url
+     * @throws LiveDocumentNotAvailableException if the resource cannot be
+     *         retrieved from the live web, but all proxying and caching
+     *         mechanisms functioned properly
+     * @throws LiveWebCacheUnavailableException if there was a problem either
+     *         accessing the live web, in proxying to the live web, or in
+     *         maintaining the cache for the live web
+     * @throws LiveWebTimeoutException if there is no response from the live
+     *         web cache before a timeout occurred.
+     * @throws IOException for the usual reasons
+     * @see LiveWebCache#getCachedResource(java.net.URL, long, boolean)
+     */
+    @Override
+    public Resource getCachedResource( URL url, long maxCacheMS,
+        boolean bUseOlder )
+        throws LiveDocumentNotAvailableException,
+        LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException
+    {
+        String urlStr = url.toExternalForm();
+        if ( requestPrefix != null )
+        {
+            urlStr = requestPrefix + urlStr;
+        }
+
+        HttpGet httpGet;
+        try
+        {
+            httpGet = new HttpGet( urlStr );
+        }
+        catch ( IllegalArgumentException e )
+        {
+            LOGGER.warning( "Bad URL for live web fetch:" + urlStr );
+            throw new LiveDocumentNotAvailableException( "Url:" + urlStr
+                + " does not look like an URL?" );
+        }
+
+        // Client and response are locals so that concurrent calls on one
+        // instance cannot close or overwrite each other's response.
+        CloseableHttpClient client = HttpClients.custom()
+            .setDefaultRequestConfig( buildRequestConfig() ).build();
+        CloseableHttpResponse response = null;
+        try
+        {
+            response = client.execute( httpGet );
+            byte[] record = toArcRecord( urlStr, response );
+
+            // TODO: Should not use ARCRecord
+            ARCRecord r = new ARCRecord( new ByteArrayInputStream( record ),
+                "id", 0L, false, false, true );
+            ArcResource ar = ( ArcResource )
+                ResourceFactory.ARCArchiveRecordToResource( r, null );
+
+            if ( ar.getStatusCode() == 502 )
+            {
+                throw new LiveDocumentNotAvailableException( urlStr );
+            }
+            else if ( ar.getStatusCode() == 504 )
+            {
+                throw new LiveWebTimeoutException( "Timeout:" + urlStr );
+            }
+
+            return ar;
+        }
+        catch ( ResourceNotAvailableException e )
+        {
+            throw new LiveDocumentNotAvailableException( urlStr );
+        }
+        catch ( NoHttpResponseException e )
+        {
+            throw new LiveWebCacheUnavailableException( "No Http Response for "
+                + urlStr );
+        }
+        catch ( ConnectException e )
+        {
+            throw new LiveWebCacheUnavailableException( e.getLocalizedMessage()
+                + " : " + urlStr );
+        }
+        catch ( SocketException e )
+        {
+            throw new LiveWebCacheUnavailableException( e.getLocalizedMessage()
+                + " : " + urlStr );
+        }
+        catch ( SocketTimeoutException e )
+        {
+            throw new LiveWebTimeoutException( e.getLocalizedMessage() + " : "
+                + urlStr );
+        }
+        catch ( ConnectTimeoutException e )
+        {
+            throw new LiveWebTimeoutException( e.getLocalizedMessage() + " : "
+                + urlStr );
+        }
+        finally
+        {
+            closeQuietly( response );
+            closeQuietly( client );
+        }
+    }
+
+    /**
+     * Builds the per-request configuration from the current timeout and
+     * proxy settings.
+     */
+    private RequestConfig buildRequestConfig()
+    {
+        RequestConfig.Builder conf = RequestConfig.custom()
+            .setSocketTimeout( socketTimeoutMS )
+            .setConnectTimeout( connectionTimeoutMS )
+            .setConnectionRequestTimeout( connectionTimeoutMS );
+
+        // HttpHost rejects a null host name, so only route through a proxy
+        // once setProxyHostPort() has configured one.
+        String proxyHost = hostConfiguration.getProxyHost();
+        if ( proxyHost != null )
+        {
+            conf.setProxy( new HttpHost( proxyHost,
+                hostConfiguration.getProxyPort() ) );
+        }
+        return conf.build();
+    }
+
+    /**
+     * Serializes the response as one uncompressed ARC record: the record
+     * line, the HTTP status line and headers, a blank line, then the body.
+     */
+    private static byte[] toArcRecord( String urlStr,
+        CloseableHttpResponse response ) throws IOException
+    {
+        StringBuilder httpHeader = new StringBuilder();
+        byte[] body;
+
+        StatusLine status = response.getStatusLine();
+        if ( status.getStatusCode() == 404 )
+        {
+            // A missing robots.txt is reported as an allow-all document
+            // rather than an error. The 404's own headers describe a
+            // different body, so they are not copied.
+            httpHeader.append( "HTTP/1.0 200 OK\n" );
+            httpHeader.append( "Content-Type: text/plain\n" );
+            body = "User-agent: *\nAllow: /\n".getBytes( Consts.ISO_8859_1 );
+        }
+        else
+        {
+            httpHeader.append( status.getProtocolVersion() ).append( ' ' )
+                .append( status.getStatusCode() ).append( ' ' )
+                .append( status.getReasonPhrase() ).append( '\n' );
+            for ( Header header : response.getAllHeaders() )
+            {
+                httpHeader.append( header.toString() ).append( '\n' );
+            }
+            HttpEntity entity = response.getEntity();
+            body = ( entity == null ) ? new byte[0]
+                : EntityUtils.toByteArray( entity );
+        }
+        httpHeader.append( '\n' );
+
+        // The record length counts encoded bytes, not characters.
+        byte[] headerBytes = httpHeader.toString().getBytes( Consts.UTF_8 );
+        // TODO: the record line still uses placeholder metadata.
+        String recordLine = String.format( "%s %s %d\n", urlStr,
+            ARC_RECORD_META, headerBytes.length + body.length );
+
+        ByteArrayOutputStream record = new ByteArrayOutputStream();
+        record.write( recordLine.getBytes( Consts.UTF_8 ) );
+        record.write( headerBytes );
+        record.write( body );
+        return record.toByteArray();
+    }
+
+    /** Closes {@code resource} if non-null, logging instead of throwing. */
+    private static void closeQuietly( Closeable resource )
+    {
+        if ( resource == null )
+        {
+            return;
+        }
+        try
+        {
+            resource.close();
+        }
+        catch ( IOException e )
+        {
+            LOGGER.log( Level.FINE, "Failed to close live web connection", e );
+        }
+    }
+
+    /**
+     * Sets proxy and port (proxy:port). Values without a colon separator
+     * are ignored.
+     *
+     * @param hostPort to proxy requests through - ex. "localhost:3128"
+     */
+    public void setProxyHostPort( String hostPort )
+    {
+        int colonIdx = hostPort.indexOf( ':' );
+        if ( colonIdx > 0 )
+        {
+            String host = hostPort.substring( 0, colonIdx );
+            int port = Integer.parseInt( hostPort.substring( colonIdx + 1 ) );
+            hostConfiguration.setProxy( host, port );
+        }
+    }
+
+    /**
+     * @return the connect timeout in milliseconds
+     */
+    public int getConnectionTimeoutMS()
+    {
+        return connectionTimeoutMS;
+    }
+
+    /**
+     * @param connectionTimeoutMS the connect timeout in milliseconds
+     */
+    public void setConnectionTimeoutMS( int connectionTimeoutMS )
+    {
+        this.connectionTimeoutMS = connectionTimeoutMS;
+    }
+
+    /**
+     * @return the socket read timeout in milliseconds
+     */
+    public int getSocketTimeoutMS()
+    {
+        return socketTimeoutMS;
+    }
+
+    /**
+     * @param socketTimeoutMS the socket read timeout in milliseconds
+     */
+    public void setSocketTimeoutMS( int socketTimeoutMS )
+    {
+        this.socketTimeoutMS = socketTimeoutMS;
+    }
+
+    /**
+     * @return the prefix prepended to every requested URL, or null if none
+     */
+    public String getRequestPrefix()
+    {
+        return requestPrefix;
+    }
+
+    /**
+     * @param requestPrefix prefix to prepend to every requested URL
+     */
+    public void setRequestPrefix( String requestPrefix )
+    {
+        this.requestPrefix = requestPrefix;
+    }
+
+    /**
+     * Shuts down the connection manager created by the constructor.
+     *
+     * @see org.archive.wayback.liveweb.LiveWebCache#shutdown()
+     */
+    @Override
+    public void shutdown()
+    {
+        connectionManager.shutdown();
+    }
+}
\ No newline at end of file
diff --git a/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml b/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml
index cec9b1a696..ea89f6c30b 100644
--- a/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml
+++ b/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml
@@ -30,12 +30,15 @@
+ class="org.archive.wayback.liveweb.ArcRemoteLiveWebCache">