diff --git a/pom.xml b/pom.xml
index 1b9a5013cb..d7044ca170 100644
--- a/pom.xml
+++ b/pom.xml
@@ -388,6 +388,12 @@
+
+ Mohamed Elsayed
+ https://github.com/MohammedElsayyed
+ The New Library of Alexandria
+ http://bibalex.org/
+
diff --git a/src/site/xdoc/release_notes.xml b/src/site/xdoc/release_notes.xml
index 7faad0a502..6dfeb315f6 100644
Binary files a/src/site/xdoc/release_notes.xml and b/src/site/xdoc/release_notes.xml differ
diff --git a/wayback-core/pom.xml b/wayback-core/pom.xml
index b2c9bca49c..a02a04fa8a 100644
--- a/wayback-core/pom.xml
+++ b/wayback-core/pom.xml
@@ -125,6 +125,12 @@
2.5.1
test
+
+ org.apache.httpcomponents
+ httpclient
+ 4.3.5
+ jar
+
diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/ArcRemoteLiveWebCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/ArcRemoteLiveWebCache.java
new file mode 100644
index 0000000000..3e54782048
--- /dev/null
+++ b/wayback-core/src/main/java/org/archive/wayback/liveweb/ArcRemoteLiveWebCache.java
@@ -0,0 +1,240 @@
+/*
+ * This file is part of the Wayback archival access software
+ * (http://archive-access.sourceforge.net/projects/wayback/).
+ *
+ * Licensed to the Internet Archive (IA) by one or more individual
+ * contributors.
+ *
+ * The IA licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.archive.wayback.liveweb;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.net.ConnectException;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+import java.net.URL;
+import java.util.logging.Logger;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.commons.httpclient.ConnectTimeoutException;
+import org.apache.commons.httpclient.HostConfiguration;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpMethod;
+import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
+import org.apache.commons.httpclient.NoHttpResponseException;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.archive.io.arc.ARCRecord;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.exception.LiveDocumentNotAvailableException;
+import org.archive.wayback.exception.LiveWebCacheUnavailableException;
+import org.archive.wayback.exception.LiveWebTimeoutException;
+import org.archive.wayback.exception.ResourceNotAvailableException;
+import org.archive.wayback.resourcestore.resourcefile.ArcResource;
+import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;
+
+/**
+ * {@link LiveWebCache} implementation that fetches resources from the live
+ * web through an {@link ARCRecordingProxy}.
+ *
+ * <p>The proxy is expected to answer each request with a gzip-compressed ARC
+ * record; it does not work with standard proxy servers (see
+ * {@link StdRemoteLiveWebCache} for that case).</p>
+ *
+ * @author brad
+ * @see LiveWebCache
+ * @see StdRemoteLiveWebCache
+ */
+public class ArcRemoteLiveWebCache implements LiveWebCache {
+    private static final Logger LOGGER = Logger.getLogger(
+            ArcRemoteLiveWebCache.class.getName());
+
+    protected MultiThreadedHttpConnectionManager connectionManager = null;
+    protected HostConfiguration hostConfiguration = null;
+    protected HttpClient http = null;
+    protected String requestPrefix = null;
+
+    /**
+     * Creates the connection manager, host configuration and HTTP client.
+     * Automatic retries are disabled by installing a {@link NoRetryHandler}.
+     */
+    public ArcRemoteLiveWebCache() {
+        connectionManager = new MultiThreadedHttpConnectionManager();
+        hostConfiguration = new HostConfiguration();
+        HttpClientParams params = new HttpClientParams();
+        params.setParameter(HttpClientParams.RETRY_HANDLER, new NoRetryHandler());
+        http = new HttpClient(params, connectionManager);
+        http.setHostConfiguration(hostConfiguration);
+    }
+
+    /**
+     * Fetches {@code url} through the configured proxy and wraps the returned
+     * ARC record in a {@link Resource}.
+     *
+     * @param url URL to fetch; {@code requestPrefix}, when set, is prepended
+     * @param maxCacheMS not used by this implementation
+     * @param bUseOlder not used by this implementation
+     * @return the resource parsed from the ARC record sent by the proxy
+     * @throws LiveDocumentNotAvailableException if the URL is malformed, the
+     *         record cannot be converted to a resource, or the recorded
+     *         status code is 502
+     * @throws LiveWebCacheUnavailableException if the proxy answers with a
+     *         status other than 200, sends no body, or cannot be reached
+     * @throws LiveWebTimeoutException on connect/socket timeout or when the
+     *         recorded status code is 504
+     * @throws IOException for any other I/O failure
+     * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean)
+     */
+    public Resource getCachedResource(URL url, long maxCacheMS,
+            boolean bUseOlder) throws LiveDocumentNotAvailableException,
+            LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException {
+        String urlString = url.toExternalForm();
+
+        if (requestPrefix != null) {
+            urlString = requestPrefix + urlString;
+        }
+
+        HttpMethod method = null;
+        try {
+            method = new GetMethod(urlString);
+        } catch (IllegalArgumentException e) {
+            LOGGER.warning("Bad URL for live web fetch:" + urlString);
+            throw new LiveDocumentNotAvailableException("Url:" + urlString +
+                    " does not look like an URL?");
+        }
+        boolean success = false;
+        try {
+            int status = http.executeMethod(method);
+            if (status != 200) {
+                throw new LiveWebCacheUnavailableException(urlString);
+            }
+
+            // getResponseBody() returns null when no body is available;
+            // report that as a proxy failure instead of letting
+            // ByteArrayInputStream throw a NullPointerException.
+            byte[] body = method.getResponseBody();
+            if (body == null) {
+                throw new LiveWebCacheUnavailableException(
+                        "Empty response for " + urlString);
+            }
+
+            ByteArrayInputStream bais = new ByteArrayInputStream(body);
+            ARCRecord r = new ARCRecord(
+                    new GZIPInputStream(bais),
+                    "id", 0L, false, false, true);
+            ArcResource ar = (ArcResource)
+                    ResourceFactory.ARCArchiveRecordToResource(r, null);
+            if (ar.getStatusCode() == 502) {
+                throw new LiveDocumentNotAvailableException(urlString);
+            } else if (ar.getStatusCode() == 504) {
+                throw new LiveWebTimeoutException("Timeout:" + urlString);
+            }
+            success = true;
+            return ar;
+
+        } catch (ResourceNotAvailableException e) {
+            throw new LiveDocumentNotAvailableException(urlString);
+        } catch (NoHttpResponseException e) {
+            throw new LiveWebCacheUnavailableException("No Http Response for "
+                    + urlString);
+        } catch (ConnectException e) {
+            throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } catch (SocketException e) {
+            throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } catch (SocketTimeoutException e) {
+            throw new LiveWebTimeoutException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } catch (ConnectTimeoutException e) {
+            throw new LiveWebTimeoutException(e.getLocalizedMessage()
+                    + " : " + urlString);
+        } finally {
+            // On any failure path abort the request before handing the
+            // connection back to the pool.
+            if (!success) {
+                method.abort();
+            }
+            method.releaseConnection();
+        }
+    }
+
+    /**
+     * Shuts down the connection manager, releasing its pooled connections.
+     * The cache must not be used after this call.
+     *
+     * @see org.archive.wayback.liveweb.LiveWebCache#shutdown()
+     */
+    public void shutdown() {
+        if (connectionManager != null) {
+            connectionManager.shutdown();
+        }
+    }
+
+    /**
+     * @param hostPort to proxy requests through - ex. "localhost:3128";
+     *        values without a host and a colon are ignored with a warning
+     * @throws NumberFormatException if the port part is not an integer
+     */
+    public void setProxyHostPort(String hostPort) {
+        int colonIdx = hostPort.indexOf(':');
+        if (colonIdx > 0) {
+            String host = hostPort.substring(0, colonIdx);
+            int port = Integer.parseInt(hostPort.substring(colonIdx + 1));
+            hostConfiguration.setProxy(host, port);
+        } else {
+            LOGGER.warning("Ignoring proxy setting, expected host:port but got: "
+                    + hostPort);
+        }
+    }
+
+    /**
+     * @param maxTotalConnections the HttpConnectionManagerParams config
+     */
+    public void setMaxTotalConnections(int maxTotalConnections) {
+        connectionManager.getParams().
+                setMaxTotalConnections(maxTotalConnections);
+    }
+
+    /**
+     * @return the HttpConnectionManagerParams maxTotalConnections config
+     */
+    public int getMaxTotalConnections() {
+        return connectionManager.getParams().getMaxTotalConnections();
+    }
+
+    /**
+     * @param maxHostConnections the HttpConnectionManagerParams config
+     */
+    public void setMaxHostConnections(int maxHostConnections) {
+        connectionManager.getParams().
+                setMaxConnectionsPerHost(hostConfiguration, maxHostConnections);
+    }
+
+    /**
+     * @return the HttpConnectionManagerParams maxHostConnections config
+     */
+    public int getMaxHostConnections() {
+        return connectionManager.getParams().
+                getMaxConnectionsPerHost(hostConfiguration);
+    }
+
+    /**
+     * @return the connectionTimeoutMS
+     */
+    public int getConnectionTimeoutMS() {
+        return connectionManager.getParams().getConnectionTimeout();
+    }
+
+    /**
+     * @param connectionTimeoutMS the connectionTimeoutMS to set
+     */
+    public void setConnectionTimeoutMS(int connectionTimeoutMS) {
+        connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS);
+    }
+
+    /**
+     * @return the socketTimeoutMS
+     */
+    public int getSocketTimeoutMS() {
+        return connectionManager.getParams().getSoTimeout();
+    }
+
+    /**
+     * @param socketTimeoutMS the socketTimeoutMS to set
+     */
+    public void setSocketTimeoutMS(int socketTimeoutMS) {
+        connectionManager.getParams().setSoTimeout(socketTimeoutMS);
+    }
+
+    /**
+     * @return the prefix prepended to every requested URL, or null if none
+     */
+    public String getRequestPrefix() {
+        return requestPrefix;
+    }
+
+    /**
+     * @param requestPrefix prefix to prepend to every requested URL
+     */
+    public void setRequestPrefix(String requestPrefix) {
+        this.requestPrefix = requestPrefix;
+    }
+
+    /**
+     * @return the underlying HTTP client used for live web fetches
+     */
+    public HttpClient getHttpClient() {
+        return http;
+    }
+}
diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java
index dd7d54a247..389823ea89 100644
--- a/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java
+++ b/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveRobotsNoCache.java
@@ -16,7 +16,7 @@
import com.google.common.io.ByteStreams;
-public class LiveRobotsNoCache extends RemoteLiveWebCache {
+public class LiveRobotsNoCache extends ArcRemoteLiveWebCache {
protected int maxRobotsSize = 512000;
diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java
index 629572c697..c20171f10f 100644
--- a/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java
+++ b/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java
@@ -1,235 +1,11 @@
-/*
- * This file is part of the Wayback archival access software
- * (http://archive-access.sourceforge.net/projects/wayback/).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.archive.wayback.liveweb;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.net.ConnectException;
-import java.net.SocketException;
-import java.net.SocketTimeoutException;
-import java.net.URL;
-import java.util.logging.Logger;
-import java.util.zip.GZIPInputStream;
-
-import org.apache.commons.httpclient.ConnectTimeoutException;
-import org.apache.commons.httpclient.HostConfiguration;
-import org.apache.commons.httpclient.HttpClient;
-import org.apache.commons.httpclient.HttpMethod;
-import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
-import org.apache.commons.httpclient.NoHttpResponseException;
-import org.apache.commons.httpclient.methods.GetMethod;
-import org.apache.commons.httpclient.params.HttpClientParams;
-import org.archive.io.arc.ARCRecord;
-import org.archive.wayback.core.Resource;
-import org.archive.wayback.exception.LiveDocumentNotAvailableException;
-import org.archive.wayback.exception.LiveWebCacheUnavailableException;
-import org.archive.wayback.exception.LiveWebTimeoutException;
-import org.archive.wayback.exception.ResourceNotAvailableException;
-import org.archive.wayback.resourcestore.resourcefile.ArcResource;
-import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;
-
-/**
- * @author brad
- *
- */
-public class RemoteLiveWebCache implements LiveWebCache {
- private static final Logger LOGGER = Logger.getLogger(
- RemoteLiveWebCache.class.getName());
-
- protected MultiThreadedHttpConnectionManager connectionManager = null;
- protected HostConfiguration hostConfiguration = null;
- protected HttpClient http = null;
- protected String requestPrefix = null;
-
- /**
- *
- */
- public RemoteLiveWebCache() {
- connectionManager = new MultiThreadedHttpConnectionManager();
- hostConfiguration = new HostConfiguration();
- HttpClientParams params = new HttpClientParams();
- params.setParameter(HttpClientParams.RETRY_HANDLER, new NoRetryHandler());
- http = new HttpClient(params,connectionManager);
- http.setHostConfiguration(hostConfiguration);
- }
-
- /* (non-Javadoc)
- * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean)
- */
- public Resource getCachedResource(URL url, long maxCacheMS,
- boolean bUseOlder) throws LiveDocumentNotAvailableException,
- LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException {
- String urlString = url.toExternalForm();
-
- if (requestPrefix != null) {
- urlString = requestPrefix + urlString;
- }
-
- HttpMethod method = null;
- try {
- method = new GetMethod(urlString);
- } catch(IllegalArgumentException e) {
- LOGGER.warning("Bad URL for live web fetch:" + urlString);
- throw new LiveDocumentNotAvailableException("Url:" + urlString +
- "does not look like an URL?");
- }
- boolean success = false;
- try {
- int status = http.executeMethod(method);
- if(status == 200) {
-
- ByteArrayInputStream bais = new ByteArrayInputStream(method.getResponseBody());
- ARCRecord r = new ARCRecord(
- new GZIPInputStream(bais),
- "id",0L,false,false,true);
- ArcResource ar = (ArcResource)
- ResourceFactory.ARCArchiveRecordToResource(r, null);
- if(ar.getStatusCode() == 502) {
- throw new LiveDocumentNotAvailableException(urlString);
- } else if(ar.getStatusCode() == 504) {
- throw new LiveWebTimeoutException("Timeout:" + urlString);
- }
- success = true;
- return ar;
-
- } else {
- throw new LiveWebCacheUnavailableException(urlString);
- }
-
- } catch (ResourceNotAvailableException e) {
- throw new LiveDocumentNotAvailableException(urlString);
-
- } catch (NoHttpResponseException e) {
-
- throw new LiveWebCacheUnavailableException("No Http Response for "
- + urlString);
-
- } catch (ConnectException e) {
- throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
- + " : " + urlString);
- } catch (SocketException e) {
- throw new LiveWebCacheUnavailableException(e.getLocalizedMessage()
- + " : " + urlString);
- } catch (SocketTimeoutException e) {
- throw new LiveWebTimeoutException(e.getLocalizedMessage()
- + " : " + urlString);
- } catch(ConnectTimeoutException e) {
- throw new LiveWebTimeoutException(e.getLocalizedMessage()
- + " : " + urlString);
- } finally {
- if (!success) {
- method.abort();
- }
- method.releaseConnection();
- }
- }
-
- /* (non-Javadoc)
- * @see org.archive.wayback.liveweb.LiveWebCache#shutdown()
- */
- public void shutdown() {
- // TODO Auto-generated method stub
- }
-
-
- /**
- * @param hostPort to proxy requests through - ex. "localhost:3128"
- */
- public void setProxyHostPort(String hostPort) {
- int colonIdx = hostPort.indexOf(':');
- if(colonIdx > 0) {
- String host = hostPort.substring(0,colonIdx);
- int port = Integer.valueOf(hostPort.substring(colonIdx+1));
-
-// http.getHostConfiguration().setProxy(host, port);
- hostConfiguration.setProxy(host, port);
- }
- }
- /**
- * @param maxTotalConnections the HttpConnectionManagerParams config
- */
- public void setMaxTotalConnections(int maxTotalConnections) {
- connectionManager.getParams().
- setMaxTotalConnections(maxTotalConnections);
- }
- /**
- * @return the HttpConnectionManagerParams maxTotalConnections config
- */
- public int getMaxTotalConnections() {
- return connectionManager.getParams().getMaxTotalConnections();
- }
-
- /**
- * @param maxHostConnections the HttpConnectionManagerParams config
- */
- public void setMaxHostConnections(int maxHostConnections) {
- connectionManager.getParams().
- setMaxConnectionsPerHost(hostConfiguration, maxHostConnections);
- }
-
- /**
- * @return the HttpConnectionManagerParams maxHostConnections config
- */
- public int getMaxHostConnections() {
- return connectionManager.getParams().
- getMaxConnectionsPerHost(hostConfiguration);
- }
-
- /**
- * @return the connectionTimeoutMS
- */
- public int getConnectionTimeoutMS() {
- return connectionManager.getParams().getConnectionTimeout();
- }
-
- /**
- * @param connectionTimeoutMS the connectionTimeoutMS to set
- */
- public void setConnectionTimeoutMS(int connectionTimeoutMS) {
- connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS);
- }
-
- /**
- * @return the socketTimeoutMS
- */
- public int getSocketTimeoutMS() {
- return connectionManager.getParams().getSoTimeout();
- }
-
- /**
- * @param socketTimeoutMS the socketTimeoutMS to set
- */
- public void setSocketTimeoutMS(int socketTimeoutMS) {
- connectionManager.getParams().setSoTimeout(socketTimeoutMS);
- }
-
- public String getRequestPrefix() {
- return requestPrefix;
- }
-
- public void setRequestPrefix(String requestPrefix) {
- this.requestPrefix = requestPrefix;
- }
-
- public HttpClient getHttpClient()
- {
- return http;
- }
-}
+package org.archive.wayback.liveweb;
+
+/**
+ * Backwards-compatibility alias for {@link ArcRemoteLiveWebCache}.
+ *
+ * <p>This class adds no members of its own; it only keeps the old class name
+ * available so that existing references to {@code RemoteLiveWebCache} keep
+ * resolving.</p>
+ *
+ * @deprecated As of release 2.2.0, replaced by {@link ArcRemoteLiveWebCache}.
+ *             This class will be removed in 3.0.0.
+ */
+@Deprecated
+public class RemoteLiveWebCache extends ArcRemoteLiveWebCache {
+
+}
diff --git a/wayback-core/src/main/java/org/archive/wayback/liveweb/StdRemoteLiveWebCache.java b/wayback-core/src/main/java/org/archive/wayback/liveweb/StdRemoteLiveWebCache.java
new file mode 100644
index 0000000000..645e7ebecb
--- /dev/null
+++ b/wayback-core/src/main/java/org/archive/wayback/liveweb/StdRemoteLiveWebCache.java
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2014 Bibliotheca Alexandrina.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.archive.wayback.liveweb;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.net.ConnectException;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+import java.net.URL;
+import java.util.logging.Logger;
+import org.apache.commons.httpclient.ConnectTimeoutException;
+import org.apache.commons.httpclient.HostConfiguration;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
+import org.apache.commons.httpclient.NoHttpResponseException;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpHost;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.archive.io.arc.ARCRecord;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.exception.LiveDocumentNotAvailableException;
+import org.archive.wayback.exception.LiveWebCacheUnavailableException;
+import org.archive.wayback.exception.LiveWebTimeoutException;
+import org.archive.wayback.exception.ResourceNotAvailableException;
+import org.archive.wayback.resourcestore.resourcefile.ArcResource;
+import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;
+
+/**
+ * {@link LiveWebCache} implementation that fetches resources from the live
+ * web through a standard HTTP proxy server, e.g. Squid.
+ *
+ * <p>The HTTP response is re-packaged as an in-memory ARC record so that it
+ * can be returned as an {@link ArcResource}. All per-request state is held in
+ * local variables.</p>
+ *
+ * @author Mohamed Elsayed
+ * @see LiveWebCache
+ * @see ArcRemoteLiveWebCache
+ */
+public class StdRemoteLiveWebCache implements LiveWebCache
+{
+    private static final Logger LOGGER = Logger.getLogger(
+            StdRemoteLiveWebCache.class.getName() );
+
+    /** Socket, connect and connection-request timeout, in milliseconds. */
+    private static final int TIMEOUT_MS = 10000;
+
+    /** Charset used to serialize the synthetic ARC record line and headers. */
+    private static final String RECORD_CHARSET = "UTF-8";
+
+    protected MultiThreadedHttpConnectionManager connectionManager;
+    protected HostConfiguration hostConfiguration;
+    protected HttpClient httpClient;
+    protected String requestPrefix;
+
+    /**
+     * StdRemoteLiveWebCache constructor initializes and configures connection objects.
+     */
+    public StdRemoteLiveWebCache()
+    {
+        connectionManager = new MultiThreadedHttpConnectionManager();
+        hostConfiguration = new HostConfiguration();
+        HttpClientParams params = new HttpClientParams();
+        params.setParameter( HttpClientParams.RETRY_HANDLER,
+                new NoRetryHandler() );
+        httpClient = new HttpClient( params, connectionManager );
+        httpClient.setHostConfiguration( hostConfiguration );
+    }
+
+    /**
+     * Gets resource object from the live web. Socket, connect and
+     * connection-request timeouts are fixed at 10 seconds.
+     *
+     * <p>A 404 response is replaced by a synthetic "allow all" robots.txt
+     * with status 200; for every other status the original status line is
+     * preserved, and the response body is kept only for status 200.</p>
+     *
+     * @param url to fetch from the live web.
+     * @param maxCacheMS maximum age of resource to return - not used by this
+     *        implementation
+     * @param bUseOlder if true, return documents older than maxCacheMS if
+     *        a more recent copy is not available - not used by this
+     *        implementation
+     *
+     * @return Resource for url
+     *
+     * @throws LiveDocumentNotAvailableException if the resource cannot be
+     *         retrieved from the live web, but all proxying and caching
+     *         mechanisms functioned properly
+     * @throws LiveWebCacheUnavailableException if there was a problem either
+     *         accessing the live web, in proxying to the live web, or in
+     *         maintaining the cache for the live web
+     * @throws LiveWebTimeoutException if there is no response from the live
+     *         web cache before a timeout occurred.
+     * @throws IOException for the usual reasons
+     *
+     * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean)
+     */
+    @Override
+    public Resource getCachedResource( URL url, long maxCacheMS,
+            boolean bUseOlder )
+        throws LiveDocumentNotAvailableException,
+        LiveWebCacheUnavailableException, LiveWebTimeoutException, IOException
+    {
+        String urlStr = url.toExternalForm();
+
+        if ( requestPrefix != null )
+        {
+            urlStr = requestPrefix + urlStr;
+        }
+
+        HttpGet httpGet;
+        try
+        {
+            httpGet = new HttpGet( urlStr );
+        }
+        catch ( IllegalArgumentException e )
+        {
+            LOGGER.warning( "Bad URL for live web fetch:" + urlStr );
+            throw new LiveDocumentNotAvailableException( "Url:" + urlStr +
+                    " does not look like an URL?" );
+        }
+
+        RequestConfig.Builder reqConf = RequestConfig.custom()
+                .setSocketTimeout( TIMEOUT_MS )
+                .setConnectTimeout( TIMEOUT_MS )
+                .setConnectionRequestTimeout( TIMEOUT_MS );
+
+        // HttpHost rejects a null host name, so only route through a proxy
+        // when one has actually been configured.
+        String proxyHost = hostConfiguration.getProxyHost();
+        if ( proxyHost != null )
+        {
+            reqConf.setProxy( new HttpHost( proxyHost,
+                    hostConfiguration.getProxyPort() ) );
+        }
+
+        // A client is built per request (as before) but is now closed in the
+        // finally block so its connection manager is released.
+        CloseableHttpClient client = HttpClients.custom()
+                .setDefaultRequestConfig( reqConf.build() ).build();
+        CloseableHttpResponse response = null;
+
+        try
+        {
+            response = client.execute( httpGet );
+
+            ByteArrayInputStream bais = new ByteArrayInputStream(
+                    buildArcRecord( urlStr, response ) );
+
+            // TODO: Should not use ARCRecord
+            ARCRecord r = new ARCRecord( bais, "id", 0L, false, false, true );
+            ArcResource ar = ( ArcResource )
+                    ResourceFactory.ARCArchiveRecordToResource( r, null );
+
+            if ( ar.getStatusCode() == 502 )
+            {
+                throw new LiveDocumentNotAvailableException( urlStr );
+            }
+            else if ( ar.getStatusCode() == 504 )
+            {
+                throw new LiveWebTimeoutException( "Timeout:" + urlStr );
+            }
+
+            return ar;
+        }
+        catch ( ResourceNotAvailableException e )
+        {
+            throw new LiveDocumentNotAvailableException( urlStr );
+        }
+        // The request is executed with HttpClient 4.x, so the 4.x exception
+        // types are caught here. They are fully qualified because the file
+        // imports the identically named commons-httpclient 3.x classes.
+        catch ( org.apache.http.NoHttpResponseException e )
+        {
+            throw new LiveWebCacheUnavailableException( "No Http Response for " +
+                    urlStr );
+        }
+        catch ( ConnectException e )
+        {
+            throw new LiveWebCacheUnavailableException( e.getLocalizedMessage() +
+                    " : " + urlStr );
+        }
+        catch ( SocketException e )
+        {
+            throw new LiveWebCacheUnavailableException( e.getLocalizedMessage() +
+                    " : " + urlStr );
+        }
+        catch ( SocketTimeoutException e )
+        {
+            throw new LiveWebTimeoutException( e.getLocalizedMessage() + " : " +
+                    urlStr );
+        }
+        catch ( org.apache.http.conn.ConnectTimeoutException e )
+        {
+            throw new LiveWebTimeoutException( e.getLocalizedMessage() + " : " +
+                    urlStr );
+        }
+        finally
+        {
+            // response is still null when execute() itself failed.
+            closeQuietly( response );
+            closeQuietly( client );
+        }
+    }
+
+    /**
+     * Serializes an HTTP response as an uncompressed ARC record: one metadata
+     * line, the HTTP status line and headers, a blank line, then the body.
+     *
+     * @param urlStr URL that was requested, written into the metadata line
+     * @param response response received for {@code urlStr}
+     * @return the complete record as bytes
+     * @throws IOException if the response body cannot be read
+     */
+    private static byte[] buildArcRecord( String urlStr,
+            CloseableHttpResponse response ) throws IOException
+    {
+        int status = response.getStatusLine().getStatusCode();
+        StringBuilder header = new StringBuilder();
+        byte[] body = new byte[0];
+
+        if ( status == 404 )
+        {
+            /* If it fails to get robots.txt (http status code is 404),
+               then serve a permissive robots.txt with status 200 instead
+               of reporting an error.
+             */
+            header.append( "HTTP/1.0 200 OK\n" );
+            body = String.format( "%s\n%s\n", "User-agent: *", "Allow: /" )
+                    .getBytes( RECORD_CHARSET );
+        }
+        else
+        {
+            // Always forward the real status line; without it the record has
+            // no parsable status and the 502/504 checks in the caller can
+            // never match.
+            header.append( String.format( "%s %d %s\n",
+                    response.getStatusLine().getProtocolVersion(),
+                    status,
+                    response.getStatusLine().getReasonPhrase() ) );
+
+            if ( status == 200 )
+            {
+                // Keep the body as raw bytes so it is neither re-encoded nor
+                // mis-measured when it contains non-ASCII characters.
+                HttpEntity entity = response.getEntity();
+                if ( entity != null )
+                {
+                    body = EntityUtils.toByteArray( entity );
+                }
+            }
+        }
+
+        for ( Header h : response.getAllHeaders() )
+        {
+            header.append( h.toString() ).append( '\n' );
+        }
+        header.append( '\n' );
+
+        byte[] headerBytes = header.toString().getBytes( RECORD_CHARSET );
+
+        // The ARC length field counts bytes following the metadata line.
+        int length = headerBytes.length + body.length;
+
+        // TODO: the following line should be enhanced,
+        // especially the placeholder IP address, date and mime type.
+        byte[] metaBytes = String.format( "%s %s %d\n", urlStr,
+                "0.0.0.0 10000000000000 text/plain", length )
+                .getBytes( RECORD_CHARSET );
+
+        byte[] record = new byte[ metaBytes.length + length ];
+        System.arraycopy( metaBytes, 0, record, 0, metaBytes.length );
+        System.arraycopy( headerBytes, 0, record, metaBytes.length,
+                headerBytes.length );
+        System.arraycopy( body, 0, record,
+                metaBytes.length + headerBytes.length, body.length );
+        return record;
+    }
+
+    /**
+     * Closes {@code closeable} if it is non-null, logging instead of
+     * propagating any failure so that it cannot mask the outcome of the
+     * request itself.
+     *
+     * @param closeable resource to close, may be null
+     */
+    private static void closeQuietly( java.io.Closeable closeable )
+    {
+        if ( closeable == null )
+        {
+            return;
+        }
+        try
+        {
+            closeable.close();
+        }
+        catch ( IOException e )
+        {
+            LOGGER.warning( "Failed to close live web connection: " +
+                    e.getLocalizedMessage() );
+        }
+    }
+
+    /**
+     * Sets proxy and port (proxy:port).
+     *
+     * @param hostPort to proxy requests through - ex. "localhost:3128";
+     *        values without a host and a colon are ignored with a warning
+     * @throws NumberFormatException if the port part is not an integer
+     */
+    public void setProxyHostPort( String hostPort )
+    {
+        int colonIdx = hostPort.indexOf( ':' );
+        if ( colonIdx > 0 )
+        {
+            String host = hostPort.substring( 0, colonIdx );
+            int port = Integer.parseInt( hostPort.substring( colonIdx + 1 ) );
+            hostConfiguration.setProxy( host, port );
+        }
+        else
+        {
+            LOGGER.warning( "Ignoring proxy setting, expected host:port but got: " +
+                    hostPort );
+        }
+    }
+
+    /**
+     * Shuts down the connection manager created by the constructor.
+     * Requests create and close their own HTTP client, so nothing else needs
+     * to be released here.
+     *
+     * @see org.archive.wayback.liveweb.LiveWebCache#shutdown()
+     */
+    @Override
+    public void shutdown()
+    {
+        if ( connectionManager != null )
+        {
+            connectionManager.shutdown();
+        }
+    }
+}
\ No newline at end of file
diff --git a/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml b/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml
index cec9b1a696..ea89f6c30b 100644
--- a/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml
+++ b/wayback-webapp/src/main/webapp/WEB-INF/LiveWeb.xml
@@ -30,12 +30,15 @@
+ class="org.archive.wayback.liveweb.ArcRemoteLiveWebCache">