Skip to content

Commit 289e34a

Browse files
authored
[Zen2] Add HandshakingTransportAddressConnector (#32643)
The `PeerFinder`, introduced in #32246, needs to be able to identify, and connect to, a remote master node using only its `TransportAddress`. This can be done by opening a single-channel connection to the address, performing a handshake, and only then forming a full-blown connection to the node. This change implements this logic.
1 parent 2176184 commit 289e34a

File tree

3 files changed

+300
-5
lines changed

3 files changed

+300
-5
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.discovery;
21+
22+
import org.elasticsearch.Version;
23+
import org.elasticsearch.action.ActionListener;
24+
import org.elasticsearch.cluster.node.DiscoveryNode;
25+
import org.elasticsearch.common.component.AbstractComponent;
26+
import org.elasticsearch.common.settings.Setting;
27+
import org.elasticsearch.common.settings.Settings;
28+
import org.elasticsearch.common.transport.TransportAddress;
29+
import org.elasticsearch.common.unit.TimeValue;
30+
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
31+
import org.elasticsearch.core.internal.io.IOUtils;
32+
import org.elasticsearch.discovery.PeerFinder.TransportAddressConnector;
33+
import org.elasticsearch.transport.ConnectTransportException;
34+
import org.elasticsearch.transport.ConnectionProfile;
35+
import org.elasticsearch.transport.Transport.Connection;
36+
import org.elasticsearch.transport.TransportRequestOptions.Type;
37+
import org.elasticsearch.transport.TransportService;
38+
39+
import static java.util.Collections.emptyMap;
40+
import static java.util.Collections.emptySet;
41+
42+
public class HandshakingTransportAddressConnector extends AbstractComponent implements TransportAddressConnector {
43+
44+
// connection timeout for probes
45+
public static final Setting<TimeValue> PROBE_CONNECT_TIMEOUT_SETTING =
46+
Setting.timeSetting("discovery.probe.connect_timeout",
47+
TimeValue.timeValueMillis(3000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
48+
// handshake timeout for probes
49+
public static final Setting<TimeValue> PROBE_HANDSHAKE_TIMEOUT_SETTING =
50+
Setting.timeSetting("discovery.probe.handshake_timeout",
51+
TimeValue.timeValueMillis(1000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
52+
53+
private final TransportService transportService;
54+
private final TimeValue probeConnectTimeout;
55+
private final TimeValue probeHandshakeTimeout;
56+
57+
public HandshakingTransportAddressConnector(Settings settings, TransportService transportService) {
58+
super(settings);
59+
this.transportService = transportService;
60+
probeConnectTimeout = PROBE_CONNECT_TIMEOUT_SETTING.get(settings);
61+
probeHandshakeTimeout = PROBE_HANDSHAKE_TIMEOUT_SETTING.get(settings);
62+
}
63+
64+
@Override
65+
public void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener<DiscoveryNode> listener) {
66+
transportService.getThreadPool().generic().execute(new AbstractRunnable() {
67+
@Override
68+
protected void doRun() throws Exception {
69+
70+
// TODO if transportService is already connected to this address then skip the handshaking
71+
72+
final DiscoveryNode targetNode = new DiscoveryNode(transportAddress.toString(), transportAddress, emptyMap(),
73+
emptySet(), Version.CURRENT.minimumCompatibilityVersion());
74+
75+
logger.trace("[{}] opening probe connection", this);
76+
final Connection connection = transportService.openConnection(targetNode,
77+
ConnectionProfile.buildSingleChannelProfile(Type.REG, probeConnectTimeout, probeHandshakeTimeout));
78+
logger.trace("[{}] opened probe connection", this);
79+
80+
final DiscoveryNode remoteNode;
81+
try {
82+
remoteNode = transportService.handshake(connection, probeHandshakeTimeout.millis());
83+
// success means (amongst other things) that the cluster names match
84+
logger.trace("[{}] handshake successful: {}", this, remoteNode);
85+
} finally {
86+
IOUtils.closeWhileHandlingException(connection);
87+
}
88+
89+
if (remoteNode.equals(transportService.getLocalNode())) {
90+
// TODO cache this result for some time? forever?
91+
listener.onFailure(new ConnectTransportException(remoteNode, "local node found"));
92+
} else if (remoteNode.isMasterNode() == false) {
93+
// TODO cache this result for some time?
94+
listener.onFailure(new ConnectTransportException(remoteNode, "non-master-eligible node found"));
95+
} else {
96+
transportService.connectToNode(remoteNode);
97+
logger.trace("[{}] full connection successful: {}", this, remoteNode);
98+
listener.onResponse(remoteNode);
99+
}
100+
}
101+
102+
@Override
103+
public void onFailure(Exception e) {
104+
listener.onFailure(e);
105+
}
106+
107+
@Override
108+
public String toString() {
109+
return "connectToRemoteMasterNode[" + transportAddress + "]";
110+
}
111+
});
112+
}
113+
}
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.discovery;
21+
22+
import org.apache.lucene.util.SetOnce;
23+
import org.elasticsearch.Version;
24+
import org.elasticsearch.action.ActionListener;
25+
import org.elasticsearch.cluster.ClusterName;
26+
import org.elasticsearch.cluster.node.DiscoveryNode;
27+
import org.elasticsearch.common.settings.Settings;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.test.transport.CapturingTransport;
30+
import org.elasticsearch.threadpool.TestThreadPool;
31+
import org.elasticsearch.threadpool.ThreadPool;
32+
import org.elasticsearch.transport.TransportRequest;
33+
import org.elasticsearch.transport.TransportService;
34+
import org.elasticsearch.transport.TransportService.HandshakeResponse;
35+
import org.junit.After;
36+
import org.junit.Before;
37+
38+
import java.util.concurrent.CountDownLatch;
39+
import java.util.concurrent.TimeUnit;
40+
41+
import static java.util.Collections.emptyMap;
42+
import static java.util.Collections.emptySet;
43+
import static org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING;
44+
import static org.elasticsearch.discovery.HandshakingTransportAddressConnector.PROBE_HANDSHAKE_TIMEOUT_SETTING;
45+
import static org.elasticsearch.node.Node.NODE_NAME_SETTING;
46+
import static org.hamcrest.Matchers.equalTo;
47+
48+
public class HandshakingTransportAddressConnectorTests extends ESTestCase {
49+
50+
private DiscoveryNode remoteNode;
51+
private TransportService transportService;
52+
private ThreadPool threadPool;
53+
private String remoteClusterName;
54+
private HandshakingTransportAddressConnector handshakingTransportAddressConnector;
55+
private DiscoveryNode localNode;
56+
57+
private boolean dropHandshake;
58+
59+
@Before
60+
public void startServices() {
61+
localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
62+
final Settings settings = Settings.builder()
63+
.put(NODE_NAME_SETTING.getKey(), "node")
64+
.put(CLUSTER_NAME_SETTING.getKey(), "local-cluster")
65+
.build();
66+
threadPool = new TestThreadPool("node", settings);
67+
68+
remoteNode = null;
69+
remoteClusterName = null;
70+
dropHandshake = false;
71+
72+
final CapturingTransport capturingTransport = new CapturingTransport() {
73+
@Override
74+
protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
75+
super.onSendRequest(requestId, action, request, node);
76+
assertThat(action, equalTo(TransportService.HANDSHAKE_ACTION_NAME));
77+
assertEquals(remoteNode.getAddress(), node.getAddress());
78+
assertNotEquals(remoteNode, node);
79+
if (dropHandshake == false) {
80+
handleResponse(requestId, new HandshakeResponse(remoteNode, new ClusterName(remoteClusterName), Version.CURRENT));
81+
}
82+
}
83+
};
84+
85+
transportService = new TransportService(settings, capturingTransport, threadPool,
86+
TransportService.NOOP_TRANSPORT_INTERCEPTOR, address -> localNode, null, emptySet());
87+
88+
transportService.start();
89+
transportService.acceptIncomingRequests();
90+
91+
handshakingTransportAddressConnector = new HandshakingTransportAddressConnector(settings, transportService);
92+
}
93+
94+
@After
95+
public void stopServices() throws InterruptedException {
96+
transportService.stop();
97+
terminate(threadPool);
98+
}
99+
100+
public void testConnectsToMasterNode() throws InterruptedException {
101+
final CountDownLatch completionLatch = new CountDownLatch(1);
102+
final SetOnce<DiscoveryNode> receivedNode = new SetOnce<>();
103+
104+
remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), Version.CURRENT);
105+
remoteClusterName = "local-cluster";
106+
107+
handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), new ActionListener<DiscoveryNode>() {
108+
@Override
109+
public void onResponse(DiscoveryNode discoveryNode) {
110+
receivedNode.set(discoveryNode);
111+
completionLatch.countDown();
112+
}
113+
114+
@Override
115+
public void onFailure(Exception e) {
116+
throw new AssertionError(e);
117+
}
118+
});
119+
120+
assertTrue(completionLatch.await(30, TimeUnit.SECONDS));
121+
assertEquals(remoteNode, receivedNode.get());
122+
}
123+
124+
public void testDoesNotConnectToNonMasterNode() throws InterruptedException {
125+
remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
126+
remoteClusterName = "local-cluster";
127+
128+
FailureListener failureListener = new FailureListener();
129+
handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener);
130+
failureListener.assertFailure();
131+
}
132+
133+
public void testDoesNotConnectToLocalNode() throws Exception {
134+
remoteNode = localNode;
135+
remoteClusterName = "local-cluster";
136+
137+
FailureListener failureListener = new FailureListener();
138+
handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener);
139+
failureListener.assertFailure();
140+
}
141+
142+
public void testDoesNotConnectToDifferentCluster() throws InterruptedException {
143+
remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), Version.CURRENT);
144+
remoteClusterName = "another-cluster";
145+
146+
FailureListener failureListener = new FailureListener();
147+
handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener);
148+
failureListener.assertFailure();
149+
}
150+
151+
public void testHandshakeTimesOut() throws InterruptedException {
152+
remoteNode = new DiscoveryNode("remote-node", buildNewFakeTransportAddress(), Version.CURRENT);
153+
remoteClusterName = "local-cluster";
154+
dropHandshake = true;
155+
156+
FailureListener failureListener = new FailureListener();
157+
handshakingTransportAddressConnector.connectToRemoteMasterNode(remoteNode.getAddress(), failureListener);
158+
Thread.sleep(PROBE_HANDSHAKE_TIMEOUT_SETTING.get(Settings.EMPTY).millis());
159+
failureListener.assertFailure();
160+
}
161+
162+
private class FailureListener implements ActionListener<DiscoveryNode> {
163+
final CountDownLatch completionLatch = new CountDownLatch(1);
164+
165+
@Override
166+
public void onResponse(DiscoveryNode discoveryNode) {
167+
fail(discoveryNode.toString());
168+
}
169+
170+
@Override
171+
public void onFailure(Exception e) {
172+
completionLatch.countDown();
173+
}
174+
175+
void assertFailure() throws InterruptedException {
176+
assertTrue(completionLatch.await(30, TimeUnit.SECONDS));
177+
}
178+
}
179+
}

test/framework/src/main/java/org/elasticsearch/test/transport/CapturingTransport.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,18 +204,21 @@ public DiscoveryNode getNode() {
204204

205205
@Override
206206
public void sendRequest(long requestId, String action, TransportRequest request, TransportRequestOptions options)
207-
throws IOException, TransportException {
208-
requests.put(requestId, Tuple.tuple(node, action));
209-
capturedRequests.add(new CapturedRequest(node, requestId, action, request));
207+
throws TransportException {
208+
onSendRequest(requestId, action, request, node);
210209
}
211210

212211
@Override
213-
public void close() throws IOException {
214-
212+
public void close() {
215213
}
216214
};
217215
}
218216

217+
protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
218+
requests.put(requestId, Tuple.tuple(node, action));
219+
capturedRequests.add(new CapturedRequest(node, requestId, action, request));
220+
}
221+
219222
@Override
220223
public TransportStats getStats() {
221224
throw new UnsupportedOperationException();

0 commit comments

Comments
 (0)