Skip to content

Commit a612dd1

Browse files
authored
Zen2: Add node id to log output of CoordinatorTests (#33929)
With recent changes to the logging framework, the node name can no longer be injected into the logging output using the node.name setting, which means that for the CoordinatorTests (which are simulating a cluster in a fully deterministic fashion using a single thread), as all the different nodes are running under the same test thread, we are not able to distinguish which log lines are coming from which node. This commit readds logging for node ids in the CoordinatorTests, making two very small changes to DeterministicTaskQueue and TestThreadInfoPatternConverter.
1 parent 187f787 commit a612dd1

File tree

9 files changed

+158
-145
lines changed

9 files changed

+158
-145
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -557,10 +557,16 @@ protected void sendApplyCommit(DiscoveryNode destination, ApplyCommitRequest app
557557
: "[" + currentPublication.get() + "] in progress, cannot start [" + publication + ']';
558558
currentPublication = Optional.of(publication);
559559

560-
transportService.getThreadPool().schedule(publishTimeout, Names.GENERIC, () -> {
561-
synchronized (mutex) {
560+
transportService.getThreadPool().schedule(publishTimeout, Names.GENERIC, new Runnable() {
561+
@Override
562+
public void run() {
562563
publication.onTimeout();
563564
}
565+
566+
@Override
567+
public String toString() {
568+
return "scheduled timeout for " + publication;
569+
}
564570
});
565571
publication.start(Collections.emptySet()); // TODO start failure detector and put faultyNodes here
566572
}
@@ -625,15 +631,23 @@ protected void onFoundPeersUpdated() {
625631
if (foundQuorum) {
626632
if (electionScheduler == null) {
627633
final TimeValue gracePeriod = TimeValue.ZERO; // TODO variable grace period
628-
electionScheduler = electionSchedulerFactory.startElectionScheduler(gracePeriod, () -> {
629-
synchronized (mutex) {
630-
if (mode == Mode.CANDIDATE) {
631-
if (prevotingRound != null) {
632-
prevotingRound.close();
634+
electionScheduler = electionSchedulerFactory.startElectionScheduler(gracePeriod, new Runnable() {
635+
@Override
636+
public void run() {
637+
synchronized (mutex) {
638+
if (mode == Mode.CANDIDATE) {
639+
if (prevotingRound != null) {
640+
prevotingRound.close();
641+
}
642+
prevotingRound = preVoteCollector.start(lastAcceptedState, getDiscoveredNodes());
633643
}
634-
prevotingRound = preVoteCollector.start(lastAcceptedState, getDiscoveredNodes());
635644
}
636645
}
646+
647+
@Override
648+
public String toString() {
649+
return "scheduling of new prevoting round";
650+
}
637651
});
638652
}
639653
} else {

server/src/main/java/org/elasticsearch/common/util/concurrent/ListenableFuture.java

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,22 @@ protected synchronized void done() {
8282

8383
private void notifyListener(ActionListener<V> listener, ExecutorService executorService) {
8484
try {
85-
executorService.submit(() -> {
86-
try {
87-
// call get in a non-blocking fashion as we could be on a network thread
88-
// or another thread like the scheduler, which we should never block!
89-
V value = FutureUtils.get(this, 0L, TimeUnit.NANOSECONDS);
90-
listener.onResponse(value);
91-
} catch (Exception e) {
92-
listener.onFailure(e);
85+
executorService.submit(new Runnable() {
86+
@Override
87+
public void run() {
88+
try {
89+
// call get in a non-blocking fashion as we could be on a network thread
90+
// or another thread like the scheduler, which we should never block!
91+
V value = FutureUtils.get(ListenableFuture.this, 0L, TimeUnit.NANOSECONDS);
92+
listener.onResponse(value);
93+
} catch (Exception e) {
94+
listener.onFailure(e);
95+
}
96+
}
97+
98+
@Override
99+
public String toString() {
100+
return "ListenableFuture notification";
93101
}
94102
});
95103
} catch (Exception e) {

server/src/main/java/org/elasticsearch/transport/TransportResponse.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,10 @@ public abstract class TransportResponse extends TransportMessage {
2323

2424
public static class Empty extends TransportResponse {
2525
public static final Empty INSTANCE = new Empty();
26+
27+
@Override
28+
public String toString() {
29+
return "Empty{}";
30+
}
2631
}
2732
}

server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java

Lines changed: 46 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.elasticsearch.cluster.coordination;
2020

21+
import org.apache.logging.log4j.CloseableThreadContext;
2122
import org.apache.logging.log4j.message.ParameterizedMessage;
2223
import org.elasticsearch.Version;
2324
import org.elasticsearch.cluster.ClusterState;
@@ -38,13 +39,8 @@
3839
import org.elasticsearch.discovery.zen.UnicastHostsProvider.HostsResolver;
3940
import org.elasticsearch.indices.cluster.FakeThreadPoolMasterService;
4041
import org.elasticsearch.test.ESTestCase;
42+
import org.elasticsearch.test.disruption.DisruptableMockTransport;
4143
import org.elasticsearch.test.junit.annotations.TestLogging;
42-
import org.elasticsearch.test.transport.MockTransport;
43-
import org.elasticsearch.transport.RequestHandlerRegistry;
44-
import org.elasticsearch.transport.TransportChannel;
45-
import org.elasticsearch.transport.TransportRequest;
46-
import org.elasticsearch.transport.TransportResponse;
47-
import org.elasticsearch.transport.TransportResponseOptions;
4844
import org.elasticsearch.transport.TransportService;
4945
import org.hamcrest.Matcher;
5046

@@ -55,7 +51,6 @@
5551
import java.util.List;
5652
import java.util.Optional;
5753
import java.util.Set;
58-
import java.util.function.Consumer;
5954
import java.util.function.Predicate;
6055
import java.util.stream.Collectors;
6156

@@ -184,15 +179,15 @@ class ClusterNode extends AbstractComponent {
184179
private final PersistedState persistedState;
185180
private MasterService masterService;
186181
private TransportService transportService;
187-
private MockTransport mockTransport;
182+
private DisruptableMockTransport mockTransport;
188183

189184
ClusterNode(int nodeIndex) {
190185
super(Settings.builder().put(NODE_NAME_SETTING.getKey(), nodeIdFromIndex(nodeIndex)).build());
191186
this.nodeIndex = nodeIndex;
192187
localNode = createDiscoveryNode();
193188
persistedState = new InMemoryPersistedState(1L,
194189
clusterState(1L, 1L, localNode, initialConfiguration, initialConfiguration, 0L));
195-
setUp();
190+
onNode(localNode, this::setUp).run();
196191
}
197192

198193
private DiscoveryNode createDiscoveryNode() {
@@ -206,112 +201,44 @@ private DiscoveryNode createDiscoveryNode() {
206201
}
207202

208203
private void setUp() {
209-
mockTransport = new MockTransport() {
204+
mockTransport = new DisruptableMockTransport(logger) {
210205
@Override
211-
protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode destination) {
212-
assert destination.equals(localNode) == false : "non-local message from " + localNode + " to itself";
213-
super.onSendRequest(requestId, action, request, destination);
206+
protected DiscoveryNode getLocalNode() {
207+
return localNode;
208+
}
209+
210+
@Override
211+
protected ConnectionStatus getConnectionStatus(DiscoveryNode sender, DiscoveryNode destination) {
212+
return ConnectionStatus.CONNECTED;
213+
}
214214

215-
// connecting and handshaking with a new node happens synchronously, so we cannot enqueue these tasks for later
216-
final Consumer<Runnable> scheduler;
215+
@Override
216+
protected Optional<DisruptableMockTransport> getDisruptedCapturingTransport(DiscoveryNode node, String action) {
217217
final Predicate<ClusterNode> matchesDestination;
218218
if (action.equals(HANDSHAKE_ACTION_NAME)) {
219-
scheduler = Runnable::run;
220-
matchesDestination = n -> n.getLocalNode().getAddress().equals(destination.getAddress());
219+
matchesDestination = n -> n.getLocalNode().getAddress().equals(node.getAddress());
221220
} else {
222-
scheduler = deterministicTaskQueue::scheduleNow;
223-
matchesDestination = n -> n.getLocalNode().equals(destination);
221+
matchesDestination = n -> n.getLocalNode().equals(node);
224222
}
223+
return clusterNodes.stream().filter(matchesDestination).findAny().map(cn -> cn.mockTransport);
224+
}
225225

226-
scheduler.accept(new Runnable() {
227-
@Override
228-
public String toString() {
229-
return "delivery of [" + action + "][" + requestId + "]: " + request;
230-
}
231-
232-
@Override
233-
public void run() {
234-
clusterNodes.stream().filter(matchesDestination).findAny().ifPresent(
235-
destinationNode -> {
236-
237-
final RequestHandlerRegistry requestHandler
238-
= destinationNode.mockTransport.getRequestHandler(action);
239-
240-
final TransportChannel transportChannel = new TransportChannel() {
241-
@Override
242-
public String getProfileName() {
243-
return "default";
244-
}
245-
246-
@Override
247-
public String getChannelType() {
248-
return "coordinator-test-channel";
249-
}
250-
251-
@Override
252-
public void sendResponse(final TransportResponse response) {
253-
scheduler.accept(new Runnable() {
254-
@Override
255-
public String toString() {
256-
return "delivery of response " + response
257-
+ " to [" + action + "][" + requestId + "]: " + request;
258-
}
259-
260-
@Override
261-
public void run() {
262-
handleResponse(requestId, response);
263-
}
264-
});
265-
}
266-
267-
@Override
268-
public void sendResponse(TransportResponse response, TransportResponseOptions options) {
269-
sendResponse(response);
270-
}
271-
272-
@Override
273-
public void sendResponse(Exception exception) {
274-
scheduler.accept(new Runnable() {
275-
@Override
276-
public String toString() {
277-
return "delivery of error response " + exception.getMessage()
278-
+ " to [" + action + "][" + requestId + "]: " + request;
279-
}
280-
281-
@Override
282-
public void run() {
283-
handleRemoteError(requestId, exception);
284-
}
285-
});
286-
}
287-
};
288-
289-
try {
290-
processMessageReceived(request, requestHandler, transportChannel);
291-
} catch (Exception e) {
292-
scheduler.accept(new Runnable() {
293-
@Override
294-
public String toString() {
295-
return "delivery of processing error response " + e.getMessage()
296-
+ " to [" + action + "][" + requestId + "]: " + request;
297-
}
298-
299-
@Override
300-
public void run() {
301-
handleRemoteError(requestId, e);
302-
}
303-
});
304-
}
305-
}
306-
);
307-
}
308-
});
226+
@Override
227+
protected void handle(DiscoveryNode sender, DiscoveryNode destination, String action, Runnable doDelivery) {
228+
// handshake needs to run inline as the caller blockingly waits on the result
229+
if (action.equals(HANDSHAKE_ACTION_NAME)) {
230+
onNode(destination, doDelivery).run();
231+
} else {
232+
deterministicTaskQueue.scheduleNow(onNode(destination, doDelivery));
233+
}
309234
}
310235
};
311236

312-
masterService = new FakeThreadPoolMasterService("test", deterministicTaskQueue::scheduleNow);
237+
masterService = new FakeThreadPoolMasterService("test",
238+
runnable -> deterministicTaskQueue.scheduleNow(onNode(localNode, runnable)));
313239
transportService = mockTransport.createTransportService(
314-
settings, deterministicTaskQueue.getThreadPool(), NOOP_TRANSPORT_INTERCEPTOR, a -> localNode, null, emptySet());
240+
settings, deterministicTaskQueue.getThreadPool(runnable -> onNode(localNode, runnable)), NOOP_TRANSPORT_INTERCEPTOR,
241+
a -> localNode, null, emptySet());
315242
coordinator = new Coordinator(settings, transportService, ESAllocationTestCase.createAllocationService(Settings.EMPTY),
316243
masterService, this::getPersistedState, Cluster.this::provideUnicastHosts, Randomness.get());
317244
masterService.setClusterStatePublisher(coordinator);
@@ -359,9 +286,20 @@ private List<TransportAddress> provideUnicastHosts(HostsResolver ignored) {
359286
}
360287
}
361288

362-
@SuppressWarnings("unchecked")
363-
private static void processMessageReceived(TransportRequest request, RequestHandlerRegistry requestHandler,
364-
TransportChannel transportChannel) throws Exception {
365-
requestHandler.processMessageReceived(request, transportChannel);
289+
private static Runnable onNode(DiscoveryNode node, Runnable runnable) {
290+
final String nodeId = "{" + node.getId() + "}{" + node.getEphemeralId() + "}";
291+
return new Runnable() {
292+
@Override
293+
public void run() {
294+
try (CloseableThreadContext.Instance ignored = CloseableThreadContext.put("nodeId", nodeId)) {
295+
runnable.run();
296+
}
297+
}
298+
299+
@Override
300+
public String toString() {
301+
return nodeId + ": " + runnable.toString();
302+
}
303+
};
366304
}
367305
}

test/framework/src/main/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueue.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import java.util.concurrent.ScheduledFuture;
4141
import java.util.concurrent.TimeUnit;
4242
import java.util.concurrent.atomic.AtomicInteger;
43+
import java.util.function.Function;
4344

4445
public class DeterministicTaskQueue extends AbstractComponent {
4546

@@ -182,6 +183,13 @@ public void advanceTime() {
182183
* @return A <code>ExecutorService</code> that uses this task queue.
183184
*/
184185
public ExecutorService getExecutorService() {
186+
return getExecutorService(Function.identity());
187+
}
188+
189+
/**
190+
* @return A <code>ExecutorService</code> that uses this task queue and wraps <code>Runnable</code>s in the given wrapper.
191+
*/
192+
public ExecutorService getExecutorService(Function<Runnable, Runnable> runnableWrapper) {
185193
return new ExecutorService() {
186194

187195
@Override
@@ -246,7 +254,7 @@ public <T> T invokeAny(Collection<? extends Callable<T>> tasks, long timeout, Ti
246254

247255
@Override
248256
public void execute(Runnable command) {
249-
scheduleNow(command);
257+
scheduleNow(runnableWrapper.apply(command));
250258
}
251259
};
252260
}
@@ -255,6 +263,13 @@ public void execute(Runnable command) {
255263
* @return A <code>ThreadPool</code> that uses this task queue.
256264
*/
257265
public ThreadPool getThreadPool() {
266+
return getThreadPool(Function.identity());
267+
}
268+
269+
/**
270+
* @return A <code>ThreadPool</code> that uses this task queue and wraps <code>Runnable</code>s in the given wrapper.
271+
*/
272+
public ThreadPool getThreadPool(Function<Runnable, Runnable> runnableWrapper) {
258273
return new ThreadPool(settings) {
259274

260275
{
@@ -303,12 +318,12 @@ public ThreadPoolStats stats() {
303318

304319
@Override
305320
public ExecutorService generic() {
306-
return getExecutorService();
321+
return getExecutorService(runnableWrapper);
307322
}
308323

309324
@Override
310325
public ExecutorService executor(String name) {
311-
return getExecutorService();
326+
return getExecutorService(runnableWrapper);
312327
}
313328

314329
@Override
@@ -318,7 +333,7 @@ public ScheduledFuture<?> schedule(TimeValue delay, String executor, Runnable co
318333
final int CANCELLED = 2;
319334
final AtomicInteger taskState = new AtomicInteger(NOT_STARTED);
320335

321-
scheduleAt(currentTimeMillis + delay.millis(), new Runnable() {
336+
scheduleAt(currentTimeMillis + delay.millis(), runnableWrapper.apply(new Runnable() {
322337
@Override
323338
public void run() {
324339
if (taskState.compareAndSet(NOT_STARTED, STARTED)) {
@@ -330,7 +345,7 @@ public void run() {
330345
public String toString() {
331346
return command.toString();
332347
}
333-
});
348+
}));
334349

335350
return new ScheduledFuture<Object>() {
336351
@Override

0 commit comments

Comments
 (0)