Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2dcd409
Better behaviour in the presence of backing off
j-baker Aug 2, 2018
c583b33
Merge branch 'develop' into jbaker/better_429_behaviour
j-baker Aug 2, 2018
d51d3ac
more docs, easier to read
j-baker Aug 2, 2018
19a8173
Better concurrency limiters
j-baker Aug 3, 2018
d7164de
fixes
j-baker Aug 3, 2018
39388b7
simplify
j-baker Aug 3, 2018
f2927d0
Checkstyle
j-baker Aug 13, 2018
8f57de6
checkstyle
j-baker Aug 13, 2018
8f02b8b
PR comments
j-baker Sep 2, 2018
fbbcc41
Tweak the concurrency limiters lib
j-baker Sep 2, 2018
27a2153
reset flow control test
j-baker Sep 2, 2018
e548996
changes
j-baker Sep 2, 2018
a4c9e68
Changes
j-baker Sep 2, 2018
c68bc89
Passes the build
j-baker Sep 2, 2018
91dd6d2
update lockfiles
j-baker Sep 2, 2018
2229a81
Merge remote-tracking branch 'origin/develop' into jbaker/better_429_…
j-baker Sep 11, 2018
ee8e539
New attempt using interceptor
j-baker Sep 11, 2018
036d45b
more comments
j-baker Sep 11, 2018
1e18435
some bullshit
j-baker Sep 12, 2018
fbdeab1
Perfect
j-baker Sep 12, 2018
951dfdd
cleanup
j-baker Sep 12, 2018
ed18c48
Ready to go?
j-baker Sep 13, 2018
fda4d64
docs
j-baker Sep 13, 2018
8a088d7
Checkstyle
j-baker Sep 13, 2018
a9721e4
chekcstyle
j-baker Sep 13, 2018
1ce7b82
Metric
j-baker Sep 13, 2018
bc38b76
Javadoc
iamdanfox Sep 13, 2018
addbdca
README describes new flow control
iamdanfox Sep 13, 2018
c97925a
Move docs -> class level javadoc
iamdanfox Sep 13, 2018
baaa142
Rename ConcurrencyLimiters#limiter -> acquireLimiter
iamdanfox Sep 13, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public final class ClientConfigurations {
private static final Duration DEFAULT_FAILED_URL_COOLDOWN = Duration.ZERO;
private static final boolean DEFAULT_ENABLE_GCM_CIPHERS = false;
private static final NodeSelectionStrategy DEFAULT_NODE_SELECTION_STRATEGY = NodeSelectionStrategy.PIN_UNTIL_ERROR;
private static final int DEFAULT_MAX_NUM_RETRIES = 3;
private static final int DEFAULT_MAX_NUM_RETRIES = 4;

private ClientConfigurations() {}

Expand Down Expand Up @@ -87,7 +87,7 @@ public static ClientConfiguration of(
.enableGcmCipherSuites(DEFAULT_ENABLE_GCM_CIPHERS)
.proxy(ProxySelector.getDefault())
.proxyCredentials(Optional.empty())
.maxNumRetries(uris.size())
.maxNumRetries(DEFAULT_MAX_NUM_RETRIES)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uhm, I thought we had merged such a change already?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, but we did it in only one of the two places (see above in this class)

.backoffSlotSize(DEFAULT_BACKOFF_SLOT_SIZE)
.nodeSelectionStrategy(DEFAULT_NODE_SELECTION_STRATEGY)
.failedUrlCooldown(DEFAULT_FAILED_URL_COOLDOWN)
Expand Down
14 changes: 14 additions & 0 deletions jaxrs-clients/versions.lock
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@
"com.palantir.tracing:tracing"
]
},
"com.netflix.concurrency-limits:concurrency-limits-core": {
"locked": "0.0.48",
"transitive": [
"com.palantir.remoting3:okhttp-clients"
]
},
"com.netflix.feign:feign-core": {
"locked": "8.17.0",
"transitive": [
Expand Down Expand Up @@ -273,6 +279,7 @@
"org.slf4j:slf4j-api": {
"locked": "1.7.12",
"transitive": [
"com.netflix.concurrency-limits:concurrency-limits-core",
"com.netflix.feign:feign-slf4j",
"com.palantir.remoting3:error-handling",
"com.palantir.remoting3:okhttp-clients",
Expand Down Expand Up @@ -383,6 +390,12 @@
"com.palantir.tracing:tracing"
]
},
"com.netflix.concurrency-limits:concurrency-limits-core": {
"locked": "0.0.48",
"transitive": [
"com.palantir.remoting3:okhttp-clients"
]
},
"com.netflix.feign:feign-core": {
"locked": "8.17.0",
"transitive": [
Expand Down Expand Up @@ -557,6 +570,7 @@
"org.slf4j:slf4j-api": {
"locked": "1.7.12",
"transitive": [
"com.netflix.concurrency-limits:concurrency-limits-core",
"com.netflix.feign:feign-slf4j",
"com.palantir.remoting3:error-handling",
"com.palantir.remoting3:okhttp-clients",
Expand Down
14 changes: 14 additions & 0 deletions jaxrs-scala-clients/versions.lock
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@
"com.palantir.tracing:tracing"
]
},
"com.netflix.concurrency-limits:concurrency-limits-core": {
"locked": "0.0.48",
"transitive": [
"com.palantir.remoting3:okhttp-clients"
]
},
"com.netflix.feign:feign-core": {
"locked": "8.17.0",
"transitive": [
Expand Down Expand Up @@ -334,6 +340,7 @@
"org.slf4j:slf4j-api": {
"locked": "1.7.12",
"transitive": [
"com.netflix.concurrency-limits:concurrency-limits-core",
"com.netflix.feign:feign-slf4j",
"com.palantir.remoting3:error-handling",
"com.palantir.remoting3:jaxrs-clients",
Expand Down Expand Up @@ -460,6 +467,12 @@
"com.palantir.tracing:tracing"
]
},
"com.netflix.concurrency-limits:concurrency-limits-core": {
"locked": "0.0.48",
"transitive": [
"com.palantir.remoting3:okhttp-clients"
]
},
"com.netflix.feign:feign-core": {
"locked": "8.17.0",
"transitive": [
Expand Down Expand Up @@ -680,6 +693,7 @@
"org.slf4j:slf4j-api": {
"locked": "1.7.12",
"transitive": [
"com.netflix.concurrency-limits:concurrency-limits-core",
"com.netflix.feign:feign-slf4j",
"com.palantir.remoting3:error-handling",
"com.palantir.remoting3:jaxrs-clients",
Expand Down
1 change: 1 addition & 0 deletions okhttp-clients/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dependencies {
compile project(':http-clients')
compile project(':tracing-okhttp3')
compile 'com.google.guava:guava'
compile 'com.netflix.concurrency-limits:concurrency-limits-core'
compile 'com.palantir.safe-logging:preconditions'
compile 'com.palantir.tritium:tritium-registry'
compile 'com.squareup.okhttp3:logging-interceptor'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* (c) Copyright 2018 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.remoting3.okhttp;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@iamdanfox what's the deal with remoting-vs-conjure in PRs?


import com.google.common.annotations.VisibleForTesting;
import com.netflix.concurrency.limits.Limiter;
import com.netflix.concurrency.limits.limiter.BlockingLimiter;
import com.palantir.remoting3.tracing.okhttp3.OkhttpTraceInterceptor;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicBoolean;
import okhttp3.Request;

/**
* Flow control in Conjure is a collaborative effort between servers and clients. Servers advertise an overloaded state
* via 429/503 responses, and clients throttle the number of requests that they send concurrently as a response to this.
* The latter is implemented as a combination of two techniques, yielding a mechanism similar to flow control in TCP/IP.
* <ol>
* <li>
* Clients use the frequency of 429/503 responses (as well as the request latency) to determine an estimate
* for the number of permissible concurrent requests
* </li>
* <li>
* Each such request gets scheduled according to an exponential backoff algorithm.
* </li>
* </ol>
* <p>
* This class provides an asynchronous implementation of Netflix's
* <a href="https://github.com/Netflix/concurrency-limits/">concurrency-limits</a> library for determining the
* above mentioned concurrency estimates.
* <p>
* In order to use this class, one should acquire a Limiter for their request, which returns a future. Once the Future
* is completed, the caller can assume that the request is schedulable. After the request completes, the caller
* <b>must</b> call one of the methods on {@link Limiter.Listener} in order to provide feedback about the request's
* success. If this is not done, a deadlock could result.
*/
final class ConcurrencyLimiters {
private static final Void NO_CONTEXT = null;
private static final String FALLBACK = "";

private final ConcurrentMap<String, Limiter<Void>> limiters = new ConcurrentHashMap<>();

/**
 * Acquires a permit from the limiter registered under {@code name}, lazily creating that limiter on first use.
 *
 * <p>New limiters are a {@link BlockingLimiter} around {@code RemotingConcurrencyLimiter.createDefault()},
 * wrapped so that every listener handed out reacts to at most one callback.
 *
 * @param name key identifying the limiter to acquire from
 * @return the listener the caller must notify once the request has finished
 * @throws java.util.NoSuchElementException if the underlying limiter returns no listener
 */
@VisibleForTesting
Limiter.Listener limiter(String name) {
    Limiter<Void> limiterForName = limiters.computeIfAbsent(
            name,
            key -> new IdempotentLimiter(new BlockingLimiter<>(RemotingConcurrencyLimiter.createDefault())));
    Optional<Limiter.Listener> permit = limiterForName.acquire(NO_CONTEXT);
    return permit.get();
}

/**
 * Acquires a permit for the given request, using the limiter selected by the request's limiter key.
 *
 * @param request the outgoing request to acquire a permit for
 * @return the listener the caller must notify once the request has finished
 */
Limiter.Listener limiter(Request request) {
    String key = limiterKey(request);
    return limiter(key);
}

private static String limiterKey(Request request) {
String pathTemplate = request.header(OkhttpTraceInterceptor.PATH_TEMPLATE_HEADER);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a bit dodgy

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is still dodgy

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really see a way of avoiding this. It seems reasonable to do this by endpoint, and if you do that you end up with this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could also see this being something that uses a dynamic proxy which makes it much easier to limit per method or per some annotation. think the only sad thing about this is relying on the tracing header which is only ever passed around internally (never sent across the wire)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. But then I'd rename the code bits so that they're no longer "trace"-specific. Probably also need to stop deleting the header in the trace-specific code path

if (pathTemplate == null) {
return FALLBACK;
} else {
return request.method() + " " + pathTemplate;
}
}

/**
 * A {@link Limiter} decorator that wraps every listener returned by the delegate in an
 * {@link IdempotentListener}.
 */
private static final class IdempotentLimiter implements Limiter<Void> {
    private final Limiter<Void> delegate;

    private IdempotentLimiter(Limiter<Void> delegate) {
        this.delegate = delegate;
    }

    /**
     * Delegates the acquisition and, if a listener was handed out, wraps it.
     *
     * @param context the (unused) limiter context, forwarded to the delegate unchanged
     * @return the wrapped listener, or empty if the delegate returned none
     */
    @Override
    public Optional<Listener> acquire(Void context) {
        Optional<Listener> acquired = delegate.acquire(context);
        return acquired.map(listener -> new IdempotentListener(listener));
    }
}

/**
 * A {@link Limiter.Listener} decorator that forwards only the first callback it receives to the delegate; every
 * later call to any of the three methods is a no-op.
 *
 * <p>The "already consumed" flag is an {@link AtomicBoolean} flipped with compare-and-set, so that two callbacks
 * arriving from different threads cannot both pass the check and notify the delegate twice. The flag is set
 * before the delegate is invoked, so a delegate callback that throws still counts as consumed.
 */
private static final class IdempotentListener implements Limiter.Listener {
    private final Limiter.Listener delegate;
    private final AtomicBoolean consumed = new AtomicBoolean(false);

    private IdempotentListener(Limiter.Listener delegate) {
        this.delegate = delegate;
    }

    /** Forwards {@code onSuccess} to the delegate unless a callback has already been delivered. */
    @Override
    public void onSuccess() {
        if (markConsumed()) {
            delegate.onSuccess();
        }
    }

    /** Forwards {@code onIgnore} to the delegate unless a callback has already been delivered. */
    @Override
    public void onIgnore() {
        if (markConsumed()) {
            delegate.onIgnore();
        }
    }

    /** Forwards {@code onDropped} to the delegate unless a callback has already been delivered. */
    @Override
    public void onDropped() {
        if (markConsumed()) {
            delegate.onDropped();
        }
    }

    /** Atomically claims the single permitted callback; returns true only for the first caller. */
    private boolean markConsumed() {
        return consumed.compareAndSet(false, true);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package com.palantir.remoting3.okhttp;

import static com.google.common.base.Preconditions.checkState;

import com.netflix.concurrency.limits.Limiter;
import java.io.IOException;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import okhttp3.Call;
import okhttp3.Callback;
import okhttp3.Interceptor;
import okhttp3.Request;
import okhttp3.Response;
import okhttp3.ResponseBody;
import okio.AsyncTimeout;
import okio.BufferedSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* WIP docs for benefit of reviewer.
*
* An interceptor for limiting the concurrency of requests to an endpoint.
*
* Requests must be tagged (before reaching this point) with a ConcurrencyLimitTag. At this point, we block on
* receiving a permit to run the request, and store the listener in the tag.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to just use the QosHandler directly in here rather than passing around this tag and requiring it to be set

*
* When we see evidence of being dropped, we write this into the tag, and when the request retries again the permit
* will be returned to the pool before acquiring a new one.
*
* Users must also wrap the final callback they use; this is used in two ways; first it clears the state in case of
* failure, secondly on success it will wait until the response is closed before handing back permits. In other words,
* if you have a server with a concurrency limit (e.g. it is CPU bound), clients should respect the server's
* concurrency limit.
*
* This has a timeout of 1 minute (before an error is logged) in order to try to catch people who have leaked responses
* (which here will deadlock otherwise). It indicates an application bug every time, but might affect users poorly.
* I'm happy to remove it, but think there should probably be another solution?
*/
final class ConcurrencyLimitingInterceptor implements Interceptor {
private static final Logger log = LoggerFactory.getLogger(ConcurrencyLimitingInterceptor.class);
private final ConcurrencyLimiters limiters = new ConcurrencyLimiters();

/**
 * Releases any permit still recorded on the request's {@code ConcurrencyLimitTag}, acquires a fresh one for
 * this attempt, stores it in the tag and then lets the request proceed down the chain.
 *
 * @param chain the interceptor chain carrying the tagged request
 * @return the response produced by the rest of the chain
 * @throws IOException if the rest of the chain fails
 */
@Override
public Response intercept(Chain chain) throws IOException {
    Request request = chain.request();
    ConcurrencyLimitTag limitTag = request.tag(ConcurrencyLimitTag.class);
    // Hand back whatever the previous attempt (if any) was holding before asking for a new permit.
    limitTag.invalidate();
    Limiter.Listener permit = limiters.limiter(request);
    limitTag.setListener(permit);
    return chain.proceed(request);
}

public static Callback wrapCallback(Callback callback) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't follow why this is necessary?

return new Callback() {
@Override
public void onFailure(Call call, IOException e) {
Optional.ofNullable(call.request().tag(ConcurrencyLimitTag.class)).ifPresent(ConcurrencyLimitTag::invalidate);
callback.onFailure(call, e);
}

@Override
public void onResponse(Call call, Response response) throws IOException {
Response newResponse =
Optional.ofNullable(call.request().tag(ConcurrencyLimitTag.class))
.map(t -> wrapResponse(t, response))
.orElse(response);
callback.onResponse(call, newResponse);
}
};
}

/**
 * Returns a copy of {@code request} carrying a fresh {@link ConcurrencyLimitTag}.
 *
 * @param request the request to tag
 * @return a rebuilt request with a new, empty tag attached under {@code ConcurrencyLimitTag.class}
 */
public static Request wrapRequest(Request request) {
    Request.Builder tagged = request.newBuilder();
    tagged.tag(ConcurrencyLimitTag.class, new ConcurrencyLimitTag());
    return tagged.build();
}

/**
 * Wraps the body of {@code response} so that closing it reports success on {@code tag}, and starts a one-minute
 * timeout that invalidates the tag if the body has not been closed by then.
 *
 * @param tag the tag holding the permit for this call
 * @param response the response whose body should be wrapped
 * @return {@code response} itself if it has no body, otherwise a copy with the wrapped body
 */
private static Response wrapResponse(ConcurrencyLimitTag tag, Response response) {
    ResponseBody originalBody = response.body();
    if (originalBody == null) {
        // Nothing to close, so there is nothing to hook the release onto.
        return response;
    }

    ResourceDeallocator leakTimeout = new ResourceDeallocator(tag);
    ResponseBody wrappedBody = ResponseBody.create(
            originalBody.contentType(),
            originalBody.contentLength(),
            new ReleaseConcurrencyLimitBufferedSource(originalBody.source(), tag, leakTimeout));

    // Arm the timeout only once the wrapping source that will disarm it exists.
    leakTimeout.timeout(1, TimeUnit.MINUTES);
    leakTimeout.enter();

    Response.Builder rebuilt = response.newBuilder();
    rebuilt.body(wrappedBody);
    return rebuilt.build();
}

/**
 * Mutable per-request state attached to a request as an OkHttp tag. It holds the listener for the permit
 * currently associated with the request and remembers whether the request was reported as dropped.
 */
static final class ConcurrencyLimitTag {
    private Limiter.Listener listener;
    private boolean wasDropped = false;

    /**
     * Releases the currently held listener, if any, and resets the tag.
     *
     * <p>The listener is notified with {@code onDropped} if {@link #wasDropped()} was called since it was set,
     * otherwise with {@code onIgnore}. Does nothing when no listener is held.
     */
    private void invalidate() {
        if (listener == null) {
            return;
        }

        if (wasDropped) {
            listener.onDropped();
        } else {
            listener.onIgnore();
        }
        listener = null;
        wasDropped = false;
    }

    /**
     * Stores the listener for a newly acquired permit.
     *
     * @param listener the listener to hold until it is released
     * @throws IllegalStateException if a previously stored listener has not been released yet
     */
    private void setListener(Limiter.Listener listener) {
        // The parameter shadows the field, so the field must be qualified: the check is about the listener
        // already stored in this tag, not about the one being passed in.
        checkState(this.listener == null, "A previously acquired listener has not been released");
        this.listener = listener;
    }

    /**
     * Reports success on the held listener. Does nothing if no listener is held, which is the case when the
     * tag has already been invalidated.
     */
    public void success() {
        // Read the field once so the null check and the call act on the same reference.
        Limiter.Listener current = listener;
        if (current != null) {
            current.onSuccess();
        }
    }

    /** Records that the request was dropped, so that the next invalidation reports {@code onDropped}. */
    public void wasDropped() {
        wasDropped = true;
    }
}

private static final class ResourceDeallocator extends AsyncTimeout {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't think it's worth worrying about this case. if clients aren't releasing resources properly they're going to lock things up eventually anyway. At a minimum it should be a separate change from the concurrency limiting

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

discussed more in person: going to timeout on acquiring a limit vs releasing a limit which has the added benefit of always allowing requests through after some period of time

private final ConcurrencyLimitTag tag;

private ResourceDeallocator(ConcurrencyLimitTag tag) {
this.tag = tag;
}

@Override
public void timedOut() {
log.warn("A call appears to have been leaked. We think this is an application bug caused by not properly "
+ "cleaning up the response object. Make sure you close() it!");
tag.invalidate();
}
}

/**
 * A forwarding source that, when closed, stops the associated timeout, reports success on the tag and then
 * closes the wrapped source.
 */
private static final class ReleaseConcurrencyLimitBufferedSource extends ForwardingBufferedSource {
    private final BufferedSource delegate;
    private final ConcurrencyLimitTag tag;
    private final ResourceDeallocator deallocator;

    private ReleaseConcurrencyLimitBufferedSource(BufferedSource delegate,
            ConcurrencyLimitTag tag,
            ResourceDeallocator deallocator) {
        super(delegate);
        this.delegate = delegate;
        this.tag = tag;
        this.deallocator = deallocator;
    }

    /**
     * Stops the timeout, reports success on the tag and closes the wrapped source.
     *
     * <p>The wrapped source is closed in a {@code finally} block so that it is released even if stopping the
     * timeout or notifying the tag throws.
     *
     * @throws IOException if closing the wrapped source fails
     */
    @Override
    public void close() throws IOException {
        try {
            if (deallocator.exit()) {
                log.info("The timeout fired but we have now closed the source. This implies a very long lived "
                        + "call being used properly, which the Conjure devs do not expect.");
            }
            tag.success();
        } finally {
            delegate.close();
        }
    }
}

}
Loading