Skip to content
This repository was archived by the owner on Apr 30, 2025. It is now read-only.

Commit a7dbf84

Browse files
domdom82geofffranks
authored andcommitted
feat: classify incomplete requests as retriable
feat(proxy_round_tripper): Use atomics instead of mutexes for traces
1 parent 8ca9d2f commit a7dbf84

File tree

12 files changed

+345
-132
lines changed

12 files changed

+345
-132
lines changed

proxy/fails/basic_classifiers.go

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,88 @@
11
package fails
22

33
import (
4+
"context"
45
"crypto/tls"
56
"crypto/x509"
67
"errors"
78
"net"
8-
9-
"context"
9+
"strings"
1010
)
1111

1212
var IdempotentRequestEOFError = errors.New("EOF (via idempotent request)")
1313

14+
var IncompleteRequestError = errors.New("incomplete request")
15+
1416
var AttemptedTLSWithNonTLSBackend = ClassifierFunc(func(err error) bool {
15-
switch err.(type) {
16-
case tls.RecordHeaderError, *tls.RecordHeaderError:
17-
return true
18-
default:
19-
return false
20-
}
17+
return errors.As(err, &tls.RecordHeaderError{})
2118
})
2219

2320
var Dial = ClassifierFunc(func(err error) bool {
24-
ne, ok := err.(*net.OpError)
25-
return ok && ne.Op == "dial"
21+
var opErr *net.OpError
22+
if errors.As(err, &opErr) {
23+
return opErr.Op == "dial"
24+
}
25+
return false
2626
})
2727

2828
var ContextCancelled = ClassifierFunc(func(err error) bool {
29-
return err == context.Canceled
29+
return errors.Is(err, context.Canceled)
3030
})
3131

3232
var ConnectionResetOnRead = ClassifierFunc(func(err error) bool {
33-
ne, ok := err.(*net.OpError)
34-
return ok && ne.Op == "read" && ne.Err.Error() == "read: connection reset by peer"
33+
var opErr *net.OpError
34+
if errors.As(err, &opErr) {
35+
return opErr.Err.Error() == "read: connection reset by peer"
36+
}
37+
return false
3538
})
3639

3740
var RemoteFailedCertCheck = ClassifierFunc(func(err error) bool {
38-
return err != nil && (err.Error() == "readLoopPeekFailLocked: remote error: tls: bad certificate" || err.Error() == "remote error: tls: bad certificate")
41+
var opErr *net.OpError
42+
if errors.As(err, &opErr) {
43+
return opErr.Op == "remote error" && opErr.Err.Error() == "tls: bad certificate"
44+
}
45+
return false
3946
})
4047

4148
var RemoteHandshakeTimeout = ClassifierFunc(func(err error) bool {
42-
return err != nil && err.Error() == "net/http: TLS handshake timeout"
49+
return err != nil && strings.Contains(err.Error(), "net/http: TLS handshake timeout")
4350
})
4451

4552
var ExpiredOrNotYetValidCertFailure = ClassifierFunc(func(err error) bool {
46-
switch x509err := err.(type) {
47-
case x509.CertificateInvalidError:
48-
return x509err.Reason == x509.Expired
49-
case *x509.CertificateInvalidError:
50-
return x509err.Reason == x509.Expired
51-
default:
52-
return false
53+
var certErr x509.CertificateInvalidError
54+
if errors.As(err, &certErr) {
55+
return certErr.Reason == x509.Expired
5356
}
57+
return false
5458
})
5559

5660
var RemoteHandshakeFailure = ClassifierFunc(func(err error) bool {
57-
return err != nil && err.Error() == "remote error: tls: handshake failure"
61+
var opErr *net.OpError
62+
if errors.As(err, &opErr) {
63+
return opErr != nil && opErr.Error() == "remote error: tls: handshake failure"
64+
}
65+
return false
5866
})
5967

6068
var HostnameMismatch = ClassifierFunc(func(err error) bool {
61-
switch err.(type) {
62-
case x509.HostnameError, *x509.HostnameError:
63-
return true
64-
default:
65-
return false
66-
}
69+
return errors.As(err, &x509.HostnameError{})
6770
})
6871

6972
var UntrustedCert = ClassifierFunc(func(err error) bool {
70-
switch err.(type) {
71-
case x509.UnknownAuthorityError, *x509.UnknownAuthorityError:
73+
var tlsCertError *tls.CertificateVerificationError
74+
switch {
75+
case errors.As(err, &x509.UnknownAuthorityError{}), errors.As(err, &tlsCertError):
7276
return true
7377
default:
7478
return false
7579
}
7680
})
7781

7882
var IdempotentRequestEOF = ClassifierFunc(func(err error) bool {
79-
return err == IdempotentRequestEOFError
83+
return errors.Is(err, IdempotentRequestEOFError)
84+
})
85+
86+
var IncompleteRequest = ClassifierFunc(func(err error) bool {
87+
return errors.Is(err, IncompleteRequestError)
8088
})

proxy/fails/basic_classifiers_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,8 @@ var _ = Describe("ErrorClassifiers - enemy tests", func() {
234234
Describe("ExpiredOrNotYetValidCertFailure", func() {
235235
Context("when the cert is expired or not yet valid", func() {
236236
var (
237-
expiredClientCert *x509.Certificate
237+
expiredClientCert *x509.Certificate
238+
expiredClientCACert *x509.CertPool
238239
)
239240

240241
BeforeEach(func() {
@@ -243,10 +244,12 @@ var _ = Describe("ErrorClassifiers - enemy tests", func() {
243244
var err error
244245
expiredClientCert, err = x509.ParseCertificate(block.Bytes)
245246
Expect(err).NotTo(HaveOccurred())
247+
expiredClientCACert = x509.NewCertPool()
248+
expiredClientCACert.AddCert(expiredClientCertPool.CACert)
246249
})
247250

248251
It("matches", func() {
249-
_, err := expiredClientCert.Verify(x509.VerifyOptions{})
252+
_, err := expiredClientCert.Verify(x509.VerifyOptions{Roots: expiredClientCACert})
250253
Expect(fails.ExpiredOrNotYetValidCertFailure(err)).To(BeTrue())
251254
})
252255
})

proxy/fails/classifier_group.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ var RetriableClassifiers = ClassifierGroup{
2020
UntrustedCert,
2121
ExpiredOrNotYetValidCertFailure,
2222
IdempotentRequestEOF,
23+
IncompleteRequest,
2324
}
2425

2526
var FailableClassifiers = ClassifierGroup{

proxy/fails/classifier_group_test.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
package fails_test
22

33
import (
4-
"errors"
5-
4+
"crypto/tls"
65
"crypto/x509"
6+
"errors"
7+
"fmt"
78
"net"
89

9-
"crypto/tls"
10-
11-
"code.cloudfoundry.org/gorouter/proxy/fails"
1210
. "github.com/onsi/ginkgo"
1311
. "github.com/onsi/gomega"
12+
13+
"code.cloudfoundry.org/gorouter/proxy/fails"
1414
)
1515

1616
var _ = Describe("ClassifierGroup", func() {
@@ -34,15 +34,25 @@ var _ = Describe("ClassifierGroup", func() {
3434
rc := fails.RetriableClassifiers
3535

3636
Expect(rc.Classify(&net.OpError{Op: "dial"})).To(BeTrue())
37+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "dial"}))).To(BeTrue())
3738
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")})).To(BeTrue())
39+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")}))).To(BeTrue())
3840
Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")})).To(BeTrue())
41+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}))).To(BeTrue())
3942
Expect(rc.Classify(errors.New("net/http: TLS handshake timeout"))).To(BeTrue())
43+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, errors.New("net/http: TLS handshake timeout")))).To(BeTrue())
4044
Expect(rc.Classify(tls.RecordHeaderError{})).To(BeTrue())
45+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, tls.RecordHeaderError{}))).To(BeTrue())
4146
Expect(rc.Classify(x509.HostnameError{})).To(BeTrue())
47+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
4248
Expect(rc.Classify(x509.UnknownAuthorityError{})).To(BeTrue())
49+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.UnknownAuthorityError{}))).To(BeTrue())
4350
Expect(rc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue())
51+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.CertificateInvalidError{Reason: x509.Expired}))).To(BeTrue())
4452
Expect(rc.Classify(errors.New("i'm a potato"))).To(BeFalse())
4553
Expect(rc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue())
54+
Expect(rc.Classify(fails.IncompleteRequestError)).To(BeTrue())
55+
Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue())
4656
})
4757
})
4858

@@ -61,6 +71,7 @@ var _ = Describe("ClassifierGroup", func() {
6171
Expect(pc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue())
6272
Expect(pc.Classify(errors.New("i'm a potato"))).To(BeFalse())
6373
Expect(pc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue())
74+
Expect(pc.Classify(fails.IncompleteRequestError)).To(BeTrue())
6475
})
6576
})
6677
})

proxy/proxy.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ import (
1111

1212
"code.cloudfoundry.org/gorouter/common/health"
1313

14+
"github.com/cloudfoundry/dropsonde"
15+
"github.com/uber-go/zap"
16+
"github.com/urfave/negroni"
17+
1418
"code.cloudfoundry.org/gorouter/accesslog"
1519
router_http "code.cloudfoundry.org/gorouter/common/http"
1620
"code.cloudfoundry.org/gorouter/config"
@@ -25,9 +29,6 @@ import (
2529
"code.cloudfoundry.org/gorouter/registry"
2630
"code.cloudfoundry.org/gorouter/route"
2731
"code.cloudfoundry.org/gorouter/routeservice"
28-
"github.com/cloudfoundry/dropsonde"
29-
"github.com/uber-go/zap"
30-
"github.com/urfave/negroni"
3132
)
3233

3334
var (
@@ -110,7 +111,7 @@ func NewProxy(
110111

111112
roundTripperFactory := &round_tripper.FactoryImpl{
112113
BackendTemplate: &http.Transport{
113-
Dial: dialer.Dial,
114+
DialContext: dialer.DialContext,
114115
DisableKeepAlives: cfg.DisableKeepAlives,
115116
MaxIdleConns: cfg.MaxIdleConns,
116117
IdleConnTimeout: 90 * time.Second, // setting the value to golang default transport
@@ -120,7 +121,7 @@ func NewProxy(
120121
TLSHandshakeTimeout: cfg.TLSHandshakeTimeout,
121122
},
122123
RouteServiceTemplate: &http.Transport{
123-
Dial: dialer.Dial,
124+
DialContext: dialer.DialContext,
124125
DisableKeepAlives: cfg.DisableKeepAlives,
125126
MaxIdleConns: cfg.MaxIdleConns,
126127
IdleConnTimeout: 90 * time.Second, // setting the value to golang default transport

proxy/round_tripper/dropsonde_round_tripper.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ package round_tripper
33
import (
44
"net/http"
55

6-
"code.cloudfoundry.org/gorouter/proxy/utils"
76
"github.com/cloudfoundry/dropsonde"
7+
8+
"code.cloudfoundry.org/gorouter/proxy/utils"
89
)
910

1011
func NewDropsondeRoundTripper(p ProxyRoundTripper) ProxyRoundTripper {
@@ -44,7 +45,7 @@ func (t *FactoryImpl) New(expectedServerName string, isRouteService bool, isHttp
4445
customTLSConfig := utils.TLSConfigWithServerName(expectedServerName, template.TLSClientConfig, isRouteService)
4546

4647
newTransport := &http.Transport{
47-
Dial: template.Dial,
48+
DialContext: template.DialContext,
4849
DisableKeepAlives: template.DisableKeepAlives,
4950
MaxIdleConns: template.MaxIdleConns,
5051
IdleConnTimeout: template.IdleConnTimeout,

proxy/round_tripper/error_handler_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"crypto/tls"
66
"crypto/x509"
77
"errors"
8+
"fmt"
89
"net"
910
"net/http/httptest"
1011

@@ -136,6 +137,22 @@ var _ = Describe("HandleError", func() {
136137
})
137138
})
138139

140+
Context("HostnameMismatch wrapped in IncompleteRequestError", func() {
141+
BeforeEach(func() {
142+
wrappedErr := x509.HostnameError{Host: "the wrong one"}
143+
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
144+
errorHandler.HandleError(responseWriter, err)
145+
})
146+
147+
It("has a 503 Status Code", func() {
148+
Expect(responseWriter.Status()).To(Equal(503))
149+
})
150+
151+
It("emits a backend_invalid_id metric", func() {
152+
Expect(metricReporter.CaptureBackendInvalidIDCallCount()).To(Equal(1))
153+
})
154+
})
155+
139156
Context("Untrusted Cert", func() {
140157
BeforeEach(func() {
141158
err = x509.UnknownAuthorityError{}
@@ -151,6 +168,22 @@ var _ = Describe("HandleError", func() {
151168
})
152169
})
153170

171+
Context("Untrusted Cert wrapped in IncompleteRequestError", func() {
172+
BeforeEach(func() {
173+
wrappedErr := x509.UnknownAuthorityError{}
174+
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
175+
errorHandler.HandleError(responseWriter, err)
176+
})
177+
178+
It("has a 526 Status Code", func() {
179+
Expect(responseWriter.Status()).To(Equal(526))
180+
})
181+
182+
It("emits a backend_invalid_tls_cert metric", func() {
183+
Expect(metricReporter.CaptureBackendInvalidTLSCertCallCount()).To(Equal(1))
184+
})
185+
})
186+
154187
Context("Attempted TLS with non-TLS backend error", func() {
155188
BeforeEach(func() {
156189
err = tls.RecordHeaderError{Msg: "bad handshake"}
@@ -166,6 +199,22 @@ var _ = Describe("HandleError", func() {
166199
})
167200
})
168201

202+
Context("Attempted TLS with non-TLS backend error wrapped in IncompleteRequestError", func() {
203+
BeforeEach(func() {
204+
wrappedErr := tls.RecordHeaderError{Msg: "bad handshake"}
205+
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
206+
errorHandler.HandleError(responseWriter, err)
207+
})
208+
209+
It("has a 525 Status Code", func() {
210+
Expect(responseWriter.Status()).To(Equal(525))
211+
})
212+
213+
It("emits a backend_tls_handshake_failed metric", func() {
214+
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
215+
})
216+
})
217+
169218
Context("Remote handshake failure", func() {
170219
BeforeEach(func() {
171220
err = &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
@@ -181,6 +230,22 @@ var _ = Describe("HandleError", func() {
181230
})
182231
})
183232

233+
Context("Remote handshake failure wrapped in IncompleteRequestError", func() {
234+
BeforeEach(func() {
235+
wrappedErr := &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}
236+
err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr)
237+
errorHandler.HandleError(responseWriter, err)
238+
})
239+
240+
It("has a 525 Status Code", func() {
241+
Expect(responseWriter.Status()).To(Equal(525))
242+
})
243+
244+
It("emits a backend_tls_handshake_failed metric", func() {
245+
Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1))
246+
})
247+
})
248+
184249
Context("Context Cancelled Error", func() {
185250
BeforeEach(func() {
186251
err = context.Canceled

0 commit comments

Comments
 (0)