@@ -2,19 +2,23 @@ package cluster
2
2
3
3
import (
4
4
"context"
5
+ "errors"
5
6
"flag"
6
7
"fmt"
7
8
"io"
8
9
"log"
10
+ "math"
9
11
"math/rand"
10
12
"net"
11
13
"net/http"
12
14
"os"
13
15
"path/filepath"
16
+ "strings"
14
17
"sync"
15
18
"time"
16
19
17
20
"github.com/prometheus/client_golang/prometheus"
21
+ pm "github.com/prometheus/client_model/go"
18
22
"golang.org/x/sync/errgroup"
19
23
20
24
"github.com/grafana/pyroscope/api/gen/proto/go/push/v1/pushv1connect"
@@ -116,6 +120,19 @@ func nodeNameFlags(nodeName string) []string {
116
120
}
117
121
}
118
122
123
+ func listenAddrFlags (listenAddr string ) []string {
124
+ return []string {
125
+ "-compactor.ring.instance-addr=" + listenAddr ,
126
+ "-distributor.ring.instance-addr=" + listenAddr ,
127
+ "-ingester.lifecycler.addr=" + listenAddr ,
128
+ "-memberlist.advertise-addr=" + listenAddr ,
129
+ "-overrides-exporter.ring.instance-addr=" + listenAddr ,
130
+ "-query-frontend.instance-addr=" + listenAddr ,
131
+ "-query-scheduler.ring.instance-addr=" + listenAddr ,
132
+ "-store-gateway.sharding-ring.instance-addr=" + listenAddr ,
133
+ }
134
+ }
135
+
119
136
func (c * Cluster ) pickHealthyComponent (targets ... string ) (addr string , err error ) {
120
137
results := make ([][]string , len (targets ))
121
138
@@ -150,7 +167,7 @@ func (c *Cluster) Prepare() (err error) {
150
167
151
168
// allocate two tcp ports per component
152
169
portsPerComponent := 3
153
- listenAddr := "0 .0.0.0 "
170
+ listenAddr := "127 .0.0.1 "
154
171
ports , err := getFreeTCPPorts (listenAddr , len (c .Components )* portsPerComponent )
155
172
if err != nil {
156
173
return err
@@ -184,6 +201,8 @@ func (c *Cluster) Prepare() (err error) {
184
201
185
202
comp .flags = append (
186
203
nodeNameFlags (comp .nodeName ()),
204
+ listenAddrFlags ("127.0.0.1" )... )
205
+ comp .flags = append (comp .flags ,
187
206
[]string {
188
207
"-tracing.enabled=false" , // data race
189
208
"-distributor.replication-factor=3" ,
@@ -237,7 +256,11 @@ func (c *Cluster) Start(ctx context.Context) (err error) {
237
256
238
257
notReady := make (map [* Component ]error )
239
258
259
+ countPerTarget := map [string ]int {}
260
+
240
261
for _ , comp := range c .Components {
262
+ countPerTarget [comp .Target ]++
263
+
241
264
p , err := comp .start (ctx )
242
265
if err != nil {
243
266
return err
@@ -273,7 +296,14 @@ func (c *Cluster) Start(ctx context.Context) (err error) {
273
296
return err
274
297
}
275
298
276
- return t .querierReadyCheck (ctx , 3 , 3 )
299
+ return t .querierReadyCheck (ctx , countPerTarget ["ingester" ], countPerTarget ["store-gateway" ])
300
+ }
301
+ if t .Target == "distributor" {
302
+ if err := t .httpReadyCheck (ctx ); err != nil {
303
+ return err
304
+ }
305
+
306
+ return t .distributorReadyCheck (ctx , countPerTarget ["ingester" ], countPerTarget ["distributor" ])
277
307
}
278
308
279
309
return t .httpReadyCheck (ctx )
@@ -327,59 +357,129 @@ type Component struct {
327
357
reg * prometheus.Registry
328
358
}
329
359
330
- func (comp * Component ) querierReadyCheck (ctx context.Context , expectedIngesters , expectedStoreGateways int ) error {
331
- metrics , err := comp .reg .Gather ()
360
+ type gatherCheck struct {
361
+ g prometheus.Gatherer
362
+ conditions []gatherCoditions
363
+ }
364
+
365
+ //nolint:unparam
366
+ func (c * gatherCheck ) addExpectValue (value float64 , metricName string , labelPairs ... string ) * gatherCheck {
367
+ c .conditions = append (c .conditions , gatherCoditions {
368
+ metricName : metricName ,
369
+ labelPairs : labelPairs ,
370
+ expectedValue : value ,
371
+ })
372
+ return c
373
+ }
374
+
375
+ type gatherCoditions struct {
376
+ metricName string
377
+ labelPairs []string
378
+ expectedValue float64
379
+ }
380
+
381
+ func (c * gatherCoditions ) String () string {
382
+ b := strings.Builder {}
383
+ b .WriteString (c .metricName )
384
+ b .WriteRune ('{' )
385
+ for i := 0 ; i < len (c .labelPairs ); i += 2 {
386
+ b .WriteString (c .labelPairs [i ])
387
+ b .WriteRune ('=' )
388
+ b .WriteString (c .labelPairs [i + 1 ])
389
+ b .WriteRune (',' )
390
+ }
391
+ s := b .String ()
392
+ return s [:len (s )- 1 ] + "}"
393
+ }
394
+
395
+ func (c * gatherCoditions ) matches (pairs []* pm.LabelPair ) bool {
396
+ outer:
397
+ for i := 0 ; i < len (c .labelPairs ); i += 2 {
398
+ for _ , l := range pairs {
399
+ if l .GetName () != c .labelPairs [i ] {
400
+ continue
401
+ }
402
+ if l .GetValue () == c .labelPairs [i + 1 ] {
403
+ continue outer // match move to next pair
404
+ }
405
+ return false // value wrong
406
+ }
407
+ return false // label not found
408
+ }
409
+ return true
410
+ }
411
+
412
+ func (comp * Component ) checkMetrics () * gatherCheck {
413
+ return & gatherCheck {
414
+ g : comp .reg ,
415
+ }
416
+ }
417
+
418
+ func (g * gatherCheck ) run (ctx context.Context ) error {
419
+ actualValues := make ([]float64 , len (g .conditions ))
420
+
421
+ // maps from metric name to condition index
422
+ nameMap := make (map [string ][]int )
423
+ for idx , c := range g .conditions {
424
+ // not a number
425
+ actualValues [idx ] = math .NaN ()
426
+ nameMap [c .metricName ] = append (nameMap [c .metricName ], idx )
427
+ }
428
+
429
+ // now gather actual metrics
430
+ metrics , err := g .g .Gather ()
332
431
if err != nil {
333
432
return err
334
433
}
335
434
336
- activeIngesters := 0
337
- activeStoreGateways := 0
338
-
339
435
for _ , m := range metrics {
340
436
if ctx .Err () != nil {
341
437
return ctx .Err ()
342
438
}
343
439
344
- if m .GetName () == "pyroscope_ring_members" {
345
- for _ , sm := range m .GetMetric () {
346
- foundIngester := false
347
- foundStoreGateway := false
348
- foundActive := false
349
- for _ , l := range sm .GetLabel () {
350
- if l .GetName () == "name" && l .GetValue () == "ingester" {
351
- foundIngester = true
352
- }
353
- if l .GetName () == "name" && l .GetValue () == "store-gateway-client" {
354
- foundStoreGateway = true
355
- }
356
- if l .GetName () == "state" && l .GetValue () == "ACTIVE" {
357
- foundActive = true
358
- }
359
- }
360
- if foundIngester && foundActive {
361
- if v := sm .GetGauge ().GetValue (); v > 0 {
362
- activeIngesters = int (v )
363
- }
364
- }
365
- if foundStoreGateway && foundActive {
366
- if v := sm .GetGauge ().GetValue (); v > 0 {
367
- activeStoreGateways = int (v )
368
- }
440
+ conditions , ok := nameMap [m .GetName ()]
441
+ if ! ok {
442
+ continue
443
+ }
444
+
445
+ // now iterate over all label pairs
446
+ for _ , sm := range m .GetMetric () {
447
+ // check for each condition if it matches with he labels
448
+ for _ , condIdx := range conditions {
449
+ if g .conditions [condIdx ].matches (sm .Label ) {
450
+ actualValues [condIdx ] = sm .GetGauge ().GetValue () // TODO: handle other types
369
451
}
370
452
}
371
453
}
372
454
}
373
455
374
- if activeIngesters != expectedIngesters {
375
- return fmt .Errorf ("expected %d active ingesters, got %d" , expectedIngesters , activeIngesters )
376
- }
377
- if activeStoreGateways != expectedStoreGateways {
378
- return fmt .Errorf ("expected %d active store gateways, got %d" , expectedStoreGateways , activeStoreGateways )
456
+ errs := make ([]error , len (actualValues ))
457
+ for idx , actual := range actualValues {
458
+ cond := g .conditions [idx ]
459
+ if math .IsNaN (actual ) {
460
+ errs [idx ] = fmt .Errorf ("metric for %s not found" , cond .String ())
461
+ continue
462
+ }
463
+ if actual != cond .expectedValue {
464
+ errs [idx ] = fmt .Errorf ("unexpected value for %s: expected %f, got %f" , cond .String (), cond .expectedValue , actual )
465
+ }
379
466
}
380
467
381
- return nil
468
+ return errors .Join (errs ... )
469
+ }
470
+
471
+ func (comp * Component ) querierReadyCheck (ctx context.Context , expectedIngesters , expectedStoreGateways int ) (err error ) {
472
+ check := comp .checkMetrics ().
473
+ addExpectValue (float64 (expectedIngesters ), "pyroscope_ring_members" , "name" , "ingester" , "state" , "ACTIVE" ).
474
+ addExpectValue (float64 (expectedStoreGateways ), "pyroscope_ring_members" , "name" , "store-gateway-client" , "state" , "ACTIVE" )
475
+ return check .run (ctx )
476
+ }
382
477
478
+ func (comp * Component ) distributorReadyCheck (ctx context.Context , expectedIngesters , expectedDistributors int ) (err error ) {
479
+ check := comp .checkMetrics ().
480
+ addExpectValue (float64 (expectedIngesters ), "pyroscope_ring_members" , "name" , "ingester" , "state" , "ACTIVE" ).
481
+ addExpectValue (float64 (expectedDistributors ), "pyroscope_ring_members" , "name" , "distributor" , "state" , "ACTIVE" )
482
+ return check .run (ctx )
383
483
}
384
484
385
485
func (comp * Component ) httpReadyCheck (ctx context.Context ) error {
0 commit comments