Commit 53e7117

Add distributor limits (grafana/phlare#510)
This PR introduces limits, along with a runtime config that allows reloading them per tenant without a restart. For now only ingestion limits are implemented, but I'm planning to follow up with more. The overrides exporter is also added to export those limits as Prometheus metrics.

Usage: `phlare -runtime-config.file=./overrides.yaml --config.file=./phlare.yaml`

```
cat overrides.yaml
overrides:
  "anonymous":
    ingestion_rate_mb: 0.1
    ingestion_burst_size_mb: 0.001
```

A lot of this code is adapted from Loki and Mimir.
1 parent 5f5223a commit 53e7117

29 files changed: +2567 −125 lines

docs/sources/operators-guide/configure/reference-configuration-parameters/index.md

Lines changed: 64 additions & 0 deletions
@@ -167,6 +167,60 @@ client:
 # The frontend_worker block configures the frontend-worker.
 [frontend_worker: <frontend_worker>]

+limits:
+  # Per-tenant ingestion rate limit in sample size per second. Units in MB.
+  # CLI flag: -distributor.ingestion-rate-limit-mb
+  [ingestion_rate_mb: <float> | default = 4]
+
+  # Per-tenant allowed ingestion burst size (in sample size). Units in MB. The
+  # burst size refers to the per-distributor local rate limiter, and should be
+  # set at least to the maximum profile size expected in a single push request.
+  # CLI flag: -distributor.ingestion-burst-size-mb
+  [ingestion_burst_size_mb: <float> | default = 2]
+
+  # Maximum length accepted for label names.
+  # CLI flag: -validation.max-length-label-name
+  [max_label_name_length: <int> | default = 1024]
+
+  # Maximum length accepted for label value. This setting also applies to the
+  # metric name.
+  # CLI flag: -validation.max-length-label-value
+  [max_label_value_length: <int> | default = 2048]
+
+  # Maximum number of label names per series.
+  # CLI flag: -validation.max-label-names-per-series
+  [max_label_names_per_series: <int> | default = 30]
+
+  # Maximum number of active series of profiles per tenant, per ingester. 0 to
+  # disable.
+  # CLI flag: -ingester.max-series-per-tenant
+  [max_series_per_tenant: <int> | default = 0]
+
+  # Maximum number of active series of profiles per tenant, across the cluster.
+  # 0 to disable. When the global limit is enabled, each ingester is configured
+  # with a dynamic local limit based on the replication factor and the current
+  # number of healthy ingesters, and is kept updated whenever the number of
+  # ingesters change.
+  # CLI flag: -ingester.max-global-series-per-tenant
+  [max_global_series_per_user: <int> | default = 5000]
+
+  # Limit how far back in profiling data can be queried, up until lookback
+  # duration ago. This limit is enforced in the query frontend. If the requested
+  # time range is outside the allowed range, the request will not fail, but will
+  # be modified to only query data within the allowed time range. The default
+  # value of 0 does not set a limit.
+  # CLI flag: -querier.max-query-lookback
+  [max_query_lookback: <duration> | default = 0s]
+
+  # The limit to length of queries. 0 to disable.
+  # CLI flag: -querier.max-query-length
+  [max_query_length: <duration> | default = 30d1h]
+
+  # Maximum number of queries that will be scheduled in parallel by the
+  # frontend.
+  # CLI flag: -querier.max-query-parallelism
+  [max_query_parallelism: <int> | default = 32]
+
 # The query_scheduler block configures the query-scheduler.
 [query_scheduler: <query_scheduler>]

@@ -194,6 +248,16 @@ tracing:
 # CLI flag: -tracing.enabled
 [enabled: <boolean> | default = true]

+runtime_config:
+  # How often to check runtime config files.
+  # CLI flag: -runtime-config.reload-period
+  [period: <duration> | default = 10s]
+
+  # Comma separated list of yaml files with the configuration that can be
+  # updated at runtime. Runtime config files will be merged from left to right.
+  # CLI flag: -runtime-config.file
+  [file: <string> | default = ""]
+
 storage:
 # Backend storage to use. Supported backends are: s3, gcs, azure, swift,
 # filesystem, cos.
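An aside on `max_global_series_per_user`: the diff does not show how each ingester derives its local share, but the parameter description above ties it to the replication factor and the current number of healthy ingesters. A minimal sketch under that assumption, with a hypothetical `localSeriesLimit` helper that is not part of this change:

```go
package limitsketch

// localSeriesLimit sketches how a per-ingester series limit could be derived
// from max_global_series_per_user, following the convention described in the
// parameter docs above. The exact formula used by the ingester is not part of
// this diff, so treat this as an illustration only.
func localSeriesLimit(globalLimit, healthyIngesters, replicationFactor int) int {
    if globalLimit == 0 || healthyIngesters == 0 {
        return 0 // 0 means "no limit"
    }
    // Each series is replicated to replicationFactor ingesters, so the sum of
    // the local limits across healthy ingesters should cover globalLimit * RF.
    return globalLimit * replicationFactor / healthyIngesters
}
```

Recomputing this value as instances join and leave is what the "kept updated whenever the number of ingesters change" sentence refers to.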

go.mod

Lines changed: 1 addition & 1 deletion
@@ -56,6 +56,7 @@ require (
 	golang.org/x/sync v0.1.0
 	golang.org/x/sys v0.4.0
 	golang.org/x/text v0.6.0
+	golang.org/x/time v0.3.0
 	google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef
 	google.golang.org/grpc v1.51.0
 	google.golang.org/protobuf v1.28.1
@@ -235,7 +236,6 @@ require (
 	golang.org/x/mod v0.7.0 // indirect
 	golang.org/x/oauth2 v0.3.0 // indirect
 	golang.org/x/term v0.4.0 // indirect
-	golang.org/x/time v0.3.0 // indirect
 	golang.org/x/tools v0.4.0 // indirect
 	golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
 	google.golang.org/api v0.105.0 // indirect

pkg/distributor/distributor.go

Lines changed: 155 additions & 15 deletions
@@ -6,12 +6,16 @@ import (
 	"flag"
 	"fmt"
 	"hash/fnv"
+	"net/http"
 	"strconv"
 	"time"

 	"github.com/bufbuild/connect-go"
+	"github.com/dustin/go-humanize"
 	"github.com/go-kit/log"
 	"github.com/google/uuid"
+	"github.com/grafana/dskit/kv"
+	"github.com/grafana/dskit/limiter"
 	"github.com/grafana/dskit/ring"
 	ring_client "github.com/grafana/dskit/ring/client"
 	"github.com/grafana/dskit/services"
@@ -29,12 +33,23 @@ import (
 	"github.com/grafana/phlare/pkg/pprof"
 	"github.com/grafana/phlare/pkg/tenant"
 	"github.com/grafana/phlare/pkg/usagestats"
+	"github.com/grafana/phlare/pkg/util"
+	"github.com/grafana/phlare/pkg/validation"
 )

 type PushClient interface {
 	Push(context.Context, *connect.Request[pushv1.PushRequest]) (*connect.Response[pushv1.PushResponse], error)
 }

+const (
+	// distributorRingKey is the key under which we store the distributors ring in the KVStore.
+	distributorRingKey = "distributor"
+
+	// ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance
+	// in the ring will be automatically removed after.
+	ringAutoForgetUnhealthyPeriods = 10
+)
+
 // todo: move to non global metrics.
 var (
 	clients = promauto.NewGauge(prometheus.GaugeOpts{
@@ -52,12 +67,16 @@ var (
 type Config struct {
 	PushTimeout time.Duration
 	PoolConfig  clientpool.PoolConfig `yaml:"pool_config,omitempty"`
+
+	// Distributors ring
+	DistributorRing RingConfig `yaml:"ring" doc:"hidden"`
 }

 // RegisterFlags registers distributor-related flags.
 func (cfg *Config) RegisterFlags(fs *flag.FlagSet) {
 	cfg.PoolConfig.RegisterFlagsWithPrefix("distributor", fs)
 	fs.DurationVar(&cfg.PushTimeout, "distributor.push.timeout", 5*time.Second, "Timeout when pushing data to ingester.")
+	cfg.DistributorRing.RegisterFlags(fs)
 }

 // Distributor coordinates replicates and distribution of log streams.
@@ -66,30 +85,64 @@ type Distributor struct {
 	logger log.Logger

 	cfg           Config
+	limits        Limits
 	ingestersRing ring.ReadRing
 	pool          *ring_client.Pool

+	// The global rate limiter requires a distributors ring to count
+	// the number of healthy instances
+	distributorsLifecycler *ring.BasicLifecycler
+	distributorsRing       *ring.Ring
+	healthyInstancesCount  *atomic.Uint32
+	ingestionRateLimiter   *limiter.RateLimiter
+
 	subservices        *services.Manager
 	subservicesWatcher *services.FailureWatcher

 	metrics *metrics
 }

-func New(cfg Config, ingestersRing ring.ReadRing, factory ring_client.PoolFactory, reg prometheus.Registerer, logger log.Logger, clientsOptions ...connect.ClientOption) (*Distributor, error) {
+type Limits interface {
+	IngestionRateBytes(tenantID string) float64
+	IngestionBurstSizeBytes(tenantID string) int
+	MaxLabelNameLength(userID string) int
+	MaxLabelValueLength(userID string) int
+	MaxLabelNamesPerSeries(userID string) int
+}
+
+func New(cfg Config, ingestersRing ring.ReadRing, factory ring_client.PoolFactory, limits Limits, reg prometheus.Registerer, logger log.Logger, clientsOptions ...connect.ClientOption) (*Distributor, error) {
 	d := &Distributor{
-		cfg:           cfg,
-		logger:        logger,
-		ingestersRing: ingestersRing,
-		pool:          clientpool.NewPool(cfg.PoolConfig, ingestersRing, factory, clients, logger, clientsOptions...),
-		metrics:       newMetrics(reg),
+		cfg:                   cfg,
+		logger:                logger,
+		ingestersRing:         ingestersRing,
+		pool:                  clientpool.NewPool(cfg.PoolConfig, ingestersRing, factory, clients, logger, clientsOptions...),
+		metrics:               newMetrics(reg),
+		healthyInstancesCount: atomic.NewUint32(0),
+		limits:                limits,
 	}
 	var err error
-	d.subservices, err = services.NewManager(d.pool)
+
+	subservices := []services.Service(nil)
+	subservices = append(subservices, d.pool)
+
+	distributorsRing, distributorsLifecycler, err := newRingAndLifecycler(cfg.DistributorRing, d.healthyInstancesCount, logger, reg)
+	if err != nil {
+		return nil, err
+	}
+
+	subservices = append(subservices, distributorsLifecycler, distributorsRing)
+
+	d.ingestionRateLimiter = limiter.NewRateLimiter(newGlobalRateStrategy(newIngestionRateStrategy(limits), d), 10*time.Second)
+	d.distributorsLifecycler = distributorsLifecycler
+	d.distributorsRing = distributorsRing
+
+	d.subservices, err = services.NewManager(subservices...)
 	if err != nil {
 		return nil, errors.Wrap(err, "services manager")
 	}
 	d.subservicesWatcher = services.NewFailureWatcher()
 	d.subservicesWatcher.WatchManager(d.subservices)
+
 	d.Service = services.NewBasicService(d.starting, d.running, d.stopping)
 	rfStats.Set(int64(ingestersRing.ReplicationFactor()))
 	return d, nil
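The `Limits` interface above is everything `New` needs from the configuration layer. As a minimal sketch (hypothetical, for tests or local experiments; in this PR the real implementation is the per-tenant overrides loaded via `-runtime-config.file`), a fixed-value implementation using the documented defaults could look like this:

```go
package distributortest

// staticLimits is a hypothetical, fixed-value implementation of the
// distributor's Limits interface. In the PR itself the values come from the
// per-tenant runtime overrides.
type staticLimits struct{}

const bytesPerMB = 1 << 20 // assumption: MB is treated as mebibytes here

// IngestionRateBytes returns the ingestion rate limit in bytes per second
// (ingestion_rate_mb, default 4).
func (staticLimits) IngestionRateBytes(tenantID string) float64 { return 4 * bytesPerMB }

// IngestionBurstSizeBytes returns the ingestion burst size in bytes
// (ingestion_burst_size_mb, default 2).
func (staticLimits) IngestionBurstSizeBytes(tenantID string) int { return 2 * bytesPerMB }

func (staticLimits) MaxLabelNameLength(userID string) int     { return 1024 }
func (staticLimits) MaxLabelValueLength(userID string) int    { return 2048 }
func (staticLimits) MaxLabelNamesPerSeries(userID string) int { return 30 }
```

Passing `staticLimits{}` as the new `limits` argument to `New` would then satisfy the updated signature.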
@@ -115,29 +168,37 @@ func (d *Distributor) stopping(_ error) error {
 func (d *Distributor) Push(ctx context.Context, req *connect.Request[pushv1.PushRequest]) (*connect.Response[pushv1.PushResponse], error) {
 	tenantID, err := tenant.ExtractTenantIDFromContext(ctx)
 	if err != nil {
-		return nil, connect.NewError(connect.CodeInvalidArgument, err)
+		return nil, connect.NewError(connect.CodeUnauthenticated, err)
 	}
 	var (
-		keys     = make([]uint32, 0, len(req.Msg.Series))
-		profiles = make([]*profileTracker, 0, len(req.Msg.Series))
+		keys                       = make([]uint32, 0, len(req.Msg.Series))
+		profiles                   = make([]*profileTracker, 0, len(req.Msg.Series))
+		totalPushUncompressedBytes int64
+		totalProfiles              int64
 	)

 	for _, series := range req.Msg.Series {
+		// include the labels in the size calculation
+		for _, lbs := range series.Labels {
+			totalPushUncompressedBytes += int64(len(lbs.Name))
+			totalPushUncompressedBytes += int64(len(lbs.Value))
+		}
 		keys = append(keys, TokenFor(tenantID, labelsString(series.Labels)))
 		profName := phlaremodel.Labels(series.Labels).Get(scrape.ProfileName)
 		for _, raw := range series.Samples {
 			usagestats.NewCounter(fmt.Sprintf("distributor_profile_type_%s_received", profName)).Inc(1)
 			profileReceivedStats.Inc(1)
 			bytesReceivedTotalStats.Inc(int64(len(raw.RawProfile)))
 			bytesReceivedStats.Record(float64(len(raw.RawProfile)))
-			d.metrics.receivedCompressedBytes.WithLabelValues(profName).Observe(float64(len(raw.RawProfile)))
+			totalProfiles++
+			d.metrics.receivedCompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(len(raw.RawProfile)))
 			p, err := pprof.RawFromBytes(raw.RawProfile)
 			if err != nil {
-				return nil, err
+				return nil, connect.NewError(connect.CodeInvalidArgument, err)
 			}
-			d.metrics.receivedDecompressedBytes.WithLabelValues(profName).Observe(float64(p.SizeBytes()))
-			d.metrics.receivedSamples.WithLabelValues(profName).Observe(float64(len(p.Sample)))
-
+			d.metrics.receivedDecompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(p.SizeBytes()))
+			d.metrics.receivedSamples.WithLabelValues(profName, tenantID).Observe(float64(len(p.Sample)))
+			totalPushUncompressedBytes += int64(p.SizeBytes())
 			p.Normalize()

 			// zip the data back into the buffer
@@ -153,6 +214,24 @@ func (d *Distributor) Push(ctx context.Context, req *connect.Request[pushv1.Push
 		profiles = append(profiles, &profileTracker{profile: series})
 	}

+	// validate the request
+	for _, series := range req.Msg.Series {
+		if err := validation.ValidateLabels(d.limits, tenantID, series.Labels); err != nil {
+			validation.DiscardedProfiles.WithLabelValues(string(validation.ReasonOf(err)), tenantID).Add(float64(totalProfiles))
+			validation.DiscardedBytes.WithLabelValues(string(validation.ReasonOf(err)), tenantID).Add(float64(totalPushUncompressedBytes))
+			return nil, connect.NewError(connect.CodeInvalidArgument, err)
+		}
+	}
+
+	// rate limit the request
+	if !d.ingestionRateLimiter.AllowN(time.Now(), tenantID, int(totalPushUncompressedBytes)) {
+		validation.DiscardedProfiles.WithLabelValues(string(validation.RateLimited), tenantID).Add(float64(totalProfiles))
+		validation.DiscardedBytes.WithLabelValues(string(validation.RateLimited), tenantID).Add(float64(totalPushUncompressedBytes))
+		return nil, connect.NewError(connect.CodeResourceExhausted,
+			fmt.Errorf("push rate limit (%s) exceeded while adding %s", humanize.Bytes(uint64(d.limits.IngestionRateBytes(tenantID))), humanize.Bytes(uint64(totalPushUncompressedBytes))),
+		)
+	}
+
 	const maxExpectedReplicationSet = 5 // typical replication factor 3 plus one for inactive plus one for luck
 	var descs [maxExpectedReplicationSet]ring.InstanceDesc

@@ -245,6 +324,35 @@ func (d *Distributor) sendProfilesErr(ctx context.Context, ingester ring.Instanc
 	return err
 }

+func (d *Distributor) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+	if d.distributorsRing != nil {
+		d.distributorsRing.ServeHTTP(w, req)
+	} else {
+		ringNotEnabledPage := `
+			<!DOCTYPE html>
+			<html>
+				<head>
+					<meta charset="UTF-8">
+					<title>Distributor Status</title>
+				</head>
+				<body>
+					<h1>Distributor Status</h1>
+					<p>Distributor is not running with global limits enabled</p>
+				</body>
+			</html>`
+		util.WriteHTMLResponse(w, ringNotEnabledPage)
+	}
+}
+
+// HealthyInstancesCount implements the ReadLifecycler interface
+//
+// We use a ring lifecycler delegate to count the number of members of the
+// ring. The count is then used to enforce rate limiting correctly for each
+// distributor. $EFFECTIVE_RATE_LIMIT = $GLOBAL_RATE_LIMIT / $NUM_INSTANCES
+func (d *Distributor) HealthyInstancesCount() int {
+	return int(d.healthyInstancesCount.Load())
+}
+
 type profileTracker struct {
 	profile    *pushv1.RawProfileSeries
 	minSuccess int
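A note on the rate-limit check in the `Push` hunk above: `d.ingestionRateLimiter` keeps one token bucket per tenant, sized from `IngestionRateBytes` and `IngestionBurstSizeBytes`, and with the global strategy each distributor enforces only its share of the tenant's rate, as the `HealthyInstancesCount` comment spells out. Below is a self-contained sketch of the same idea using `golang.org/x/time/rate`, the package this PR promotes to a direct dependency in `go.mod`; the tenant values are taken from the example overrides in the commit message, and the distributor count is an assumption:

```go
package main

import (
    "fmt"
    "time"

    "golang.org/x/time/rate"
)

func main() {
    const bytesPerMB = 1 << 20

    // Hypothetical inputs: the "anonymous" tenant's overrides and two healthy distributors.
    ingestionRateMB := 0.1    // ingestion_rate_mb
    ingestionBurstMB := 0.001 // ingestion_burst_size_mb
    healthyDistributors := 2.0

    // With the global strategy each distributor enforces its share:
    // effective rate = global rate / number of healthy distributors.
    localRate := ingestionRateMB * bytesPerMB / healthyDistributors
    burst := int(ingestionBurstMB * bytesPerMB)

    // One bucket per tenant; dskit's limiter.RateLimiter manages such buckets
    // and periodically re-reads limit and burst from the Limits implementation.
    bucket := rate.NewLimiter(rate.Limit(localRate), burst)

    pushSize := 512 // uncompressed bytes of an incoming push, labels included
    if !bucket.AllowN(time.Now(), pushSize) {
        fmt.Println("push rejected: rate limit exceeded (ResourceExhausted)")
        return
    }
    fmt.Println("push accepted")
}
```

Because the bucket can never hold more than its burst, a single push larger than `ingestion_burst_size_mb` cannot pass at any rate, which is why the new docs recommend setting the burst to at least the largest expected profile in one push request.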
@@ -283,3 +391,35 @@ func TokenFor(tenantID, labels string) uint32 {
 	_, _ = h.Write([]byte(labels))
 	return h.Sum32()
 }
+
+// newRingAndLifecycler creates a new distributor ring and lifecycler with all required lifecycler delegates
+func newRingAndLifecycler(cfg RingConfig, instanceCount *atomic.Uint32, logger log.Logger, reg prometheus.Registerer) (*ring.Ring, *ring.BasicLifecycler, error) {
+	reg = prometheus.WrapRegistererWithPrefix("phlare_", reg)
+	kvStore, err := kv.NewClient(cfg.KVStore, ring.GetCodec(), kv.RegistererWithKVName(reg, "distributor-lifecycler"), logger)
+	if err != nil {
+		return nil, nil, errors.Wrap(err, "failed to initialize distributors' KV store")
+	}
+
+	lifecyclerCfg, err := cfg.ToBasicLifecyclerConfig(logger)
+	if err != nil {
+		return nil, nil, errors.Wrap(err, "failed to build distributors' lifecycler config")
+	}
+
+	var delegate ring.BasicLifecyclerDelegate
+	delegate = ring.NewInstanceRegisterDelegate(ring.ACTIVE, lifecyclerCfg.NumTokens)
+	delegate = newHealthyInstanceDelegate(instanceCount, cfg.HeartbeatTimeout, delegate)
+	delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger)
+	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*cfg.HeartbeatTimeout, delegate, logger)
+
+	distributorsLifecycler, err := ring.NewBasicLifecycler(lifecyclerCfg, "distributor", distributorRingKey, kvStore, delegate, logger, reg)
+	if err != nil {
+		return nil, nil, errors.Wrap(err, "failed to initialize distributors' lifecycler")
+	}
+
+	distributorsRing, err := ring.New(cfg.ToRingConfig(), "distributor", distributorRingKey, logger, reg)
+	if err != nil {
+		return nil, nil, errors.Wrap(err, "failed to initialize distributors' ring client")
+	}
+
+	return distributorsRing, distributorsLifecycler, nil
+}
