Skip to content

Commit 88b5507

Browse files
authored
Move statsd agent to a deployment on the operator nodegroup (#2247)
1 parent 9d4f595 commit 88b5507

File tree

5 files changed

+33
-50
lines changed

5 files changed

+33
-50
lines changed

cmd/dequeuer/main.go

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ func main() {
4747
userContainerPort int
4848
apiName string
4949
jobID string
50-
statsdPort int
50+
statsdAddress string
5151
apiKind string
5252
adminPort int
5353
)
@@ -58,8 +58,8 @@ func main() {
5858
flag.StringVar(&apiKind, "api-kind", "", fmt.Sprintf("api kind (%s|%s)", userconfig.BatchAPIKind.String(), userconfig.AsyncAPIKind.String()))
5959
flag.StringVar(&apiName, "api-name", "", "api name")
6060
flag.StringVar(&jobID, "job-id", "", "job ID")
61+
flag.StringVar(&statsdAddress, "statsd-address", "", "address to push statsd metrics")
6162
flag.IntVar(&userContainerPort, "user-port", 8080, "target port to which the dequeued messages will be sent to")
62-
flag.IntVar(&statsdPort, "statsd-port", 9125, "port for to send udp statsd metrics")
6363
flag.IntVar(&adminPort, "admin-port", 0, "port where the admin server (for the probes) will be exposed")
6464

6565
flag.Parse()
@@ -69,8 +69,6 @@ func main() {
6969
version = consts.CortexVersion
7070
}
7171

72-
hostIP := os.Getenv("HOST_IP")
73-
7472
log := logging.GetLogger()
7573
defer func() {
7674
_ = log.Sync()
@@ -158,7 +156,7 @@ func main() {
158156
TargetURL: targetURL,
159157
}
160158

161-
metricsClient, err := statsd.New(fmt.Sprintf("%s:%d", hostIP, statsdPort))
159+
metricsClient, err := statsd.New(statsdAddress)
162160
if err != nil {
163161
exit(log, err, "unable to initialize metrics client")
164162
}

manager/manifests/prometheus-statsd-exporter.yaml

Lines changed: 21 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -23,17 +23,13 @@ data:
2323
observer_type: histogram
2424
2525
---
26-
2726
apiVersion: apps/v1
28-
kind: DaemonSet
27+
kind: Deployment
2928
metadata:
3029
name: prometheus-statsd-exporter
3130
namespace: default
3231
spec:
33-
updateStrategy:
34-
type: RollingUpdate
35-
rollingUpdate:
36-
maxUnavailable: 1
32+
replicas: 1
3733
selector:
3834
matchLabels:
3935
name: prometheus-statsd-exporter
@@ -62,7 +58,6 @@ spec:
6258
protocol: TCP
6359
- name: statsd-udp
6460
containerPort: 9125
65-
hostPort: 9125
6661
protocol: UDP
6762
livenessProbe:
6863
httpGet:
@@ -86,16 +81,23 @@ spec:
8681
items:
8782
- key: statsd-mapping.yaml
8883
path: statsd-mapping.yaml
89-
nodeSelector:
90-
workload: "true"
9184
terminationGracePeriodSeconds: 60
92-
tolerations:
93-
- key: aws.amazon.com/neuron
94-
operator: Exists
95-
effect: NoSchedule
96-
- key: nvidia.com/gpu
97-
operator: Exists
98-
effect: NoSchedule
99-
- key: workload
100-
operator: Exists
101-
effect: NoSchedule
85+
86+
---
87+
apiVersion: v1
88+
kind: Service
89+
metadata:
90+
namespace: default
91+
name: prometheus-statsd-exporter
92+
labels:
93+
cortex.dev/name: prometheus-statsd-exporter
94+
spec:
95+
selector:
96+
name: prometheus-statsd-exporter
97+
ports:
98+
- port: 9125
99+
name: statsd-udp
100+
protocol: UDP
101+
- port: 9102
102+
name: metrics
103+
protocol: TCP

pkg/consts/consts.go

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,7 @@ var (
3535
AdminPortName = "admin"
3636
AdminPortStr = "15000"
3737
AdminPortInt32 = int32(15000)
38-
39-
StatsDPortStr = "9125"
40-
41-
AuthHeader = "X-Cortex-Authorization"
38+
AuthHeader = "X-Cortex-Authorization"
4239

4340
DefaultInClusterConfigPath = "/configs/cluster/cluster.yaml"
4441
MaxBucketLifecycleRules = 100

pkg/operator/resources/validations.go

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -106,23 +106,21 @@ func ValidateClusterAPIs(apis []userconfig.API) error {
106106
CPU Reservations:
107107
108108
FluentBit 100
109-
StatsDExporter 100
110109
NodeExporter 110 (it has two containers)
111110
KubeProxy 100
112111
AWS cni 10
113112
Reserved (150 + 150) see eks.yaml for details
114113
*/
115-
var _cortexCPUReserve = kresource.MustParse("720m")
114+
var _cortexCPUReserve = kresource.MustParse("620m")
116115

117116
/*
118117
Memory Reservations:
119118
120119
FluentBit 150
121-
StatsDExporter 100
122120
NodeExporter 200 (it has two containers)
123121
Reserved (300 + 300 + 200) see eks.yaml for details
124122
*/
125-
var _cortexMemReserve = kresource.MustParse("1250Mi")
123+
var _cortexMemReserve = kresource.MustParse("1150Mi")
126124

127125
var _nvidiaCPUReserve = kresource.MustParse("100m")
128126
var _nvidiaMemReserve = kresource.MustParse("100Mi")

pkg/workloads/k8s.go

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,8 @@ const (
6262
_clusterConfigDirVolume = "cluster-config"
6363
_clusterConfigConfigMap = "cluster-config"
6464
_clusterConfigDir = "/configs/cluster"
65+
66+
_statsdAddress = "prometheus-statsd-exporter.default:9125"
6567
)
6668

6769
var (
@@ -128,18 +130,11 @@ func asyncDequeuerProxyContainer(api spec.API, queueURL string) (kcore.Container
128130
"--queue", queueURL,
129131
"--api-kind", api.Kind.String(),
130132
"--api-name", api.Name,
133+
"--statsd-address", _statsdAddress,
131134
"--user-port", s.Int32(*api.Pod.Port),
132-
"--statsd-port", consts.StatsDPortStr,
133135
"--admin-port", consts.AdminPortStr,
134136
},
135-
Env: append(baseEnvVars, kcore.EnvVar{
136-
Name: "HOST_IP",
137-
ValueFrom: &kcore.EnvVarSource{
138-
FieldRef: &kcore.ObjectFieldSelector{
139-
FieldPath: "status.hostIP",
140-
},
141-
},
142-
}),
137+
Env: baseEnvVars,
143138
Ports: []kcore.ContainerPort{
144139
{
145140
Name: consts.AdminPortName,
@@ -181,18 +176,11 @@ func batchDequeuerProxyContainer(api spec.API, jobID, queueURL string) (kcore.Co
181176
"--api-kind", api.Kind.String(),
182177
"--api-name", api.Name,
183178
"--job-id", jobID,
179+
"--statsd-address", _statsdAddress,
184180
"--user-port", s.Int32(*api.Pod.Port),
185-
"--statsd-port", consts.StatsDPortStr,
186181
"--admin-port", consts.AdminPortStr,
187182
},
188-
Env: append(baseEnvVars, kcore.EnvVar{
189-
Name: "HOST_IP",
190-
ValueFrom: &kcore.EnvVarSource{
191-
FieldRef: &kcore.ObjectFieldSelector{
192-
FieldPath: "status.hostIP",
193-
},
194-
},
195-
}),
183+
Env: baseEnvVars,
196184
ReadinessProbe: &kcore.Probe{
197185
Handler: kcore.Handler{
198186
HTTPGet: &kcore.HTTPGetAction{

0 commit comments

Comments
 (0)