Skip to content

Commit 5d21a40

Browse files
authored
Add readiness/liveness probes to k8s CaaS resources (#2187)
1 parent 9722b5a commit 5d21a40

File tree

36 files changed

+1049
-810
lines changed

36 files changed

+1049
-810
lines changed

build/images.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ api_images=(
2727
)
2828

2929
dev_images=(
30-
"downloader"
3130
"manager"
3231
"proxy"
3332
"async-gateway"

cmd/proxy/main.go

Lines changed: 2 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ package main
1919
import (
2020
"context"
2121
"flag"
22-
"io/ioutil"
2322
"net/http"
2423
"os"
2524
"os/signal"
@@ -49,7 +48,6 @@ func main() {
4948
userContainerPort int
5049
maxConcurrency int
5150
maxQueueLength int
52-
probeDefPath string
5351
clusterConfigPath string
5452
)
5553

@@ -59,7 +57,6 @@ func main() {
5957
flag.IntVar(&maxConcurrency, "max-concurrency", 0, "max concurrency allowed for user container")
6058
flag.IntVar(&maxQueueLength, "max-queue-length", 0, "max request queue length for user container")
6159
flag.StringVar(&clusterConfigPath, "cluster-config", "", "cluster config path")
62-
flag.StringVar(&probeDefPath, "probe", "", "path to the desired probe json definition")
6360
flag.Parse()
6461

6562
log := logging.GetLogger()
@@ -119,23 +116,7 @@ func main() {
119116
)
120117

121118
promStats := proxy.NewPrometheusStatsReporter()
122-
123-
var readinessProbe *probe.Probe
124-
if probeDefPath != "" {
125-
jsonProbe, err := ioutil.ReadFile(probeDefPath)
126-
if err != nil {
127-
log.Fatal(err)
128-
}
129-
130-
probeDef, err := probe.DecodeJSON(string(jsonProbe))
131-
if err != nil {
132-
log.Fatal(err)
133-
}
134-
135-
readinessProbe = probe.NewProbe(probeDef, log)
136-
} else {
137-
readinessProbe = probe.NewDefaultProbe(target, log)
138-
}
119+
readinessProbe := probe.NewDefaultProbe(target, log)
139120

140121
go func() {
141122
reportTicker := time.NewTicker(_reportInterval)
@@ -165,7 +146,7 @@ func main() {
165146

166147
servers := map[string]*http.Server{
167148
"proxy": {
168-
Addr: ":" + strconv.Itoa(userContainerPort),
149+
Addr: ":" + strconv.Itoa(port),
169150
Handler: proxy.Handler(breaker, httpProxy),
170151
},
171152
"admin": {

docs/clusters/management/create.md

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,6 @@ The docker images used by the cluster can also be overridden. They can be config
9999
image_operator: quay.io/cortexlabs/operator:master
100100
image_controller_manager: quay.io/cortexlabs/controller-manager:master
101101
image_manager: quay.io/cortexlabs/manager:master
102-
image_downloader: quay.io/cortexlabs/downloader:master
103102
image_proxy: quay.io/cortexlabs/proxy:master
104103
image_async_gateway: quay.io/cortexlabs/async-gateway:master
105104
image_cluster_autoscaler: quay.io/cortexlabs/cluster-autoscaler:master

images/downloader/Dockerfile

Lines changed: 0 additions & 26 deletions
This file was deleted.

manager/install.sh

Lines changed: 0 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,6 @@ function main() {
3434
function cluster_up() {
3535
create_eks
3636

37-
start_pre_download_images
38-
3937
echo -n "○ updating cluster configuration "
4038
setup_configmap
4139
echo ""
@@ -76,8 +74,6 @@ function cluster_up() {
7674

7775
validate_cortex
7876

79-
await_pre_download_images
80-
8177
echo -e "\ncortex is ready!"
8278
if [ "$CORTEX_OPERATOR_LOAD_BALANCER_SCHEME" == "internal" ]; then
8379
echo -e "\nnote: you will need to configure VPC Peering to connect to your cluster: https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"
@@ -324,65 +320,6 @@ function setup_istio() {
324320
output_if_error istio-${ISTIO_VERSION}/bin/istioctl install -f /workspace/istio.yaml
325321
}
326322

327-
function start_pre_download_images() {
328-
registry="quay.io/cortexlabs"
329-
if [ -n "$CORTEX_DEV_DEFAULT_IMAGE_REGISTRY" ]; then
330-
registry="$CORTEX_DEV_DEFAULT_IMAGE_REGISTRY"
331-
fi
332-
export CORTEX_IMAGE_PYTHON_HANDLER_CPU="${registry}/python-handler-cpu:${CORTEX_VERSION}"
333-
export CORTEX_IMAGE_PYTHON_HANDLER_GPU="${registry}/python-handler-gpu:${CORTEX_VERSION}-cuda10.2-cudnn8"
334-
export CORTEX_IMAGE_PYTHON_HANDLER_INF="${registry}/python-handler-inf:${CORTEX_VERSION}"
335-
export CORTEX_IMAGE_TENSORFLOW_SERVING_CPU="${registry}/tensorflow-serving-cpu:${CORTEX_VERSION}"
336-
export CORTEX_IMAGE_TENSORFLOW_SERVING_GPU="${registry}/tensorflow-serving-gpu:${CORTEX_VERSION}"
337-
export CORTEX_IMAGE_TENSORFLOW_SERVING_INF="${registry}/tensorflow-serving-inf:${CORTEX_VERSION}"
338-
export CORTEX_IMAGE_TENSORFLOW_HANDLER="${registry}/tensorflow-handler:${CORTEX_VERSION}"
339-
340-
envsubst < manifests/image-downloader-cpu.yaml | kubectl apply -f - &>/dev/null
341-
342-
has_gpu="false"
343-
has_inf="false"
344-
345-
cluster_config_len=$(cat /in/cluster_${CORTEX_CLUSTER_NAME}_${CORTEX_REGION}.yaml | yq -r .node_groups | yq -r length)
346-
for idx in $(seq 0 $(($cluster_config_len-1))); do
347-
ng_instance_type=$(cat /in/cluster_${CORTEX_CLUSTER_NAME}_${CORTEX_REGION}.yaml | yq -r .node_groups[$idx].instance_type)
348-
if [[ "$ng_instance_type" == p* || "$ng_instance_type" == g* ]]; then
349-
has_gpu="true"
350-
fi
351-
if [[ "$ng_instance_type" == inf* ]]; then
352-
has_inf="true"
353-
fi
354-
done
355-
356-
if [ "$has_gpu" == "true" ]; then
357-
envsubst < manifests/image-downloader-gpu.yaml | kubectl apply -f - &>/dev/null
358-
fi
359-
360-
if [ "$has_inf" == "true" ]; then
361-
envsubst < manifests/image-downloader-inf.yaml | kubectl apply -f - &>/dev/null
362-
fi
363-
}
364-
365-
function await_pre_download_images() {
366-
echo -n "○ downloading docker images "
367-
printed_dot="false"
368-
for ds_name in image-downloader-cpu image-downloader-gpu image-downloader-inf; do
369-
if ! kubectl get daemonset $ds_name > /dev/null 2>&1; then
370-
continue
371-
fi
372-
i=0
373-
until [ "$(kubectl get daemonset $ds_name -n=default -o 'jsonpath={.status.numberReady}')" == "$(kubectl get daemonset $ds_name -n=default -o 'jsonpath={.status.desiredNumberScheduled}')" ]; do
374-
if [ $i -eq 120 ]; then break; fi # give up after 6 minutes
375-
echo -n "."
376-
printed_dot="true"
377-
((i=i+1))
378-
sleep 3
379-
done
380-
kubectl -n=default delete --ignore-not-found=true daemonset $ds_name &>/dev/null
381-
done
382-
383-
if [ "$printed_dot" == "true" ]; then echo ""; else echo ""; fi
384-
}
385-
386323
function validate_cortex() {
387324
set +e
388325

manager/manifests/image-downloader-cpu.yaml

Lines changed: 0 additions & 60 deletions
This file was deleted.

manager/manifests/image-downloader-gpu.yaml

Lines changed: 0 additions & 49 deletions
This file was deleted.

manager/manifests/image-downloader-inf.yaml

Lines changed: 0 additions & 54 deletions
This file was deleted.

manager/manifests/prometheus-monitoring.yaml.j2

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ spec:
184184
- path: /metrics
185185
scheme: http
186186
interval: 10s
187-
port: metrics
187+
port: admin
188188
relabelings:
189189
- action: keep
190190
sourceLabels: [ __meta_kubernetes_pod_container_name ]
@@ -221,7 +221,7 @@ metadata:
221221
spec:
222222
jobLabel: "statsd-exporter"
223223
podMetricsEndpoints:
224-
- port: metrics
224+
- port: admin
225225
scheme: http
226226
path: /metrics
227227
interval: 20s

pkg/consts/consts.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@ var (
3333
ProxyListeningPortStr = "8888"
3434
ProxyListeningPortInt32 = int32(8888)
3535

36-
MetricsPortStr = "15000"
37-
MetricsPortInt32 = int32(15000)
36+
AdminPortStr = "15000"
37+
AdminPortInt32 = int32(15000)
3838

3939
AuthHeader = "X-Cortex-Authorization"
4040

0 commit comments

Comments
 (0)