From c9d0eacac23ad5eb7a921fad0d5d5c92e3e9487b Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Tue, 23 Feb 2021 15:06:23 +0100 Subject: [PATCH 01/12] Add event exporter manifests --- manager/manifests/event-exporter.yaml | 82 +++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 manager/manifests/event-exporter.yaml diff --git a/manager/manifests/event-exporter.yaml b/manager/manifests/event-exporter.yaml new file mode 100644 index 0000000000..817418f9d8 --- /dev/null +++ b/manager/manifests/event-exporter.yaml @@ -0,0 +1,82 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: default + name: event-exporter + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: event-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: view +subjects: + - kind: ServiceAccount + namespace: default + name: event-exporter + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: event-exporter-config + namespace: default +data: + config.yaml: | + logLevel: error + logFormat: json + route: + routes: + - match: + - receiver: "stdout" + labels: + apiKind: RealtimeAPI + - match: + - receiver: "stdout" + labels: + apiKind: BatchAPI + - match: + - receiver: "stdout" + labels: + apiKind: TaskAPI + receivers: + - name: "stdout" + file: + path: "/dev/stdout" + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: event-exporter + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: event-exporter + template: + metadata: + labels: + app: event-exporter + spec: + serviceAccountName: event-exporter + containers: + - name: event-exporter + image: opsgenie/kubernetes-event-exporter:0.9 # TODO: replace + imagePullPolicy: IfNotPresent + args: + - -conf=/data/config.yaml + volumeMounts: + - mountPath: /data + name: event-exporter-config + volumes: + - name: event-exporter-config + configMap: + name: event-exporter-config From 
b01e10818c6be551bca6a72b745eeb08d90b3c3c Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Tue, 23 Feb 2021 15:06:58 +0100 Subject: [PATCH 02/12] Add filtering operations for the event exporter in fluent-bit --- manager/manifests/fluent-bit.yaml.j2 | 41 ++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/manager/manifests/fluent-bit.yaml.j2 b/manager/manifests/fluent-bit.yaml.j2 index b06832d16d..211474e3ea 100644 --- a/manager/manifests/fluent-bit.yaml.j2 +++ b/manager/manifests/fluent-bit.yaml.j2 @@ -64,6 +64,7 @@ data: @INCLUDE input-kubernetes.conf @INCLUDE filter-kubernetes.conf + @INCLUDE filter-k8s-events.conf @INCLUDE output.conf input-kubernetes.conf: | @@ -116,8 +117,36 @@ data: Match k8s_container.* Remove_wildcard k8s. + filter-k8s-events.conf: | + [FILTER] + Name nest + Match k8s_container.*.event-exporter-* + Operation lift + Nested_under involvedObject + Add_prefix involvedObject. + + [FILTER] + Name modify + Match k8s_container.*.event-exporter-* + Condition Key_exists labels + Rename labels k8s.labels + + [FILTER] + Name modify + Match k8s_container.*.event-exporter-* + Condition Key_exists involvedObject.labels + Hard_copy involvedObject.labels labels + + [FILTER] + Name nest + Match k8s_container.*.event-exporter-* + Operation nest + Wildcard involvedObject.* + Nest_under involvedObject + Remove_prefix involvedObject. 
+ output.conf: | - {% if env['CORTEX_PROVIDER'] == "aws" %} + {% if config['provider'] == "aws" %} [OUTPUT] Name cloudwatch Match k8s_container.* @@ -127,7 +156,7 @@ data: auto_create_group true {% endif %} - {% if env['CORTEX_PROVIDER'] == "gcp" %} + {% if config['provider'] == "gcp" %} [OUTPUT] Name stackdriver Match k8s_container.* @@ -173,7 +202,7 @@ spec: memory: 150Mi ports: - containerPort: 2020 - {% if env['CORTEX_PROVIDER'] == "gcp" %} + {% if config['provider'] == "gcp" %} env: - name: GOOGLE_SERVICE_CREDENTIALS value: /var/secrets/google/key.json @@ -186,7 +215,7 @@ spec: readOnly: true - name: fluent-bit-config mountPath: /fluent-bit/etc/ - {% if env['CORTEX_PROVIDER'] == "gcp" %} + {% if config['provider'] == "gcp" %} - name: gcp-credentials mountPath: /var/secrets/google {% endif %} @@ -201,7 +230,7 @@ spec: - name: fluent-bit-config configMap: name: fluent-bit-config - {% if env['CORTEX_PROVIDER'] == "gcp" %} + {% if config['provider'] == "gcp" %} - name: gcp-credentials secret: secretName: gcp-credentials @@ -215,7 +244,7 @@ spec: effect: "NoExecute" - operator: "Exists" effect: "NoSchedule" - {% if env['CORTEX_PROVIDER'] == "aws" %} + {% if config['provider'] == "aws" %} - key: aws.amazon.com/neuron operator: Exists effect: NoSchedule From 89558330c03ee0ff43291e5fcdebdfa9e253a19a Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Tue, 23 Feb 2021 16:03:48 +0100 Subject: [PATCH 03/12] Improve logging in stackdriver --- manager/manifests/fluent-bit.yaml.j2 | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/manager/manifests/fluent-bit.yaml.j2 b/manager/manifests/fluent-bit.yaml.j2 index 211474e3ea..6110e0a8c1 100644 --- a/manager/manifests/fluent-bit.yaml.j2 +++ b/manager/manifests/fluent-bit.yaml.j2 @@ -65,6 +65,7 @@ data: @INCLUDE input-kubernetes.conf @INCLUDE filter-kubernetes.conf @INCLUDE filter-k8s-events.conf + @INCLUDE filter-stackdriver-format.conf @INCLUDE output.conf input-kubernetes.conf: | @@ -145,6 +146,19 @@ 
data: Nest_under involvedObject Remove_prefix involvedObject. + filter-stackdriver-format.conf: | + [FILTER] + Name modify + Match k8s_container.* + Condition Key_exists log + Rename log message + + [FILTER] + Name modify + Match k8s_container.* + Condition Key_exists levelname + Rename levelname level + output.conf: | {% if config['provider'] == "aws" %} [OUTPUT] @@ -163,6 +177,8 @@ data: resource k8s_container k8s_cluster_name {{ config["cluster_name"] }} k8s_cluster_location {{ config["zone"] }} + severity_key level + labels_key labels {% endif %} parsers.conf: | From 598b6c6446b9b3ba28bff5628b7beec9a2950392 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Tue, 23 Feb 2021 16:04:44 +0100 Subject: [PATCH 04/12] Update GCP logging docs --- docs/clusters/gcp/logging.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/clusters/gcp/logging.md b/docs/clusters/gcp/logging.md index 065b2bdd0f..75d0f970eb 100644 --- a/docs/clusters/gcp/logging.md +++ b/docs/clusters/gcp/logging.md @@ -7,8 +7,8 @@ RealtimeAPI: ```text resource.type="k8s_container" resource.labels.cluster_name="" -jsonPayload.labels.apiKind="RealtimeAPI" -jsonPayload.labels.apiName="" +labels.apiKind="RealtimeAPI" +labels.apiName="" ``` TaskAPI: @@ -16,9 +16,9 @@ TaskAPI: ```text resource.type="k8s_container" resource.labels.cluster_name="" -jsonPayload.labels.apiKind="TaskAPI" -jsonPayload.labels.apiName="" -jsonPayload.labels.jobID="" +labels.apiKind="TaskAPI" +labels.apiName="" +labels.jobID="" ``` Please make sure to navigate to the project containing your cluster and adjust the time range accordingly before running queries. 
From 7c8e051e54b694a94e20a6c75f27d5a8257307a1 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Tue, 23 Feb 2021 16:17:58 +0100 Subject: [PATCH 05/12] Add manifests to helm chart --- charts/templates/event-exporter.yaml | 82 ++++++++++++++++++++++++++++ charts/templates/fluentbit.yaml | 45 +++++++++++++++ charts/values.yaml | 1 + 3 files changed, 128 insertions(+) create mode 100644 charts/templates/event-exporter.yaml diff --git a/charts/templates/event-exporter.yaml b/charts/templates/event-exporter.yaml new file mode 100644 index 0000000000..b444a077b8 --- /dev/null +++ b/charts/templates/event-exporter.yaml @@ -0,0 +1,82 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: {{ .Release.Namespace }} + name: event-exporter + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: event-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: view +subjects: + - kind: ServiceAccount + namespace: {{ .Release.Namespace }} + name: event-exporter + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: event-exporter-config + namespace: {{ .Release.Namespace }} +data: + config.yaml: | + logLevel: error + logFormat: json + route: + routes: + - match: + - receiver: "stdout" + labels: + apiKind: RealtimeAPI + - match: + - receiver: "stdout" + labels: + apiKind: BatchAPI + - match: + - receiver: "stdout" + labels: + apiKind: TaskAPI + receivers: + - name: "stdout" + file: + path: "/dev/stdout" + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: event-exporter + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: event-exporter + template: + metadata: + labels: + app: event-exporter + spec: + serviceAccountName: event-exporter + containers: + - name: event-exporter + image: {{ .Values.cortex.image_event_exporter }} + imagePullPolicy: IfNotPresent + args: + - -conf=/data/config.yaml + volumeMounts: + - mountPath: /data + name: 
event-exporter-config + volumes: + - name: event-exporter-config + configMap: + name: event-exporter-config diff --git a/charts/templates/fluentbit.yaml b/charts/templates/fluentbit.yaml index d7326477b7..89b75efaeb 100644 --- a/charts/templates/fluentbit.yaml +++ b/charts/templates/fluentbit.yaml @@ -52,6 +52,8 @@ data: @INCLUDE input-kubernetes.conf @INCLUDE filter-kubernetes.conf + @INCLUDE filter-k8s-events.conf + @INCLUDE filter-stackdriver-format.conf @INCLUDE output.conf input-kubernetes.conf: | @@ -104,6 +106,47 @@ data: Match k8s_container.* Remove_wildcard k8s. + filter-k8s-events.conf: | + [FILTER] + Name nest + Match k8s_container.*.event-exporter-* + Operation lift + Nested_under involvedObject + Add_prefix involvedObject. + + [FILTER] + Name modify + Match k8s_container.*.event-exporter-* + Condition Key_exists labels + Rename labels k8s.labels + + [FILTER] + Name modify + Match k8s_container.*.event-exporter-* + Condition Key_exists involvedObject.labels + Hard_copy involvedObject.labels labels + + [FILTER] + Name nest + Match k8s_container.*.event-exporter-* + Operation nest + Wildcard involvedObject.* + Nest_under involvedObject + Remove_prefix involvedObject. 
+ + filter-stackdriver-format.conf: | + [FILTER] + Name modify + Match k8s_container.* + Condition Key_exists log + Rename log message + + [FILTER] + Name modify + Match k8s_container.* + Condition Key_exists levelname + Rename levelname level + output.conf: | {{- if eq .Values.global.provider "aws" }} [OUTPUT] @@ -122,6 +165,8 @@ data: resource k8s_container k8s_cluster_name {{ .Values.cortex.cluster_name }} k8s_cluster_location {{ .Values.cortex.zone }} + severity_key level + labels_key labels {{- end }} parsers.conf: | diff --git a/charts/values.yaml b/charts/values.yaml index 44838cf46d..6a88bd9aba 100644 --- a/charts/values.yaml +++ b/charts/values.yaml @@ -29,6 +29,7 @@ cortex: image_prometheus_operator: quay.io/cortexlabs/prometheus-operator:master image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master image_grafana: quay.io/cortexlabs/grafana:master + image_event_exporter: quay.io/cortexlabs/event-exporter:master networking: istio-discovery: From ae2bca975a321c1dad4413440232022418747892 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Wed, 24 Feb 2021 14:49:42 +0100 Subject: [PATCH 06/12] Add event exporter images --- build/images.sh | 3 ++- images/event-exporter/Dockerfile | 1 + manager/manifests/event-exporter.yaml | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 images/event-exporter/Dockerfile diff --git a/build/images.sh b/build/images.sh index 53b6b4d3cd..ac9c2835a7 100644 --- a/build/images.sh +++ b/build/images.sh @@ -35,6 +35,7 @@ api_images_gcp=( ) dev_images_cluster=( + "operator" "downloader" "manager" "request-monitor" @@ -50,7 +51,6 @@ non_dev_images_cluster=( "tensorflow-serving-cpu" "tensorflow-serving-gpu" "cluster-autoscaler" - "operator" "istio-proxy" "istio-pilot" "fluent-bit" @@ -59,6 +59,7 @@ non_dev_images_cluster=( "prometheus-operator" "prometheus-statsd-exporter" "grafana" + "event-exporter" ) non_dev_images_aws=( # includes non_dev_images_cluster diff --git 
a/images/event-exporter/Dockerfile b/images/event-exporter/Dockerfile new file mode 100644 index 0000000000..283108e99f --- /dev/null +++ b/images/event-exporter/Dockerfile @@ -0,0 +1 @@ +FROM opsgenie/kubernetes-event-exporter:0.9 diff --git a/manager/manifests/event-exporter.yaml b/manager/manifests/event-exporter.yaml index 817418f9d8..9773b4ff12 100644 --- a/manager/manifests/event-exporter.yaml +++ b/manager/manifests/event-exporter.yaml @@ -69,7 +69,7 @@ spec: serviceAccountName: event-exporter containers: - name: event-exporter - image: opsgenie/kubernetes-event-exporter:0.9 # TODO: replace + image: $CORTEX_IMAGE_EVENT_EXPORTER imagePullPolicy: IfNotPresent args: - -conf=/data/config.yaml From b2c6e5f2838ead6ab9e8c20408a1a6aed2c72164 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Wed, 24 Feb 2021 14:50:40 +0100 Subject: [PATCH 07/12] Add installation code for event exporter --- CONTRIBUTING.md | 2 ++ cli/cmd/lib_cluster_config_aws.go | 4 ++++ docs/clusters/aws/install.md | 1 + docs/clusters/gcp/install.md | 1 + manager/install.sh | 4 ++-- pkg/types/clusterconfig/cluster_config_aws.go | 12 ++++++++++++ pkg/types/clusterconfig/cluster_config_gcp.go | 12 ++++++++++++ pkg/types/clusterconfig/config_key.go | 2 ++ 8 files changed, 36 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e2ff736566..97d72f40ad 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -194,6 +194,7 @@ image_prometheus_config_reloader: .dkr.ecr..amazonaws.com/co image_prometheus_operator: .dkr.ecr..amazonaws.com/cortexlabs/prometheus-operator:master image_prometheus_statsd_exporter: .dkr.ecr..amazonaws.com/cortexlabs/prometheus-statsd-exporter:master image_grafana: .dkr.ecr..amazonaws.com/cortexlabs/grafana:master +image_event_exporter: .dkr.ecr..amazonaws.com/cortexlabs/event-exporter:master ``` Create `dev/config/cluster-gcp.yaml`. 
Paste the following config, and update `project`, `zone`, and all registry URLs (replace `` with your project ID, and update `gcr.io` if you are using a different host): @@ -222,6 +223,7 @@ image_prometheus_config_reloader: gcr.io//cortexlabs/prometheus-conf image_prometheus_operator: gcr.io//cortexlabs/prometheus-operator:master image_prometheus_statsd_exporter: gcr.io//cortexlabs/prometheus-statsd-exporter:master image_grafana: gcr.io//cortexlabs/grafana:master +image_event_exporter: gcr.io//cortexlabs/event-exporter:master ``` ### Building diff --git a/cli/cmd/lib_cluster_config_aws.go b/cli/cmd/lib_cluster_config_aws.go index db42c3c41a..3975dab6fa 100644 --- a/cli/cmd/lib_cluster_config_aws.go +++ b/cli/cmd/lib_cluster_config_aws.go @@ -428,6 +428,10 @@ func setConfigFieldsFromCached(userClusterConfig *clusterconfig.Config, cachedCl return clusterconfig.ErrorConfigCannotBeChangedOnUpdate(clusterconfig.ImageGrafanaKey, cachedClusterConfig.ImageGrafana) } + if s.Obj(cachedClusterConfig.ImageEventExporter) != s.Obj(userClusterConfig.ImageEventExporter) { + return clusterconfig.ErrorConfigCannotBeChangedOnUpdate(clusterconfig.ImageEventExporterKey, cachedClusterConfig.ImageEventExporter) + } + if userClusterConfig.Spot != nil && *userClusterConfig.Spot != *cachedClusterConfig.Spot { return clusterconfig.ErrorConfigCannotBeChangedOnUpdate(clusterconfig.SpotKey, *cachedClusterConfig.Spot) } diff --git a/docs/clusters/aws/install.md b/docs/clusters/aws/install.md index e1e5d78a37..b3a76cca4f 100644 --- a/docs/clusters/aws/install.md +++ b/docs/clusters/aws/install.md @@ -109,4 +109,5 @@ image_prometheus_config_reloader: quay.io/cortexlabs/prometheus-config-reloader: image_prometheus_operator: quay.io/cortexlabs/prometheus-operator:master image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master image_grafana: quay.io/cortexlabs/grafana:master +image_event_exporter: quay.io/cortexlabs/event-exporter:master ``` diff --git 
a/docs/clusters/gcp/install.md b/docs/clusters/gcp/install.md index 2f39506c7b..4927d64971 100644 --- a/docs/clusters/gcp/install.md +++ b/docs/clusters/gcp/install.md @@ -83,4 +83,5 @@ image_prometheus_config_reloader: quay.io/cortexlabs/prometheus-config-reloader: image_prometheus_operator: quay.io/cortexlabs/prometheus-operator:master image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master image_grafana: quay.io/cortexlabs/grafana:master +image_event_exporter: quay.io/cortexlabs/event-exporter:master ``` diff --git a/manager/install.sh b/manager/install.sh index 000749e734..992f26c5ce 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -61,8 +61,8 @@ function cluster_up_aws() { echo "✓" echo -n "○ configuring logging " - python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/fluent-bit.yaml.j2 > /workspace/fluent-bit.yaml - kubectl apply -f /workspace/fluent-bit.yaml >/dev/null + python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/fluent-bit.yaml.j2 | kubectl apply -f - >/dev/null + envsubst < manifests/event-exporter.yaml | kubectl apply -f - >/dev/null echo "✓" echo -n "○ configuring metrics " diff --git a/pkg/types/clusterconfig/cluster_config_aws.go b/pkg/types/clusterconfig/cluster_config_aws.go index dd394b6d5d..fa87a122b6 100644 --- a/pkg/types/clusterconfig/cluster_config_aws.go +++ b/pkg/types/clusterconfig/cluster_config_aws.go @@ -81,6 +81,7 @@ type CoreConfig struct { ImagePrometheusOperator string `json:"image_prometheus_operator" yaml:"image_prometheus_operator"` ImagePrometheusStatsDExporter string `json:"image_prometheus_statsd_exporter" yaml:"image_prometheus_statsd_exporter"` ImageGrafana string `json:"image_grafana" yaml:"image_grafana"` + ImageEventExporter string `json:"image_event_exporter" yaml:"image_event_exporter"` } type ManagedConfig struct { @@ -338,6 +339,13 @@ var CoreConfigStructFieldValidations = []*cr.StructFieldValidation{ Validator: validateImageVersion, }, }, + { + 
StructField: "ImageEventExporter", + StringValidation: &cr.StringValidation{ + Default: "quay.io/cortexlabs/event-exporter:" + consts.CortexVersion, + Validator: validateImageVersion, + }, + }, } var ManagedConfigStructFieldValidations = []*cr.StructFieldValidation{ @@ -1284,6 +1292,7 @@ func (cc *CoreConfig) UserTable() table.KeyValuePairs { items.Add(ImagePrometheusOperatorUserKey, cc.ImagePrometheusOperator) items.Add(ImagePrometheusStatsDExporterUserKey, cc.ImagePrometheusStatsDExporter) items.Add(ImageGrafanaUserKey, cc.ImageGrafana) + items.Add(ImageEventExporterUserKey, cc.ImageEventExporter) return items } @@ -1418,6 +1427,9 @@ func (cc *CoreConfig) TelemetryEvent() map[string]interface{} { if strings.HasPrefix(cc.ImageGrafana, "cortexlabs/") { event["image_grafana._is_custom"] = true } + if strings.HasPrefix(cc.ImageEventExporter, "cortexlabs/") { + event["image_event_exporter._is_custom"] = true + } return event } diff --git a/pkg/types/clusterconfig/cluster_config_gcp.go b/pkg/types/clusterconfig/cluster_config_gcp.go index a01b2ac334..05ed5dae0f 100644 --- a/pkg/types/clusterconfig/cluster_config_gcp.go +++ b/pkg/types/clusterconfig/cluster_config_gcp.go @@ -58,6 +58,7 @@ type GCPCoreConfig struct { ImagePrometheusOperator string `json:"image_prometheus_operator" yaml:"image_prometheus_operator"` ImagePrometheusStatsDExporter string `json:"image_prometheus_statsd_exporter" yaml:"image_prometheus_statsd_exporter"` ImageGrafana string `json:"image_grafana" yaml:"image_grafana"` + ImageEventExporter string `json:"image_event_exporter" yaml:"image_event_exporter"` } type GCPManagedConfig struct { @@ -245,6 +246,13 @@ var GCPCoreConfigStructFieldValidations = []*cr.StructFieldValidation{ Validator: validateImageVersion, }, }, + { + StructField: "ImageEventExporter", + StringValidation: &cr.StringValidation{ + Default: "quay.io/cortexlabs/event-exporter:" + consts.CortexVersion, + Validator: validateImageVersion, + }, + }, { StructField: "Telemetry", 
BoolValidation: &cr.BoolValidation{ @@ -674,6 +682,7 @@ func (cc *GCPCoreConfig) UserTable() table.KeyValuePairs { items.Add(ImagePrometheusOperatorUserKey, cc.ImagePrometheusOperator) items.Add(ImagePrometheusStatsDExporterUserKey, cc.ImagePrometheusStatsDExporter) items.Add(ImageGrafanaUserKey, cc.ImageGrafana) + items.Add(ImageEventExporterUserKey, cc.ImageEventExporter) return items } @@ -781,6 +790,9 @@ func (cc *GCPCoreConfig) TelemetryEvent() map[string]interface{} { if strings.HasPrefix(cc.ImageGrafana, "cortexlabs/") { event["image_grafana._is_custom"] = true } + if strings.HasPrefix(cc.ImageEventExporter, "cortexlabs/") { + event["image_event_exporter._is_custom"] = true + } return event } diff --git a/pkg/types/clusterconfig/config_key.go b/pkg/types/clusterconfig/config_key.go index 434fc7623b..cb98821581 100644 --- a/pkg/types/clusterconfig/config_key.go +++ b/pkg/types/clusterconfig/config_key.go @@ -75,6 +75,7 @@ const ( ImagePrometheusOperatorKey = "image_prometheus_operator" ImagePrometheusStatsDExporterKey = "image_prometheus_statsd_exporter" ImageGrafanaKey = "image_grafana" + ImageEventExporterKey = "image_event_exporter" // User facing string ProviderUserKey = "provider" @@ -134,4 +135,5 @@ const ( ImagePrometheusOperatorUserKey = "prometheus operator image" ImagePrometheusStatsDExporterUserKey = "prometheus statsd exporter image" ImageGrafanaUserKey = "grafana image" + ImageEventExporterUserKey = "event exporter image" ) From 3c8f16a3fa29cb8aab65caa5071591906a18b115 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Wed, 24 Feb 2021 15:08:33 +0100 Subject: [PATCH 08/12] Add missing license --- manager/manifests/event-exporter.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/manager/manifests/event-exporter.yaml b/manager/manifests/event-exporter.yaml index 9773b4ff12..f58a8fc162 100644 --- a/manager/manifests/event-exporter.yaml +++ b/manager/manifests/event-exporter.yaml @@ -1,3 +1,17 @@ +# Copyright 2021 Cortex 
Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: v1 kind: ServiceAccount metadata: From 0bdc7d6dfebfe2cea0533129d80177fcf8ccf967 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Wed, 24 Feb 2021 15:51:59 +0100 Subject: [PATCH 09/12] Fix install on GCP --- manager/install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manager/install.sh b/manager/install.sh index 992f26c5ce..c6f744fc27 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -120,8 +120,8 @@ function cluster_up_gcp() { echo "✓" echo -n "○ configuring logging " - python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/fluent-bit.yaml.j2 > /workspace/fluent-bit.yaml - kubectl apply -f /workspace/fluent-bit.yaml >/dev/null + python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/fluent-bit.yaml.j2 | kubectl apply -f - >/dev/null + envsubst < manifests/event-exporter.yaml | kubectl apply -f - >/dev/null echo "✓" echo -n "○ configuring metrics " From 47f51d4da28dfe6a7cefd5305aef71ddfa8a9bd1 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 25 Feb 2021 15:32:35 +0100 Subject: [PATCH 10/12] Revert moving operator image to dev images in images.sh --- build/images.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/images.sh b/build/images.sh index ac9c2835a7..480562708a 100644 --- a/build/images.sh +++ b/build/images.sh @@ -35,7 +35,6 @@ api_images_gcp=( ) dev_images_cluster=( - "operator" 
"downloader" "manager" "request-monitor" @@ -51,6 +50,7 @@ non_dev_images_cluster=( "tensorflow-serving-cpu" "tensorflow-serving-gpu" "cluster-autoscaler" + "operator" "istio-proxy" "istio-pilot" "fluent-bit" From 46ec69674216cc6d123497a4c19ad2b5aed467af Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 25 Feb 2021 15:50:29 +0100 Subject: [PATCH 11/12] Collect events based on a single label --- charts/templates/event-exporter.yaml | 10 +-- manager/manifests/event-exporter.yaml | 10 +-- .../resources/job/batchapi/k8s_specs.go | 77 +++++++++-------- .../resources/job/taskapi/k8s_specs.go | 33 ++++---- .../resources/realtimeapi/k8s_specs.go | 84 ++++++++++--------- .../resources/trafficsplitter/k8s_specs.go | 9 +- 6 files changed, 113 insertions(+), 110 deletions(-) diff --git a/charts/templates/event-exporter.yaml b/charts/templates/event-exporter.yaml index b444a077b8..1203e03f18 100644 --- a/charts/templates/event-exporter.yaml +++ b/charts/templates/event-exporter.yaml @@ -35,15 +35,7 @@ data: - match: - receiver: "stdout" labels: - apiKind: RealtimeAPI - - match: - - receiver: "stdout" - labels: - apiKind: BatchAPI - - match: - - receiver: "stdout" - labels: - apiKind: TaskAPI + cortex.dev/api: true receivers: - name: "stdout" file: diff --git a/manager/manifests/event-exporter.yaml b/manager/manifests/event-exporter.yaml index f58a8fc162..ae0957d9aa 100644 --- a/manager/manifests/event-exporter.yaml +++ b/manager/manifests/event-exporter.yaml @@ -49,15 +49,7 @@ data: - match: - receiver: "stdout" labels: - apiKind: RealtimeAPI - - match: - - receiver: "stdout" - labels: - apiKind: BatchAPI - - match: - - receiver: "stdout" - labels: - apiKind: TaskAPI + cortex.dev/api: true receivers: - name: "stdout" file: diff --git a/pkg/operator/resources/job/batchapi/k8s_specs.go b/pkg/operator/resources/job/batchapi/k8s_specs.go index 1782d0cbbb..a8a7ea41b7 100644 --- a/pkg/operator/resources/job/batchapi/k8s_specs.go +++ 
b/pkg/operator/resources/job/batchapi/k8s_specs.go @@ -63,19 +63,21 @@ func pythonPredictorJobSpec(api *spec.API, job *spec.BatchJob) (*kbatch.Job, err Name: job.JobKey.K8sName(), Parallelism: int32(job.Workers), Labels: map[string]string{ - "apiName": api.Name, - "apiID": api.ID, - "specID": api.SpecID, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "apiID": api.ID, + "specID": api.SpecID, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, PodSpec: k8s.PodSpec{ Labels: map[string]string{ - "apiName": api.Name, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, Annotations: map[string]string{ "traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0", @@ -110,19 +112,21 @@ func tensorFlowPredictorJobSpec(api *spec.API, job *spec.BatchJob) (*kbatch.Job, Name: job.JobKey.K8sName(), Parallelism: int32(job.Workers), Labels: map[string]string{ - "apiName": api.Name, - "apiID": api.ID, - "specID": api.SpecID, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "apiID": api.ID, + "specID": api.SpecID, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, PodSpec: k8s.PodSpec{ Labels: map[string]string{ - "apiName": api.Name, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, Annotations: map[string]string{ "traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0", @@ -158,19 +162,21 @@ func onnxPredictorJobSpec(api *spec.API, job *spec.BatchJob) (*kbatch.Job, error Name: 
job.JobKey.K8sName(), Parallelism: int32(job.Workers), Labels: map[string]string{ - "apiName": api.Name, - "apiID": api.ID, - "specID": api.SpecID, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "apiID": api.ID, + "specID": api.SpecID, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, PodSpec: k8s.PodSpec{ Labels: map[string]string{ - "apiName": api.Name, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, Annotations: map[string]string{ "traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0", @@ -203,11 +209,12 @@ func virtualServiceSpec(api *spec.API) *istioclientnetworking.VirtualService { Rewrite: pointer.String(path.Join("batch", api.Name)), Annotations: api.ToK8sAnnotations(), Labels: map[string]string{ - "apiName": api.Name, - "apiID": api.ID, - "specID": api.SpecID, - "predictorID": api.PredictorID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "apiID": api.ID, + "specID": api.SpecID, + "predictorID": api.PredictorID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, }) } diff --git a/pkg/operator/resources/job/taskapi/k8s_specs.go b/pkg/operator/resources/job/taskapi/k8s_specs.go index 0e64a3d760..83521eb5e8 100644 --- a/pkg/operator/resources/job/taskapi/k8s_specs.go +++ b/pkg/operator/resources/job/taskapi/k8s_specs.go @@ -48,11 +48,12 @@ func virtualServiceSpec(api *spec.API) *istioclientnetworking.VirtualService { Rewrite: pointer.String(path.Join("tasks", api.Name)), Annotations: api.ToK8sAnnotations(), Labels: map[string]string{ - "apiName": api.Name, - "apiID": api.ID, - "specID": api.SpecID, - "predictorID": api.PredictorID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "apiID": api.ID, + "specID": api.SpecID, 
+ "predictorID": api.PredictorID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, }) } @@ -82,19 +83,21 @@ func k8sJobSpec(api *spec.API, job *spec.TaskJob) (*kbatch.Job, error) { Name: job.JobKey.K8sName(), Parallelism: int32(job.Workers), Labels: map[string]string{ - "apiName": api.Name, - "apiID": api.ID, - "specID": api.SpecID, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "apiID": api.ID, + "specID": api.SpecID, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, PodSpec: k8s.PodSpec{ Labels: map[string]string{ - "apiName": api.Name, - "predictorID": api.PredictorID, - "jobID": job.ID, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "predictorID": api.PredictorID, + "jobID": job.ID, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, Annotations: map[string]string{ "traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0", diff --git a/pkg/operator/resources/realtimeapi/k8s_specs.go b/pkg/operator/resources/realtimeapi/k8s_specs.go index e9ec4fa801..141825d345 100644 --- a/pkg/operator/resources/realtimeapi/k8s_specs.go +++ b/pkg/operator/resources/realtimeapi/k8s_specs.go @@ -52,12 +52,13 @@ func tensorflowAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.D MaxSurge: pointer.String(api.UpdateStrategy.MaxSurge), MaxUnavailable: pointer.String(api.UpdateStrategy.MaxUnavailable), Labels: map[string]string{ - "apiName": api.Name, - "apiKind": api.Kind.String(), - "apiID": api.ID, - "specID": api.SpecID, - "deploymentID": api.DeploymentID, - "predictorID": api.PredictorID, + "apiName": api.Name, + "apiKind": api.Kind.String(), + "apiID": api.ID, + "specID": api.SpecID, + "deploymentID": api.DeploymentID, + "predictorID": api.PredictorID, + "cortex.dev/api": "true", }, Annotations: api.ToK8sAnnotations(), Selector: map[string]string{ @@ -66,10 +67,11 @@ func tensorflowAPISpec(api 
*spec.API, prevDeployment *kapps.Deployment) *kapps.D }, PodSpec: k8s.PodSpec{ Labels: map[string]string{ - "apiName": api.Name, - "apiKind": api.Kind.String(), - "deploymentID": api.DeploymentID, - "predictorID": api.PredictorID, + "apiName": api.Name, + "apiKind": api.Kind.String(), + "deploymentID": api.DeploymentID, + "predictorID": api.PredictorID, + "cortex.dev/api": "true", }, Annotations: map[string]string{ "traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0", @@ -100,12 +102,13 @@ func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deplo MaxSurge: pointer.String(api.UpdateStrategy.MaxSurge), MaxUnavailable: pointer.String(api.UpdateStrategy.MaxUnavailable), Labels: map[string]string{ - "apiName": api.Name, - "apiKind": api.Kind.String(), - "apiID": api.ID, - "specID": api.SpecID, - "deploymentID": api.DeploymentID, - "predictorID": api.PredictorID, + "apiName": api.Name, + "apiKind": api.Kind.String(), + "apiID": api.ID, + "specID": api.SpecID, + "deploymentID": api.DeploymentID, + "predictorID": api.PredictorID, + "cortex.dev/api": "true", }, Annotations: api.ToK8sAnnotations(), Selector: map[string]string{ @@ -114,10 +117,11 @@ func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deplo }, PodSpec: k8s.PodSpec{ Labels: map[string]string{ - "apiName": api.Name, - "apiKind": api.Kind.String(), - "deploymentID": api.DeploymentID, - "predictorID": api.PredictorID, + "apiName": api.Name, + "apiKind": api.Kind.String(), + "deploymentID": api.DeploymentID, + "predictorID": api.PredictorID, + "cortex.dev/api": "true", }, Annotations: map[string]string{ "traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0", @@ -148,12 +152,13 @@ func onnxAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deploym MaxSurge: pointer.String(api.UpdateStrategy.MaxSurge), MaxUnavailable: pointer.String(api.UpdateStrategy.MaxUnavailable), Labels: map[string]string{ - "apiName": api.Name, - "apiKind": 
api.Kind.String(), - "apiID": api.ID, - "specID": api.SpecID, - "deploymentID": api.DeploymentID, - "predictorID": api.PredictorID, + "apiName": api.Name, + "apiKind": api.Kind.String(), + "apiID": api.ID, + "specID": api.SpecID, + "deploymentID": api.DeploymentID, + "predictorID": api.PredictorID, + "cortex.dev/api": "true", }, Annotations: api.ToK8sAnnotations(), Selector: map[string]string{ @@ -162,10 +167,11 @@ func onnxAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deploym }, PodSpec: k8s.PodSpec{ Labels: map[string]string{ - "apiName": api.Name, - "apiKind": api.Kind.String(), - "deploymentID": api.DeploymentID, - "predictorID": api.PredictorID, + "apiName": api.Name, + "apiKind": api.Kind.String(), + "deploymentID": api.DeploymentID, + "predictorID": api.PredictorID, + "cortex.dev/api": "true", }, Annotations: map[string]string{ "traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0", @@ -192,8 +198,9 @@ func serviceSpec(api *spec.API) *kcore.Service { TargetPort: operator.DefaultPortInt32, Annotations: api.ToK8sAnnotations(), Labels: map[string]string{ - "apiName": api.Name, - "apiKind": api.Kind.String(), + "apiName": api.Name, + "apiKind": api.Kind.String(), + "cortex.dev/api": "true", }, Selector: map[string]string{ "apiName": api.Name, @@ -215,12 +222,13 @@ func virtualServiceSpec(api *spec.API) *istioclientnetworking.VirtualService { Rewrite: pointer.String("predict"), Annotations: api.ToK8sAnnotations(), Labels: map[string]string{ - "apiName": api.Name, - "apiKind": api.Kind.String(), - "apiID": api.ID, - "specID": api.SpecID, - "deploymentID": api.DeploymentID, - "predictorID": api.PredictorID, + "apiName": api.Name, + "apiKind": api.Kind.String(), + "apiID": api.ID, + "specID": api.SpecID, + "deploymentID": api.DeploymentID, + "predictorID": api.PredictorID, + "cortex.dev/api": "true", }, }) } diff --git a/pkg/operator/resources/trafficsplitter/k8s_specs.go b/pkg/operator/resources/trafficsplitter/k8s_specs.go index 
a0070268bd..224206ebef 100644 --- a/pkg/operator/resources/trafficsplitter/k8s_specs.go +++ b/pkg/operator/resources/trafficsplitter/k8s_specs.go @@ -37,10 +37,11 @@ func virtualServiceSpec(trafficSplitter *spec.API) *istioclientnetworking.Virtua Rewrite: pointer.String("predict"), Annotations: trafficSplitter.ToK8sAnnotations(), Labels: map[string]string{ - "apiName": trafficSplitter.Name, - "apiKind": trafficSplitter.Kind.String(), - "apiID": trafficSplitter.ID, - "specID": trafficSplitter.SpecID, + "apiName": trafficSplitter.Name, + "apiKind": trafficSplitter.Kind.String(), + "apiID": trafficSplitter.ID, + "specID": trafficSplitter.SpecID, + "cortex.dev/api": "true", }, }) } From 062c0291a6aa562d0a219b783931c6c90eeaa05f Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 25 Feb 2021 16:02:57 +0100 Subject: [PATCH 12/12] Add note to versions.md --- dev/versions.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dev/versions.md b/dev/versions.md index 7c7a0098ff..e74b823587 100644 --- a/dev/versions.md +++ b/dev/versions.md @@ -330,6 +330,13 @@ supported () 1. Update the base image version in `images/grafana/Dockerfile`. 1. Update `grafana.yaml` as necessary, if that's the case. +## Event Exporter + +1. Find the latest release + on [GitHub](https://github.com/opsgenie/kubernetes-event-exporter). +1. Update the base image version in `images/event-exporter/Dockerfile`. +1. Update `event-exporter.yaml` if the new release requires it. + ## aws-iam-authenticator 1. Find the latest release [here](https://docs.aws.amazon.com/eks/latest/userguide/install-aws-iam-authenticator.html)