diff --git a/Makefile b/Makefile
index 7a2cbb019d..6e284b333f 100644
--- a/Makefile
+++ b/Makefile
@@ -24,13 +24,13 @@ devstart:
 	@./dev/operator_local.sh || true
 
 oinstall:
-	@./cortex-installer.sh -c=./dev/config/cortex.sh install operator
+	@./cortex.sh -c=./dev/config/cortex.sh install operator
 
 oupdate:
-	@./cortex-installer.sh -c=./dev/config/cortex.sh update operator
+	@./cortex.sh -c=./dev/config/cortex.sh update operator
 
 ouninstall:
-	@./cortex-installer.sh -c=./dev/config/cortex.sh uninstall operator
+	@./cortex.sh -c=./dev/config/cortex.sh uninstall operator
 
 ostop:
 	@kubectl -n=cortex delete --ignore-not-found=true deployment operator
@@ -145,6 +145,7 @@ ci-build-images:
 	@./build/build-image.sh images/argo-controller argo-controller
 	@./build/build-image.sh images/argo-executor argo-executor
 	@./build/build-image.sh images/python-packager python-packager
+	@./build/build-image.sh images/manager manager
 
 ci-push-images:
 	@./build/push-image.sh spark
@@ -161,6 +162,7 @@ ci-push-images:
 	@./build/push-image.sh argo-controller
 	@./build/push-image.sh argo-executor
 	@./build/push-image.sh python-packager
+	@./build/push-image.sh manager
 
 ci-build-cli:
 	@./build/cli.sh
diff --git a/cortex-installer.sh b/cortex-installer.sh
deleted file mode 100755
index 630e2b0b91..0000000000
--- a/cortex-installer.sh
+++ /dev/null
@@ -1,2042 +0,0 @@
-#!/bin/bash
-
-# Copyright 2019 Cortex Labs, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -set -e - -############ -### HELP ### -############ - -CORTEX_SH_TMP_DIR="$HOME/.cortex-sh-tmp" - -function show_help() { - echo " -Usage: - ./cortex-installer.sh command [sub-command] [flags] - -Available Commands: - install operator install the operator (and the AWS CLI if necessary) - install cli install the CLI - install kubernetes-tools install aws-iam-authenticator, eksctl, kubectl - - uninstall operator uninstall the operator - uninstall cli uninstall the CLI - uninstall kubernetes-tools uninstall aws-iam-authenticator, eksctl, kubectl - - update operator update the operator config and restart the operator - - get endpoints show the operator and API endpoints - -Flags: - -c, --config path to a cortex config file - -h, --help -" -} - -#################### -### FLAG PARSING ### -#################### - -flag_help=false -positional_args=() - -while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -c|--config) - export CORTEX_CONFIG="$2" - shift - shift - ;; - -h|--help) - flag_help="true" - shift - ;; - *) - positional_args+=("$1") - shift - ;; - esac -done - -set -- "${positional_args[@]}" -positional_args=() -for i in "$@"; do - case $i in - -c=*|--config=*) - export CORTEX_CONFIG="${i#*=}" - shift - ;; - -h=*|--help=*) - flag_help="true" - ;; - *) - positional_args+=("$1") - shift - ;; - esac -done - -set -- "${positional_args[@]}" -if [ "$flag_help" == "true" ]; then - show_help - exit 0 -fi - -for arg in "$@"; do - if [[ "$arg" == -* ]]; then - echo "unknown flag: $arg" - show_help - exit 1 - fi -done - -##################### -### CONFIGURATION ### -##################### - -if [ "$CORTEX_CONFIG" != "" ] && [ -f "$CORTEX_CONFIG" ]; then - source $CORTEX_CONFIG -fi - -set -u - -export CORTEX_VERSION_STABLE=master - -# Defaults -random_id=$(cat /dev/urandom | LC_CTYPE=C tr -dc 'a-z0-9' | fold -w 12 | head -n 1) - -export CORTEX_LOG_GROUP="${CORTEX_LOG_GROUP:-cortex}" -export CORTEX_BUCKET="${CORTEX_BUCKET:-cortex-$random_id}" -export 
CORTEX_REGION="${CORTEX_REGION:-us-west-2}" -export CORTEX_NAMESPACE="${CORTEX_NAMESPACE:-cortex}" - -export CORTEX_IMAGE_ARGO_CONTROLLER="${CORTEX_IMAGE_ARGO_CONTROLLER:-cortexlabs/argo-controller:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_ARGO_EXECUTOR="${CORTEX_IMAGE_ARGO_EXECUTOR:-cortexlabs/argo-executor:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_FLUENTD="${CORTEX_IMAGE_FLUENTD:-cortexlabs/fluentd:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_NGINX_BACKEND="${CORTEX_IMAGE_NGINX_BACKEND:-cortexlabs/nginx-backend:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_NGINX_CONTROLLER="${CORTEX_IMAGE_NGINX_CONTROLLER:-cortexlabs/nginx-controller:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_OPERATOR="${CORTEX_IMAGE_OPERATOR:-cortexlabs/operator:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_SPARK="${CORTEX_IMAGE_SPARK:-cortexlabs/spark:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_SPARK_OPERATOR="${CORTEX_IMAGE_SPARK_OPERATOR:-cortexlabs/spark-operator:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_TF_SERVE="${CORTEX_IMAGE_TF_SERVE:-cortexlabs/tf-serve:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_TF_TRAIN="${CORTEX_IMAGE_TF_TRAIN:-cortexlabs/tf-train:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_TF_API="${CORTEX_IMAGE_TF_API:-cortexlabs/tf-api:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/python-packager:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}" -export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}" - -export AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:-""}" -export AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:-""}" -export CORTEX_ENABLE_TELEMETRY=${CORTEX_ENABLE_TELEMETRY:-""} - -################ -### CHECK OS ### -################ - -case "$OSTYPE" in - darwin*) PARSED_OS="darwin" ;; - linux*) PARSED_OS="linux" ;; - *) echo -e "\nerror: only mac and linux 
are supported"; exit 1 ;; -esac - -########################## -### TOP-LEVEL COMMANDS ### -########################## - -function install_operator() { - check_dep_curl - check_dep_aws - check_dep_kubectl - - setup_bucket - setup_cloudwatch_logs - - prompt_for_telemetry - - echo -e "\nInstalling the Cortex operator ..." - - setup_namespace - setup_configmap - setup_secrets - setup_spark - setup_argo - setup_nginx - setup_fluentd - setup_operator - - validate_cortex -} - -function install_cli() { - install_cortex_cli -} - -function install_kubernetes_tools() { - install_aws_iam_authenticator - install_eksctl - install_kubectl -} - -function uninstall_operator() { - check_dep_kubectl - - echo - if kubectl get namespace $CORTEX_NAMESPACE >/dev/null 2>&1 || kubectl get customresourcedefinition sparkapplications.sparkoperator.k8s.io >/dev/null 2>&1 || kubectl get customresourcedefinition scheduledsparkapplications.sparkoperator.k8s.io >/dev/null 2>&1 || kubectl get customresourcedefinition workflows.argoproj.io >/dev/null 2>&1; then - echo "Uninstalling the Cortex operator from your Kubernetes cluster ..." 
- - # Remove finalizers on sparkapplications (they sometimes create deadlocks) - if kubectl get namespace $CORTEX_NAMESPACE >/dev/null 2>&1 && kubectl get customresourcedefinition sparkapplications.sparkoperator.k8s.io >/dev/null 2>&1; then - set +e - kubectl -n=$CORTEX_NAMESPACE get sparkapplications.sparkoperator.k8s.io -o name | xargs -L1 \ - kubectl -n=$CORTEX_NAMESPACE patch -p '{"metadata":{"finalizers": []}}' --type=merge >/dev/null 2>&1 - set -e - fi - - kubectl delete --ignore-not-found=true customresourcedefinition scheduledsparkapplications.sparkoperator.k8s.io >/dev/null 2>&1 - kubectl delete --ignore-not-found=true customresourcedefinition sparkapplications.sparkoperator.k8s.io >/dev/null 2>&1 - kubectl delete --ignore-not-found=true customresourcedefinition workflows.argoproj.io >/dev/null 2>&1 - kubectl delete --ignore-not-found=true namespace $CORTEX_NAMESPACE >/dev/null 2>&1 - echo "✓ Uninstalled the Cortex operator" - else - echo "The Cortex operator is not installed on your Kubernetes cluster" - fi -} - -function uninstall_cli() { - uninstall_cortex_cli -} - -function uninstall_kubernetes_tools() { - uninstall_kubectl - uninstall_eksctl - uninstall_aws_iam_authenticator -} - -# Note: if namespace is changed, the old namespace will not be deleted -function update_operator() { - check_dep_curl - check_dep_aws - check_dep_kubectl - - kubectl -n=$CORTEX_NAMESPACE delete --ignore-not-found=true deployment operator >/dev/null 2>&1 - kubectl -n=$CORTEX_NAMESPACE delete --ignore-not-found=true daemonset fluentd >/dev/null 2>&1 # Pods in DaemonSets cannot be modified - install_operator -} - -function get_endpoints() { - check_dep_kubectl - - operator_endpoint=$(get_operator_endpoint) - apis_endpoint=$(get_apis_endpoint) - echo - echo "Operator endpoint: $operator_endpoint" - echo "APIs endpoint: $apis_endpoint" -} - -################# -### AWS SETUP ### -################# - -function setup_bucket() { - if ! 
aws s3api head-bucket --bucket $CORTEX_BUCKET --output json 2>/dev/null; then - if aws s3 ls "s3://$CORTEX_BUCKET" --output json 2>&1 | grep -q 'NoSuchBucket'; then - echo -e "\nCreating S3 bucket: $CORTEX_BUCKET" - aws s3api create-bucket --bucket $CORTEX_BUCKET \ - --region $CORTEX_REGION \ - --create-bucket-configuration LocationConstraint=$CORTEX_REGION \ - >/dev/null - else - echo -e "\nA bucket named \"${CORTEX_BUCKET}\" already exists, but you do not have access to it" - exit 1 - fi - else - echo -e "\nUsing existing S3 bucket: $CORTEX_BUCKET" - fi -} - -function setup_cloudwatch_logs() { - if ! aws logs list-tags-log-group --log-group-name $CORTEX_LOG_GROUP --region $CORTEX_REGION --output json 2>&1 | grep -q "\"tags\":"; then - echo -e "\nCreating CloudWatch log group: $CORTEX_LOG_GROUP" - aws logs create-log-group --log-group-name $CORTEX_LOG_GROUP --region $CORTEX_REGION - else - echo -e "\nUsing existing CloudWatch log group: $CORTEX_LOG_GROUP" - fi -} - -####################### -### NAMESPACE SETUP ### -####################### - -function setup_namespace() { - echo " -apiVersion: v1 -kind: Namespace -metadata: - name: ${CORTEX_NAMESPACE} -" | kubectl apply -f - >/dev/null -} - -####################### -### CONFIGMAP SETUP ### -####################### - -function setup_configmap() { - kubectl -n=$CORTEX_NAMESPACE create configmap 'cortex-config' \ - --from-literal='LOG_GROUP'=$CORTEX_LOG_GROUP \ - --from-literal='BUCKET'=$CORTEX_BUCKET \ - --from-literal='REGION'=$CORTEX_REGION \ - --from-literal='NAMESPACE'=$CORTEX_NAMESPACE \ - --from-literal='IMAGE_OPERATOR'=$CORTEX_IMAGE_OPERATOR \ - --from-literal='IMAGE_SPARK'=$CORTEX_IMAGE_SPARK \ - --from-literal='IMAGE_TF_TRAIN'=$CORTEX_IMAGE_TF_TRAIN \ - --from-literal='IMAGE_TF_SERVE'=$CORTEX_IMAGE_TF_SERVE \ - --from-literal='IMAGE_TF_API'=$CORTEX_IMAGE_TF_API \ - --from-literal='IMAGE_PYTHON_PACKAGER'=$CORTEX_IMAGE_PYTHON_PACKAGER \ - --from-literal='IMAGE_TF_TRAIN_GPU'=$CORTEX_IMAGE_TF_TRAIN_GPU \ - 
--from-literal='IMAGE_TF_SERVE_GPU'=$CORTEX_IMAGE_TF_SERVE_GPU \ - --from-literal='ENABLE_TELEMETRY'=$CORTEX_ENABLE_TELEMETRY \ - -o yaml --dry-run | kubectl apply -f - >/dev/null -} - -####################### -### SECRETS SETUP ### -####################### - -function setup_secrets() { - kubectl -n=$CORTEX_NAMESPACE create secret generic 'aws-credentials' \ - --from-literal='AWS_ACCESS_KEY_ID'=$AWS_ACCESS_KEY_ID \ - --from-literal='AWS_SECRET_ACCESS_KEY'=$AWS_SECRET_ACCESS_KEY \ - -o yaml --dry-run | kubectl apply -f - >/dev/null -} - -################## -### ARGO SETUP ### -################## - -function setup_argo() { - echo " -apiVersion: v1 -kind: ServiceAccount -metadata: - name: argo-executor - namespace: ${CORTEX_NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: argo-executor - namespace: ${CORTEX_NAMESPACE} -subjects: -- kind: ServiceAccount - name: argo-executor - namespace: ${CORTEX_NAMESPACE} -roleRef: - kind: ClusterRole - name: cluster-admin - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: workflows.argoproj.io - namespace: ${CORTEX_NAMESPACE} -spec: - group: argoproj.io - names: - kind: Workflow - plural: workflows - shortNames: - - wf - scope: Namespaced - version: v1alpha1 ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: argo-controller - namespace: ${CORTEX_NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: argo-controller - namespace: ${CORTEX_NAMESPACE} -rules: -- apiGroups: [\"\"] - resources: [pods, pods/exec] - verbs: [create, get, list, watch, update, patch, delete] -- apiGroups: [\"\"] - resources: [configmaps] - verbs: [get, watch, list] -- apiGroups: [\"\"] - resources: [persistentvolumeclaims] - verbs: [create, delete] -- apiGroups: [argoproj.io] - resources: [workflows, workflows/finalizers] - verbs: [get, list, watch, update, patch, delete] ---- 
-apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: argo - namespace: ${CORTEX_NAMESPACE} -subjects: -- kind: ServiceAccount - name: argo-controller - namespace: ${CORTEX_NAMESPACE} -roleRef: - kind: Role - name: argo-controller - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: argo-controller - namespace: ${CORTEX_NAMESPACE} -data: - config: | - namespace: ${CORTEX_NAMESPACE} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: argo-controller - namespace: ${CORTEX_NAMESPACE} -spec: - selector: - matchLabels: - app: argo-controller - template: - metadata: - labels: - app: argo-controller - spec: - containers: - - args: - - --configmap - - argo-controller - - --executor-image - - ${CORTEX_IMAGE_ARGO_EXECUTOR} - - --executor-image-pull-policy - - Always - command: - - workflow-controller - image: ${CORTEX_IMAGE_ARGO_CONTROLLER} - imagePullPolicy: Always - name: argo-controller - serviceAccountName: argo-controller -" | kubectl apply -f - >/dev/null -} - -################### -### SPARK SETUP ### -################### - -function setup_spark() { - echo " -apiVersion: v1 -kind: ServiceAccount -metadata: - name: spark-operator - namespace: ${CORTEX_NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: spark-operator - namespace: ${CORTEX_NAMESPACE} -rules: -- apiGroups: [\"\"] - resources: [pods] - verbs: [\"*\"] -- apiGroups: [\"\"] - resources: [services, configmaps, secrets] - verbs: [create, get, delete] -- apiGroups: [extensions] - resources: [ingresses] - verbs: [create, get, delete] -- apiGroups: [\"\"] - resources: [nodes] - verbs: [get] -- apiGroups: [\"\"] - resources: [events] - verbs: [create, update, patch] -- apiGroups: [apiextensions.k8s.io] - resources: [customresourcedefinitions] - verbs: [create, get, update, delete] -- apiGroups: [admissionregistration.k8s.io] - resources: [mutatingwebhookconfigurations] - verbs: [create, get, update, 
delete] -- apiGroups: [sparkoperator.k8s.io] - resources: [sparkapplications, scheduledsparkapplications] - verbs: [\"*\"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: spark-operator - namespace: ${CORTEX_NAMESPACE} -subjects: - - kind: ServiceAccount - name: spark-operator - namespace: ${CORTEX_NAMESPACE} -roleRef: - kind: Role - name: spark-operator - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: spark-operator - namespace: ${CORTEX_NAMESPACE} - labels: - app.kubernetes.io/name: spark-operator - app.kubernetes.io/version: v2.4.0-v1alpha1 -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: spark-operator - app.kubernetes.io/version: v2.4.0-v1alpha1 - strategy: - type: Recreate - template: - metadata: - labels: - app.kubernetes.io/name: spark-operator - app.kubernetes.io/version: v2.4.0-v1alpha1 - initializers: - pending: [] - spec: - serviceAccountName: spark-operator - containers: - - name: spark-operator - image: ${CORTEX_IMAGE_SPARK_OPERATOR} - imagePullPolicy: Always - command: [\"/usr/bin/spark-operator\"] - args: - - -namespace=${CORTEX_NAMESPACE} - - -install-crds=false - - -logtostderr ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: sparkapplications.sparkoperator.k8s.io -spec: - group: sparkoperator.k8s.io - names: - kind: SparkApplication - listKind: SparkApplicationList - plural: sparkapplications - shortNames: - - sparkapp - singular: sparkapplication - scope: Namespaced - validation: - openAPIV3Schema: - properties: - spec: - properties: - deps: - properties: - downloadTimeout: - minimum: 1 - type: integer - maxSimultaneousDownloads: - minimum: 1 - type: integer - driver: - properties: - cores: - exclusiveMinimum: true - minimum: 0 - type: number - podName: - pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' - executor: - properties: - cores: - exclusiveMinimum: 
true - minimum: 0 - type: number - instances: - minimum: 1 - type: integer - mode: - enum: - - cluster - - client - monitoring: - properties: - prometheus: - properties: - port: - maximum: 49151 - minimum: 1024 - type: integer - pythonVersion: - enum: - - \"2\" - - \"3\" - restartPolicy: - properties: - onFailureRetries: - minimum: 0 - type: integer - onFailureRetryInterval: - minimum: 1 - type: integer - onSubmissionFailureRetries: - minimum: 0 - type: integer - onSubmissionFailureRetryInterval: - minimum: 1 - type: integer - type: - enum: - - Never - - OnFailure - - Always - type: - enum: - - Java - - Scala - - Python - - R - version: v1alpha1 ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: scheduledsparkapplications.sparkoperator.k8s.io -spec: - group: sparkoperator.k8s.io - names: - kind: ScheduledSparkApplication - listKind: ScheduledSparkApplicationList - plural: scheduledsparkapplications - shortNames: - - scheduledsparkapp - singular: scheduledsparkapplication - scope: Namespaced - validation: - openAPIV3Schema: - properties: - spec: - properties: - concurrencyPolicy: - enum: - - Allow - - Forbid - - Replace - failedRunHistoryLimit: - minimum: 1 - type: integer - schedule: - type: string - successfulRunHistoryLimit: - minimum: 1 - type: integer - template: - properties: - deps: - properties: - downloadTimeout: - minimum: 1 - type: integer - maxSimultaneousDownloads: - minimum: 1 - type: integer - driver: - properties: - cores: - exclusiveMinimum: true - minimum: 0 - type: number - podName: - pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' - executor: - properties: - cores: - exclusiveMinimum: true - minimum: 0 - type: number - instances: - minimum: 1 - type: integer - mode: - enum: - - cluster - - client - monitoring: - properties: - prometheus: - properties: - port: - maximum: 49151 - minimum: 1024 - type: integer - pythonVersion: - enum: - - \"2\" - - \"3\" - restartPolicy: - 
properties: - onFailureRetries: - minimum: 0 - type: integer - onFailureRetryInterval: - minimum: 1 - type: integer - onSubmissionFailureRetries: - minimum: 0 - type: integer - onSubmissionFailureRetryInterval: - minimum: 1 - type: integer - type: - enum: - - Never - - OnFailure - - Always - type: - enum: - - Java - - Scala - - Python - - R - version: v1alpha1 ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: spark - namespace: ${CORTEX_NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: spark - namespace: ${CORTEX_NAMESPACE} -rules: -- apiGroups: - - \"\" # \"\" indicates the core API group - resources: [pods] - verbs: [\"*\"] -- apiGroups: - - \"\" # \"\" indicates the core API group - resources: [services] - verbs: [\"*\"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: spark - namespace: ${CORTEX_NAMESPACE} -subjects: -- kind: ServiceAccount - name: spark - namespace: ${CORTEX_NAMESPACE} -roleRef: - kind: Role - name: spark - apiGroup: rbac.authorization.k8s.io -" | kubectl apply -f - >/dev/null -} - -################### -### NGINX SETUP ### -################### - -function setup_nginx() { - echo " ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: nginx - namespace: ${CORTEX_NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: nginx - namespace: ${CORTEX_NAMESPACE} -rules: - - apiGroups: [\"\"] - resources: [endpoints, pods, secrets] - verbs: [list, watch] - - apiGroups: [\"\"] - resources: [nodes, services, ingresses] - verbs: [get, list, watch] - - apiGroups: [\"\"] - resources: [events] - verbs: [create, patch] - - apiGroups: [\"extensions\"] - resources: [ingresses] - verbs: [get, list, watch] - - apiGroups: [\"extensions\"] - resources: [ingresses/status] - verbs: [update] - - apiGroups: [\"\"] - resources: [pods, secrets, namespaces, endpoints] - verbs: [get] - - apiGroups: [\"\"] - resources: [configmaps] - resourceNames: 
- # Defaults to \"-\" - # Here: \"-\" - # This has to be adapted if you change either parameter - # when launching the nginx-ingress-controller. - - \"ingress-controller-leader-operator\" - - \"ingress-controller-leader-apis\" - verbs: [get, update] - - apiGroups: [\"\"] - resources: [configmaps] - verbs: [get, list, watch, create] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: nginx - namespace: ${CORTEX_NAMESPACE} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: nginx -subjects: - - kind: ServiceAccount - name: nginx - namespace: ${CORTEX_NAMESPACE} ---- -kind: ConfigMap -apiVersion: v1 -metadata: - name: nginx-configuration - namespace: ${CORTEX_NAMESPACE} -data: - use-proxy-protocol: \"true\" ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-backend-operator - labels: - app.kubernetes.io/name: nginx-backend-operator - app.kubernetes.io/part-of: ingress-nginx - namespace: ${CORTEX_NAMESPACE} -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: nginx-backend-operator - app.kubernetes.io/part-of: ingress-nginx - template: - metadata: - labels: - app.kubernetes.io/name: nginx-backend-operator - app.kubernetes.io/part-of: ingress-nginx - spec: - terminationGracePeriodSeconds: 60 - containers: - - name: nginx-backend-operator - # Any image is permissible as long as: - # 1. It serves a 404 page at / - # 2. 
It serves 200 on a /healthz endpoint - image: ${CORTEX_IMAGE_NGINX_BACKEND} - imagePullPolicy: Always - livenessProbe: - httpGet: - path: /healthz - port: 8080 - scheme: HTTP - initialDelaySeconds: 30 - timeoutSeconds: 5 - ports: - - containerPort: 8080 - resources: - limits: - cpu: 10m - memory: 20Mi - requests: - cpu: 10m - memory: 20Mi ---- -apiVersion: v1 -kind: Service -metadata: - name: nginx-backend-operator - namespace: ${CORTEX_NAMESPACE} - labels: - app.kubernetes.io/name: nginx-backend-operator - app.kubernetes.io/part-of: ingress-nginx -spec: - ports: - - port: 80 - targetPort: 8080 - selector: - app.kubernetes.io/name: nginx-backend-operator - app.kubernetes.io/part-of: ingress-nginx ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-controller-operator - namespace: ${CORTEX_NAMESPACE} - labels: - app.kubernetes.io/name: nginx-controller-operator - app.kubernetes.io/part-of: ingress-nginx -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: nginx-controller-operator - app.kubernetes.io/part-of: ingress-nginx - template: - metadata: - labels: - app.kubernetes.io/name: nginx-controller-operator - app.kubernetes.io/part-of: ingress-nginx - spec: - serviceAccountName: nginx - containers: - - name: nginx-controller - image: ${CORTEX_IMAGE_NGINX_CONTROLLER} - imagePullPolicy: Always - args: - - /nginx-ingress-controller - - --watch-namespace=${CORTEX_NAMESPACE} - - --default-backend-service=${CORTEX_NAMESPACE}/nginx-backend-operator - - --configmap=${CORTEX_NAMESPACE}/nginx-configuration - - --publish-service=${CORTEX_NAMESPACE}/nginx-controller-operator - - --annotations-prefix=nginx.ingress.kubernetes.io - - --ingress-class=operator - securityContext: - capabilities: - drop: - - ALL - add: - - NET_BIND_SERVICE - # www-data -> 33 - runAsUser: 33 - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - ports: - - 
name: http - containerPort: 80 - - name: https - containerPort: 443 - livenessProbe: - failureThreshold: 3 - httpGet: - path: /healthz - port: 10254 - scheme: HTTP - initialDelaySeconds: 10 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 - readinessProbe: - failureThreshold: 3 - httpGet: - path: /healthz - port: 10254 - scheme: HTTP - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 ---- -kind: Service -apiVersion: v1 -metadata: - name: nginx-controller-operator - namespace: ${CORTEX_NAMESPACE} - labels: - app.kubernetes.io/name: nginx-controller-operator - app.kubernetes.io/part-of: ingress-nginx - annotations: - # Enable PROXY protocol - service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: '*' - # Ensure the ELB idle timeout is less than nginx keep-alive timeout. By default, - # NGINX keep-alive is set to 75s. If using WebSockets, the value will need to be - # increased to '3600' to avoid any potential issues. - service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: '60' -spec: - type: LoadBalancer - selector: - app.kubernetes.io/name: nginx-controller-operator - app.kubernetes.io/part-of: ingress-nginx - ports: - - name: http - port: 80 - targetPort: http - - name: https - port: 443 - targetPort: https ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-backend-apis - labels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx - namespace: ${CORTEX_NAMESPACE} -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx - template: - metadata: - labels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx - spec: - terminationGracePeriodSeconds: 60 - containers: - - name: nginx-backend-apis - # Any image is permissible as long as: - # 1. It serves a 404 page at / - # 2. 
It serves 200 on a /healthz endpoint - image: ${CORTEX_IMAGE_NGINX_BACKEND} - imagePullPolicy: Always - livenessProbe: - httpGet: - path: /healthz - port: 8080 - scheme: HTTP - initialDelaySeconds: 30 - timeoutSeconds: 5 - ports: - - containerPort: 8080 - resources: - limits: - cpu: 10m - memory: 20Mi - requests: - cpu: 10m - memory: 20Mi ---- -apiVersion: v1 -kind: Service -metadata: - name: nginx-backend-apis - namespace: ${CORTEX_NAMESPACE} - labels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx -spec: - ports: - - port: 80 - targetPort: 8080 - selector: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-controller-apis - namespace: ${CORTEX_NAMESPACE} - labels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx - template: - metadata: - labels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx - spec: - serviceAccountName: nginx - containers: - - name: nginx-controller - image: ${CORTEX_IMAGE_NGINX_CONTROLLER} - imagePullPolicy: Always - args: - - /nginx-ingress-controller - - --watch-namespace=${CORTEX_NAMESPACE} - - --default-backend-service=${CORTEX_NAMESPACE}/nginx-backend-apis - - --configmap=${CORTEX_NAMESPACE}/nginx-configuration - - --publish-service=${CORTEX_NAMESPACE}/nginx-backend-apis - - --annotations-prefix=nginx.ingress.kubernetes.io - - --ingress-class=apis - securityContext: - capabilities: - drop: - - ALL - add: - - NET_BIND_SERVICE - # www-data -> 33 - runAsUser: 33 - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - ports: - - name: http - containerPort: 80 - - name: https - 
containerPort: 443 - livenessProbe: - failureThreshold: 3 - httpGet: - path: /healthz - port: 10254 - scheme: HTTP - initialDelaySeconds: 10 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 - readinessProbe: - failureThreshold: 3 - httpGet: - path: /healthz - port: 10254 - scheme: HTTP - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 1 ---- -kind: Service -apiVersion: v1 -metadata: - name: nginx-controller-apis - namespace: ${CORTEX_NAMESPACE} - labels: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx - annotations: - # Enable PROXY protocol - service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: '*' - # Ensure the ELB idle timeout is less than nginx keep-alive timeout. By default, - # NGINX keep-alive is set to 75s. If using WebSockets, the value will need to be - # increased to '3600' to avoid any potential issues. - service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: '60' -spec: - type: LoadBalancer - selector: - app.kubernetes.io/name: nginx-backend-apis - app.kubernetes.io/part-of: ingress-nginx - ports: - - name: http - port: 80 - targetPort: http - - name: https - port: 443 - targetPort: https -" | kubectl apply -f - >/dev/null -} - -##################### -### FLUENTD SETUP ### -##################### - -function setup_fluentd() { - echo " -apiVersion: v1 -kind: ServiceAccount -metadata: - name: fluentd - namespace: ${CORTEX_NAMESPACE} - labels: - app: fluentd ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: fluentd - namespace: ${CORTEX_NAMESPACE} -rules: -- apiGroups: [\"\"] - resources: [pods] - verbs: [get, list, watch] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: fluentd - namespace: ${CORTEX_NAMESPACE} -subjects: -- kind: ServiceAccount - name: fluentd - namespace: ${CORTEX_NAMESPACE} -roleRef: - kind: Role - name: fluentd - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: v1 -kind: ConfigMap 
-metadata: - name: fluentd - namespace: ${CORTEX_NAMESPACE} -data: - fluent.conf: | - - @type null - - - @type tail - enable_stat_watcher false - path /var/log/containers/**_${CORTEX_NAMESPACE}_**.log - pos_file /var/log/fluentd-containers.log.pos - time_format %Y-%m-%dT%H:%M:%S.%NZ - tag * - format json - read_from_head true - - - @type cloudwatch_logs - log_group_name \"#{ENV['LOG_GROUP_NAME']}\" - auto_create_stream true - use_tag_as_stream true - ---- -apiVersion: extensions/v1beta1 -kind: DaemonSet -metadata: - name: fluentd - namespace: ${CORTEX_NAMESPACE} -spec: - template: - metadata: - labels: - app: fluentd - spec: - serviceAccountName: fluentd - initContainers: - - name: copy-fluentd-config - image: busybox - command: ['sh', '-c', 'cp /config-volume/* /etc/fluentd'] - volumeMounts: - - name: config-volume - mountPath: /config-volume - - name: config - mountPath: /etc/fluentd - containers: - - name: fluentd - image: ${CORTEX_IMAGE_FLUENTD} - imagePullPolicy: Always - env: - - name: AWS_REGION - value: ${CORTEX_REGION} - - name: LOG_GROUP_NAME - value: ${CORTEX_LOG_GROUP} - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: aws-credentials - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: aws-credentials - key: AWS_SECRET_ACCESS_KEY - volumeMounts: - - name: varlog - mountPath: /var/log - - name: varlibdockercontainers - mountPath: /var/lib/docker/containers - readOnly: true - - name: config - mountPath: /fluentd/etc - terminationGracePeriodSeconds: 30 - volumes: - - name: varlog - hostPath: - path: /var/log - - name: varlibdockercontainers - hostPath: - path: /var/lib/docker/containers - - name: config - emptyDir: {} - - name: config-volume - configMap: - name: fluentd -" | kubectl apply -f - >/dev/null -} - -###################### -### OPERATOR SETUP ### -###################### - -function setup_operator() { - echo " -apiVersion: v1 -kind: ServiceAccount -metadata: - name: operator - namespace: 
${CORTEX_NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: operator - namespace: ${CORTEX_NAMESPACE} -subjects: -- kind: ServiceAccount - name: operator - namespace: ${CORTEX_NAMESPACE} -roleRef: - kind: ClusterRole - name: cluster-admin - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: operator - namespace: ${CORTEX_NAMESPACE} - labels: - workloadType: operator -spec: - replicas: 1 - selector: - matchLabels: - workloadId: operator - template: - metadata: - labels: - workloadId: operator - workloadType: operator - spec: - containers: - - name: operator - image: ${CORTEX_IMAGE_OPERATOR} - imagePullPolicy: Always - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: aws-credentials - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: aws-credentials - key: AWS_SECRET_ACCESS_KEY - volumeMounts: - - name: cortex-config - mountPath: /configs/cortex - volumes: - - name: cortex-config - configMap: - name: cortex-config - serviceAccountName: operator ---- -kind: Service -apiVersion: v1 -metadata: - name: operator - namespace: ${CORTEX_NAMESPACE} - labels: - workloadType: operator -spec: - selector: - workloadId: operator - ports: - - port: 8888 - targetPort: 8888 ---- -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: operator - namespace: ${CORTEX_NAMESPACE} - labels: - workloadType: operator - annotations: - kubernetes.io/ingress.class: operator -spec: - rules: - - http: - paths: - - path: / - backend: - serviceName: operator - servicePort: 8888 -" | kubectl apply -f - >/dev/null -} - -function validate_cortex() { - set +e - - echo -en "\nWaiting for the Cortex operator to be ready " - - operator_load_balancer="waiting" - api_load_balancer="waiting" - operator_endpoint_reachable="waiting" - operator_pod_ready_cycles=0 - operator_endpoint="" - - while true; do - echo -n "." 
- sleep 5 - - operator_pod_name=$(kubectl -n=$CORTEX_NAMESPACE get pods -o=name --sort-by=.metadata.creationTimestamp | grep "^pod/operator-" | tail -1) - if [ "$operator_pod_name" == "" ]; then - operator_pod_ready_cycles=0 - else - is_ready=$(kubectl -n=$CORTEX_NAMESPACE get "$operator_pod_name" -o jsonpath='{.status.containerStatuses[0].ready}') - if [ "$is_ready" == "true" ]; then - ((operator_pod_ready_cycles++)) - else - operator_pod_ready_cycles=0 - fi - fi - - if [ "$operator_load_balancer" != "ready" ]; then - out=$(kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-operator -o json | tr -d '[:space:]') - if [[ $out != *'"loadBalancer":{"ingress":[{"'* ]]; then - continue - fi - operator_load_balancer="ready" - fi - - if [ "$api_load_balancer" != "ready" ]; then - out=$(kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-apis -o json | tr -d '[:space:]') - if [[ $out != *'"loadBalancer":{"ingress":[{"'* ]]; then - continue - fi - api_load_balancer="ready" - fi - - if [ "$operator_endpoint" = "" ]; then - operator_endpoint=$(kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-operator -o json | tr -d '[:space:]' | sed 's/.*{\"hostname\":\"\(.*\)\".*/\1/') - fi - - if [ "$operator_endpoint_reachable" != "ready" ]; then - if ! curl $operator_endpoint >/dev/null 2>&1; then - continue - fi - operator_endpoint_reachable="ready" - fi - - if [ "$operator_pod_ready_cycles" == "0" ] && [ "$operator_pod_name" != "" ]; then - num_restart=$(kubectl -n=$CORTEX_NAMESPACE get "$operator_pod_name" -o jsonpath='{.status.containerStatuses[0].restartCount}') - if [[ $num_restart -ge 2 ]]; then - echo -e "\n\nAn error occurred when starting the Cortex operator. View the logs with:" - echo " kubectl logs $operator_pod_name --namespace=$CORTEX_NAMESPACE" - exit 1 - fi - continue - fi - - if [[ $operator_pod_ready_cycles -lt 3 ]]; then - continue - fi - - echo " ✓" - break - done - - echo -e "\nCortex is ready!" 
- - get_endpoints - - if command -v cortex >/dev/null; then - echo -e "\nPlease run \`cortex configure\` to make sure your CLI is configured correctly" - fi -} - -function get_operator_endpoint() { - set -eo pipefail - kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-operator -o json | tr -d '[:space:]' | sed 's/.*{\"hostname\":\"\(.*\)\".*/\1/' -} - -function get_apis_endpoint() { - set -eo pipefail - kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-apis -o json | tr -d '[:space:]' | sed 's/.*{\"hostname\":\"\(.*\)\".*/\1/' -} - -############################# -### DEPENDENCY MANAGEMENT ### -############################# - -function check_dep_curl() { - if ! command -v curl >/dev/null; then - echo -e "\nerror: please install \`curl\`" - exit 1 - fi -} - -function check_dep_unzip() { - if ! command -v unzip >/dev/null; then - echo -e "\nerror: please install \`unzip\`" - exit 1 - fi -} - -function check_dep_kubectl() { - set -e - - if ! command -v kubectl >/dev/null 2>&1; then - echo - read -p "kubectl is required. Would you like cortex-installer.sh to install it? [Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - install_kubectl - else - exit 1 - fi - fi - - if ! kubectl config current-context >/dev/null 2>&1; then - echo -e "\nerror: kubectl is not configured to connect with your cluster. If you are using eksctl, you can run this command to configure kubectl:" - echo " eksctl utils write-kubeconfig --name=cortex" - exit 1 - fi - - set +e - get_nodes_output=$(kubectl get nodes -o jsonpath='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}' 2>/dev/null) - if [ $? 
-ne 0 ]; then - echo -e "\nerror: either your AWS credentials are incorrect or kubectl is not properly configured to connect with your cluster" - echo "If you are using eksctl, you can run this command to re-configure kubectl:" - echo " eksctl utils write-kubeconfig --name=cortex" - echo "If you are changing IAM users, you must edit the aws-auth ConfigMap (using your previous IAM credentials) to add the new IAM user; see https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html" - exit 1 - fi - set -e - num_nodes_ready=$(echo $get_nodes_output | tr ';' "\n" | grep "Ready=True" | wc -l) - if ! [[ $num_nodes_ready -ge 1 ]]; then - echo -e "\nerror: your cluster has no registered nodes" - exit 1 - fi -} - -function install_kubectl() { - set -e - - if command -v kubectl >/dev/null; then - echo -e "\nkubectl is already installed" - return - fi - - check_dep_curl - - echo -e "\nInstalling kubectl (/usr/local/bin/kubectl) ..." - - rm -rf $CORTEX_SH_TMP_DIR && mkdir -p $CORTEX_SH_TMP_DIR - curl -s -Lo $CORTEX_SH_TMP_DIR/kubectl https://storage.googleapis.com/kubernetes-release/release/v1.13.3/bin/$PARSED_OS/amd64/kubectl - chmod +x $CORTEX_SH_TMP_DIR/kubectl - - if [ $(id -u) = 0 ]; then - mv $CORTEX_SH_TMP_DIR/kubectl /usr/local/bin/kubectl - else - ask_sudo - sudo mv $CORTEX_SH_TMP_DIR/kubectl /usr/local/bin/kubectl - fi - - rm -rf $CORTEX_SH_TMP_DIR - echo "✓ Installed kubectl" -} - -function uninstall_kubectl() { - set -e - - if ! command -v kubectl >/dev/null; then - echo -e "\nkubectl is not installed" - return - fi - - if [[ ! -f /usr/local/bin/kubectl ]]; then - echo -e "\nkubectl was not found at /usr/local/bin/kubectl, please uninstall it manually" - return - fi - - echo - read -p "Would you like to uninstall kubectl (/usr/local/bin/kubectl)? 
[Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - if [ $(id -u) = 0 ]; then - rm /usr/local/bin/kubectl - else - ask_sudo - sudo rm /usr/local/bin/kubectl - fi - rm -rf $HOME/.kube - echo "✓ Uninstalled kubectl" - else - return - fi -} - -function check_dep_aws() { - set -e - - if ! command -v aws >/dev/null 2>&1; then - echo - read -p "The AWS CLI is required. Would you like cortex-installer.sh to install it? [Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - install_aws - else - exit 1 - fi - fi - - if [ -z "$AWS_ACCESS_KEY_ID" ]; then - echo -e "\nerror: please export AWS_ACCESS_KEY_ID" - exit 1 - fi - - if [ -z "$AWS_SECRET_ACCESS_KEY" ]; then - echo -e "\nerror: please export AWS_SECRET_ACCESS_KEY" - exit 1 - fi -} - -function install_aws() { - set -e - - if command -v aws >/dev/null; then - echo "The AWS CLI is already installed" - return - fi - - check_dep_curl - check_dep_unzip - - if command -v python >/dev/null; then - py_path=$(which python) - elif command -v python3 >/dev/null; then - py_path=$(which python3) - else - echo -e "\nerror: please install python or python3 using your package manager" - exit 1 - fi - - if ! $py_path -c "import distutils.sysconfig" >/dev/null 2>&1; then - if command -v python3 >/dev/null; then - echo -e "\nerror: please install python3-distutils using your package manager" - else - echo -e "\nerror: please install python distutils" - fi - exit 1 - fi - - echo -e "\nInstalling the AWS CLI (/usr/local/bin/aws) ..." 
- - rm -rf $CORTEX_SH_TMP_DIR && mkdir -p $CORTEX_SH_TMP_DIR - curl -s -o $CORTEX_SH_TMP_DIR/awscli-bundle.zip https://s3.amazonaws.com/aws-cli/awscli-bundle.zip - unzip $CORTEX_SH_TMP_DIR/awscli-bundle.zip -d $CORTEX_SH_TMP_DIR >/dev/null - - if [ $(id -u) = 0 ]; then - $py_path $CORTEX_SH_TMP_DIR/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws >/dev/null - else - ask_sudo - sudo $py_path $CORTEX_SH_TMP_DIR/awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws >/dev/null - fi - - rm -rf $CORTEX_SH_TMP_DIR - echo "✓ Installed the AWS CLI" -} - -function uninstall_aws() { - set -e - - if ! command -v aws >/dev/null; then - echo -e "\nThe AWS CLI is not installed" - return - fi - - if [[ ! -f /usr/local/bin/aws ]]; then - echo -e "\nThe AWS CLI was not found at /usr/local/bin/aws, please uninstall it manually" - return - fi - - echo - read -p "Would you like to uninstall the AWS CLI (/usr/local/bin/aws)? [Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - if [ $(id -u) = 0 ]; then - rm -rf /usr/local/aws - rm /usr/local/bin/aws - else - ask_sudo - sudo rm -rf /usr/local/aws - sudo rm /usr/local/bin/aws - fi - rm -rf $HOME/.aws - echo "✓ Uninstalled the AWS CLI" - else - return - fi -} - -function install_eksctl() { - set -e - - if command -v eksctl >/dev/null; then - echo -e "\neksctl is already installed" - return - fi - - check_dep_curl - - echo -e "\nInstalling eksctl (/usr/local/bin/eksctl) ..." - - rm -rf $CORTEX_SH_TMP_DIR && mkdir -p $CORTEX_SH_TMP_DIR - (cd $CORTEX_SH_TMP_DIR && curl -s --location "https://github.com/weaveworks/eksctl/releases/download/0.1.21/eksctl_$(uname -s)_amd64.tar.gz" | tar xz) - chmod +x $CORTEX_SH_TMP_DIR/eksctl - - if [ $(id -u) = 0 ]; then - mv $CORTEX_SH_TMP_DIR/eksctl /usr/local/bin/eksctl - else - ask_sudo - sudo mv $CORTEX_SH_TMP_DIR/eksctl /usr/local/bin/eksctl - fi - - rm -rf $CORTEX_SH_TMP_DIR - echo "✓ Installed eksctl" -} - -function uninstall_eksctl() { - set -e - - if ! 
command -v eksctl >/dev/null; then - echo -e "\neksctl is not installed" - return - fi - - if [[ ! -f /usr/local/bin/eksctl ]]; then - echo -e "\neksctl was not found at /usr/local/bin/eksctl, please uninstall it manually" - return - fi - - echo - read -p "Would you like to uninstall eksctl (/usr/local/bin/eksctl)? [Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - if [ $(id -u) = 0 ]; then - rm /usr/local/bin/eksctl - else - ask_sudo - sudo rm /usr/local/bin/eksctl - fi - echo "✓ Uninstalled eksctl" - else - return - fi -} - -function install_aws_iam_authenticator() { - set -e - - if command -v aws-iam-authenticator >/dev/null; then - echo -e "\naws-iam-authenticator is already installed" - return - fi - - check_dep_curl - - echo -e "\nInstalling aws-iam-authenticator (/usr/local/bin/aws-iam-authenticator) ..." - - rm -rf $CORTEX_SH_TMP_DIR && mkdir -p $CORTEX_SH_TMP_DIR - curl -s -o $CORTEX_SH_TMP_DIR/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.11.5/2018-12-06/bin/$PARSED_OS/amd64/aws-iam-authenticator - chmod +x $CORTEX_SH_TMP_DIR/aws-iam-authenticator - - if [ $(id -u) = 0 ]; then - mv $CORTEX_SH_TMP_DIR/aws-iam-authenticator /usr/local/bin/aws-iam-authenticator - else - ask_sudo - sudo mv $CORTEX_SH_TMP_DIR/aws-iam-authenticator /usr/local/bin/aws-iam-authenticator - fi - - rm -rf $CORTEX_SH_TMP_DIR - echo "✓ Installed aws-iam-authenticator" -} - -function uninstall_aws_iam_authenticator() { - set -e - - if ! command -v aws-iam-authenticator >/dev/null; then - echo -e "\naws-iam-authenticator is not installed" - return - fi - - if [[ ! -f /usr/local/bin/aws-iam-authenticator ]]; then - echo -e "\naws-iam-authenticator was not found at /usr/local/bin/aws-iam-authenticator, please uninstall it manually" - return - fi - - echo - read -p "Would you like to uninstall aws-iam-authenticator (/usr/local/bin/aws-iam-authenticator)? 
[Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - if [ $(id -u) = 0 ]; then - rm /usr/local/bin/aws-iam-authenticator - else - ask_sudo - sudo rm /usr/local/bin/aws-iam-authenticator - fi - echo "✓ Uninstalled aws-iam-authenticator" - else - return - fi -} - -function install_cortex_cli() { - set -e - - if command -v cortex >/dev/null; then - echo "The Cortex CLI is already installed" - return - fi - - check_dep_curl - - echo -e "\nInstalling the Cortex CLI (/usr/local/bin/cortex) ..." - - rm -rf $CORTEX_SH_TMP_DIR && mkdir -p $CORTEX_SH_TMP_DIR - curl -s -o $CORTEX_SH_TMP_DIR/cortex https://s3-us-west-2.amazonaws.com/get-cortex/$CORTEX_VERSION_STABLE/cli/$PARSED_OS/cortex - chmod +x $CORTEX_SH_TMP_DIR/cortex - - if [ $(id -u) = 0 ]; then - mv $CORTEX_SH_TMP_DIR/cortex /usr/local/bin/cortex - else - ask_sudo - sudo mv $CORTEX_SH_TMP_DIR/cortex /usr/local/bin/cortex - fi - - rm -rf $CORTEX_SH_TMP_DIR - echo "✓ Installed the Cortex CLI" - - bash_profile_path=$(get_bash_profile) - if [ ! "$bash_profile_path" = "" ]; then - if ! grep -Fxq "source <(cortex completion)" "$bash_profile_path"; then - echo - read -p "Would you like to modify your bash profile ($bash_profile_path) to enable cortex command completion and the cx alias? [Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - echo -e "\nsource <(cortex completion)" >> $bash_profile_path - echo "✓ Your bash profile ($bash_profile_path) has been updated" - echo - echo "Note: \`bash_completion\` must be installed on your system for cortex command completion to function properly" - echo - echo "Command to update your current terminal session:" - echo " source $bash_profile_path" - else - echo "Your bash profile has not been modified. 
If you would like to modify it manually, add this line to your bash profile:" - echo " source <(cortex completion)" - echo "Note: \`bash_completion\` must be installed on your system for cortex command completion to function properly" - fi - fi - else - echo -e "\nIf your would like to enable cortex command completion and the cx alias, add this line to your bash profile:" - echo " source <(cortex completion)" - echo "Note: \`bash_completion\` must be installed on your system for cortex command completion to function properly" - fi -} - -function uninstall_cortex_cli() { - set -e - - rm -rf $HOME/.cortex - - if ! command -v cortex >/dev/null; then - echo -e "\nThe Cortex CLI is not installed" - return - fi - - if [[ ! -f /usr/local/bin/cortex ]]; then - echo -e "\nThe Cortex CLI was not found at /usr/local/bin/cortex, please uninstall it manually" - return - fi - - if [ $(id -u) = 0 ]; then - rm /usr/local/bin/cortex - else - ask_sudo - sudo rm /usr/local/bin/cortex - fi - echo -e "\n✓ Uninstalled the Cortex CLI" - - bash_profile_path=$(get_bash_profile) - if [ ! "$bash_profile_path" = "" ]; then - if grep -Fxq "source <(cortex completion)" "$bash_profile_path"; then - echo - read -p "Would you like to remove \"source <(cortex completion)\" from your bash profile ($bash_profile_path)? 
[Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - sed '/^source <(cortex completion)$/d' "$bash_profile_path" > "${bash_profile_path}_cortex_modified" && mv -f "${bash_profile_path}_cortex_modified" "$bash_profile_path" - echo "✓ Your bash profile ($bash_profile_path) has been updated" - fi - fi - fi -} - -function get_bash_profile() { - if [ "$PARSED_OS" = "darwin" ]; then - if [ -f $HOME/.bash_profile ]; then - echo $HOME/.bash_profile - return - elif [ -f $HOME/.bashrc ]; then - echo $HOME/.bashrc - return - fi - else - if [ -f $HOME/.bashrc ]; then - echo $HOME/.bashrc - return - elif [ -f $HOME/.bash_profile ]; then - echo $HOME/.bash_profile - return - fi - fi - - echo "" -} - -function ask_sudo() { - if ! sudo -n true 2>/dev/null; then - echo -e "\nPlease enter your sudo password" - fi -} - -function prompt_for_telemetry() { - if [ "$CORTEX_ENABLE_TELEMETRY" != "true" ] && [ "$CORTEX_ENABLE_TELEMETRY" != "false" ]; then - while true - do - echo - read -p "Would you like to help improve Cortex by anonymously sending error reports and usage stats to the dev team? [Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then - export CORTEX_ENABLE_TELEMETRY=true - break - elif [[ $REPLY =~ ^[Nn]$ ]]; then - export CORTEX_ENABLE_TELEMETRY=false - break - fi - echo "Unexpected value, please enter \"Y\" or \"n\"" - done - fi -} - -###################### -### ARG PROCESSING ### -###################### - -arg1=${1:-""} -arg2=${2:-""} -arg3=${3:-""} - -if [ -z "$arg1" ]; then - show_help - exit 0 -fi - -if [ "$arg1" = "install" ]; then - if [ ! 
"$arg3" = "" ]; then - echo -e "\nerror: too many arguments for install command" - show_help - exit 1 - elif [ "$arg2" = "operator" ]; then - install_operator - elif [ "$arg2" = "cli" ]; then - install_cli - elif [ "$arg2" = "kubernetes-tools" ]; then - install_kubernetes_tools - elif [ "$arg2" = "" ]; then - echo -e "\nerror: missing subcommand for install" - show_help - exit 1 - else - echo -e "\nerror: invalid subcommand for install: $arg2" - show_help - exit 1 - fi -elif [ "$arg1" = "uninstall" ]; then - if [ ! "$arg3" = "" ]; then - echo -e "\nerror: too many arguments for uninstall command" - show_help - exit 1 - elif [ "$arg2" = "operator" ]; then - uninstall_operator - elif [ "$arg2" = "cli" ]; then - uninstall_cli - elif [ "$arg2" = "kubernetes-tools" ]; then - uninstall_kubernetes_tools - elif [ "$arg2" = "" ]; then - echo -e "\nerror: missing subcommand for uninstall" - show_help - exit 1 - else - echo -e "\nerror: invalid subcommand for uninstall: $arg2" - show_help - exit 1 - fi -elif [ "$arg1" = "update" ]; then - if [ ! "$arg3" = "" ]; then - echo -e "\nerror: too many arguments for update command" - show_help - exit 1 - elif [ "$arg2" = "operator" ]; then - update_operator - elif [ "$arg2" = "" ]; then - echo -e "\nerror: missing subcommand for update" - show_help - exit 1 - else - echo -e "\nerror: invalid subcommand for update: $arg2" - show_help - exit 1 - fi -elif [ "$arg1" = "get" ]; then - if [ ! 
"$arg3" = "" ]; then - echo -e "\nerror: too many arguments for get command" - show_help - exit 1 - elif [ "$arg2" = "endpoints" ]; then - get_endpoints - elif [ "$arg2" = "" ]; then - echo -e "\nerror: missing subcommand for get" - show_help - exit 1 - else - echo -e "\nerror: invalid subcommand for get: $arg2" - show_help - exit 1 - fi -else - echo -e "\nerror: unknown command: $arg1" - show_help - exit 1 -fi diff --git a/cortex.sh b/cortex.sh new file mode 100755 index 0000000000..82994db88f --- /dev/null +++ b/cortex.sh @@ -0,0 +1,453 @@ +#!/bin/bash + +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +#################### +### FLAG PARSING ### +#################### + +flag_help=false +positional_args=() + +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -c|--config) + export CORTEX_CONFIG="$2" + shift + shift + ;; + -h|--help) + flag_help="true" + shift + ;; + *) + positional_args+=("$1") + shift + ;; + esac +done + +set -- "${positional_args[@]}" +positional_args=() +for i in "$@"; do + case $i in + -c=*|--config=*) + export CORTEX_CONFIG="${i#*=}" + shift + ;; + -h=*|--help=*) + flag_help="true" + ;; + *) + positional_args+=("$1") + shift + ;; + esac +done + +set -- "${positional_args[@]}" +if [ "$flag_help" == "true" ]; then + show_help + exit 0 +fi + +for arg in "$@"; do + if [[ "$arg" == -* ]]; then + echo "unknown flag: $arg" + show_help + exit 1 + fi +done + +##################### +### CONFIGURATION ### +##################### + +if [ "$CORTEX_CONFIG" != "" ]; then + if [ ! -f "$CORTEX_CONFIG" ]; then + echo "Cortex config file does not exist: $CORTEX_CONFIG" + exit 1 + fi + source $CORTEX_CONFIG +fi + +set -u + +export CORTEX_VERSION_STABLE=master + +# Defaults +export AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:-""}" +export AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:-""}" + +export CORTEX_LOG_GROUP="${CORTEX_LOG_GROUP:-cortex}" +export CORTEX_BUCKET="${CORTEX_BUCKET:-""}" +export CORTEX_REGION="${CORTEX_REGION:-us-west-2}" + +export CORTEX_CLUSTER="${CORTEX_CLUSTER:-cortex}" +export CORTEX_NAMESPACE="${CORTEX_NAMESPACE:-cortex}" +export CORTEX_NODE_TYPE="${CORTEX_NODE_TYPE:-t3.medium}" +export CORTEX_NODES_MIN="${CORTEX_NODES_MIN:-2}" +export CORTEX_NODES_MAX="${CORTEX_NODES_MAX:-5}" + +export CORTEX_IMAGE_ARGO_CONTROLLER="${CORTEX_IMAGE_ARGO_CONTROLLER:-cortexlabs/argo-controller:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_ARGO_EXECUTOR="${CORTEX_IMAGE_ARGO_EXECUTOR:-cortexlabs/argo-executor:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_FLUENTD="${CORTEX_IMAGE_FLUENTD:-cortexlabs/fluentd:$CORTEX_VERSION_STABLE}" +export 
CORTEX_IMAGE_NGINX_BACKEND="${CORTEX_IMAGE_NGINX_BACKEND:-cortexlabs/nginx-backend:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_NGINX_CONTROLLER="${CORTEX_IMAGE_NGINX_CONTROLLER:-cortexlabs/nginx-controller:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_OPERATOR="${CORTEX_IMAGE_OPERATOR:-cortexlabs/operator:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_SPARK="${CORTEX_IMAGE_SPARK:-cortexlabs/spark:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_SPARK_OPERATOR="${CORTEX_IMAGE_SPARK_OPERATOR:-cortexlabs/spark-operator:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_TF_SERVE="${CORTEX_IMAGE_TF_SERVE:-cortexlabs/tf-serve:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_TF_TRAIN="${CORTEX_IMAGE_TF_TRAIN:-cortexlabs/tf-train:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_TF_API="${CORTEX_IMAGE_TF_API:-cortexlabs/tf-api:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/python-packager:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}" + +export CORTEX_ENABLE_TELEMETRY="${CORTEX_ENABLE_TELEMETRY:-""}" + +########################## +### TOP-LEVEL COMMANDS ### +########################## + +function install_eks() { + docker run --entrypoint /root/install_eks.sh \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e CORTEX_CLUSTER=$CORTEX_CLUSTER \ + -e CORTEX_NODE_TYPE=$CORTEX_NODE_TYPE \ + -e CORTEX_NODES_MIN=$CORTEX_NODES_MIN \ + -e CORTEX_NODES_MAX=$CORTEX_NODES_MAX \ + cortexlabs/manager +} + +function uninstall_eks() { + docker run --entrypoint /root/uninstall_eks.sh \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e CORTEX_CLUSTER=$CORTEX_CLUSTER \ + cortexlabs/manager +} + +function install_cortex() { + docker run --entrypoint 
/root/install_cortex.sh \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e CORTEX_CLUSTER=$CORTEX_CLUSTER \ + -e CORTEX_NAMESPACE=$CORTEX_NAMESPACE \ + -e CORTEX_LOG_GROUP=$CORTEX_LOG_GROUP \ + -e CORTEX_BUCKET=$CORTEX_BUCKET \ + -e CORTEX_REGION=$CORTEX_REGION \ + -e CORTEX_IMAGE_ARGO_CONTROLLER=$CORTEX_IMAGE_ARGO_CONTROLLER \ + -e CORTEX_IMAGE_ARGO_EXECUTOR=$CORTEX_IMAGE_ARGO_EXECUTOR \ + -e CORTEX_IMAGE_FLUENTD=$CORTEX_IMAGE_FLUENTD \ + -e CORTEX_IMAGE_NGINX_BACKEND=$CORTEX_IMAGE_NGINX_BACKEND \ + -e CORTEX_IMAGE_NGINX_CONTROLLER=$CORTEX_IMAGE_NGINX_CONTROLLER \ + -e CORTEX_IMAGE_OPERATOR=$CORTEX_IMAGE_OPERATOR \ + -e CORTEX_IMAGE_SPARK=$CORTEX_IMAGE_SPARK \ + -e CORTEX_IMAGE_SPARK_OPERATOR=$CORTEX_IMAGE_SPARK_OPERATOR \ + -e CORTEX_IMAGE_TF_SERVE=$CORTEX_IMAGE_TF_SERVE \ + -e CORTEX_IMAGE_TF_TRAIN=$CORTEX_IMAGE_TF_TRAIN \ + -e CORTEX_IMAGE_TF_API=$CORTEX_IMAGE_TF_API \ + -e CORTEX_IMAGE_PYTHON_PACKAGER=$CORTEX_IMAGE_PYTHON_PACKAGER \ + -e CORTEX_IMAGE_TF_SERVE_GPU=$CORTEX_IMAGE_TF_SERVE_GPU \ + -e CORTEX_IMAGE_TF_TRAIN_GPU=$CORTEX_IMAGE_TF_TRAIN_GPU \ + -e CORTEX_ENABLE_TELEMETRY=$CORTEX_ENABLE_TELEMETRY \ + cortexlabs/manager +} + +function uninstall_cortex() { + docker run --entrypoint /root/uninstall_cortex.sh \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e CORTEX_CLUSTER=$CORTEX_CLUSTER \ + -e CORTEX_NAMESPACE=$CORTEX_NAMESPACE \ + cortexlabs/manager +} + +function uninstall_operator() { + docker run --entrypoint /root/uninstall_operator.sh \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e CORTEX_CLUSTER=$CORTEX_CLUSTER \ + -e CORTEX_NAMESPACE=$CORTEX_NAMESPACE \ + cortexlabs/manager +} + +function info() { + docker run --entrypoint /root/info.sh \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e CORTEX_CLUSTER=$CORTEX_CLUSTER \ + -e 
CORTEX_NAMESPACE=$CORTEX_NAMESPACE \ + cortexlabs/manager +} + +################ +### CHECK OS ### +################ + +case "$OSTYPE" in + darwin*) PARSED_OS="darwin" ;; + linux*) PARSED_OS="linux" ;; + *) echo -e "\nerror: only mac and linux are supported"; exit 1 ;; +esac + +############################# +### DEPENDENCY MANAGEMENT ### +############################# + +function check_dep_curl() { + if ! command -v curl >/dev/null; then + echo -e "\nerror: please install \`curl\`" + exit 1 + fi +} + +function install_cli() { + set -e + + if command -v cortex >/dev/null; then + echo "The Cortex CLI is already installed" + return + fi + + check_dep_curl + + echo -e "\nInstalling the Cortex CLI (/usr/local/bin/cortex) ..." + + CORTEX_SH_TMP_DIR="$HOME/.cortex-sh-tmp" + rm -rf $CORTEX_SH_TMP_DIR && mkdir -p $CORTEX_SH_TMP_DIR + curl -s -o $CORTEX_SH_TMP_DIR/cortex https://s3-us-west-2.amazonaws.com/get-cortex/$CORTEX_VERSION_STABLE/cli/$PARSED_OS/cortex + chmod +x $CORTEX_SH_TMP_DIR/cortex + + if [ $(id -u) = 0 ]; then + mv $CORTEX_SH_TMP_DIR/cortex /usr/local/bin/cortex + else + ask_sudo + sudo mv $CORTEX_SH_TMP_DIR/cortex /usr/local/bin/cortex + fi + + rm -rf $CORTEX_SH_TMP_DIR + echo "✓ Installed the Cortex CLI" + + bash_profile_path=$(get_bash_profile) + if [ ! "$bash_profile_path" = "" ]; then + if ! grep -Fxq "source <(cortex completion)" "$bash_profile_path"; then + echo + read -p "Would you like to modify your bash profile ($bash_profile_path) to enable cortex command completion and the cx alias? [Y/n] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + echo -e "\nsource <(cortex completion)" >> $bash_profile_path + echo "✓ Your bash profile ($bash_profile_path) has been updated" + echo + echo "Note: \`bash_completion\` must be installed on your system for cortex command completion to function properly" + echo + echo "Command to update your current terminal session:" + echo " source $bash_profile_path" + else + echo "Your bash profile has not been modified. 
If you would like to modify it manually, add this line to your bash profile:" + echo " source <(cortex completion)" + echo "Note: \`bash_completion\` must be installed on your system for cortex command completion to function properly" + fi + fi + else + echo -e "\nIf you would like to enable cortex command completion and the cx alias, add this line to your bash profile:" + echo " source <(cortex completion)" + echo "Note: \`bash_completion\` must be installed on your system for cortex command completion to function properly" + fi +} + +function uninstall_cli() { + set -e + + rm -rf $HOME/.cortex + + if ! command -v cortex >/dev/null; then + echo -e "\nThe Cortex CLI is not installed" + return + fi + + if [[ ! -f /usr/local/bin/cortex ]]; then + echo -e "\nThe Cortex CLI was not found at /usr/local/bin/cortex, please uninstall it manually" + return + fi + + if [ $(id -u) = 0 ]; then + rm /usr/local/bin/cortex + else + ask_sudo + sudo rm /usr/local/bin/cortex + fi + echo -e "\n✓ Uninstalled the Cortex CLI" + + bash_profile_path=$(get_bash_profile) + if [ ! "$bash_profile_path" = "" ]; then + if grep -Fxq "source <(cortex completion)" "$bash_profile_path"; then + echo + read -p "Would you like to remove \"source <(cortex completion)\" from your bash profile ($bash_profile_path)? 
[Y/n] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + sed '/^source <(cortex completion)$/d' "$bash_profile_path" > "${bash_profile_path}_cortex_modified" && mv -f "${bash_profile_path}_cortex_modified" "$bash_profile_path" + echo "✓ Your bash profile ($bash_profile_path) has been updated" + fi + fi + fi +} + +function get_bash_profile() { + if [ "$PARSED_OS" = "darwin" ]; then + if [ -f $HOME/.bash_profile ]; then + echo $HOME/.bash_profile + return + elif [ -f $HOME/.bashrc ]; then + echo $HOME/.bashrc + return + fi + else + if [ -f $HOME/.bashrc ]; then + echo $HOME/.bashrc + return + elif [ -f $HOME/.bash_profile ]; then + echo $HOME/.bash_profile + return + fi + fi + + echo "" +} + +function ask_sudo() { + if ! sudo -n true 2>/dev/null; then + echo -e "\nPlease enter your sudo password" + fi +} + +function prompt_for_telemetry() { + if [ "$CORTEX_ENABLE_TELEMETRY" != "true" ] && [ "$CORTEX_ENABLE_TELEMETRY" != "false" ]; then + while true + do + echo + read -p "Would you like to help improve Cortex by anonymously sending error reports and usage stats to the dev team? [Y/n] " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + export CORTEX_ENABLE_TELEMETRY=true + break + elif [[ $REPLY =~ ^[Nn]$ ]]; then + export CORTEX_ENABLE_TELEMETRY=false + break + fi + echo "Unexpected value, please enter \"Y\" or \"n\"" + done + fi +} + +############ +### HELP ### +############ + +function show_help() { + echo " +Usage: + ./cortex.sh command [sub-command] [flags] + +Available Commands: + install install Cortex + uninstall uninstall Cortex + update update Cortex + info information about Cortex + + install cli install the Cortex CLI + uninstall cli uninstall the Cortex CLI + +Flags: + -c, --config path to a Cortex config file + -h, --help +" +} + +###################### +### ARG PROCESSING ### +###################### + +arg1=${1:-""} +arg2=${2:-""} +arg3=${3:-""} + +if [ -z "$arg1" ]; then + show_help + exit 0 +fi + +if [ "$arg1" = "install" ]; then + if [ ! 
"$arg3" = "" ]; then + echo -e "\nerror: too many arguments for install command" + show_help + exit 1 + elif [ "$arg2" = "" ]; then + prompt_for_telemetry && install_eks && install_cortex && info + elif [ "$arg2" = "cli" ]; then + install_cli + elif [ "$arg2" = "" ]; then + echo -e "\nerror: missing subcommand for install" + show_help + exit 1 + else + echo -e "\nerror: invalid subcommand for install: $arg2" + show_help + exit 1 + fi +elif [ "$arg1" = "uninstall" ]; then + if [ ! "$arg3" = "" ]; then + echo -e "\nerror: too many arguments for uninstall command" + show_help + exit 1 + elif [ "$arg2" = "" ]; then + uninstall_cortex && uninstall_eks + elif [ "$arg2" = "cli" ]; then + uninstall_cli + elif [ "$arg2" = "" ]; then + echo -e "\nerror: missing subcommand for uninstall" + show_help + exit 1 + else + echo -e "\nerror: invalid subcommand for uninstall: $arg2" + show_help + exit 1 + fi +elif [ "$arg1" = "update" ]; then + if [ ! "$arg2" = "" ]; then + echo -e "\nerror: too many arguments for update command" + show_help + exit 1 + else + uninstall_operator && install_cortex + fi +elif [ "$arg1" = "info" ]; then + if [ ! 
"$arg2" = "" ]; then + echo -e "\nerror: too many arguments for info command" + show_help + exit 1 + else + info + fi +else + echo -e "\nerror: unknown command: $arg1" + show_help + exit 1 +fi diff --git a/dev/registry.sh b/dev/registry.sh index 468ee09d67..e0cf1fc67d 100755 --- a/dev/registry.sh +++ b/dev/registry.sh @@ -49,6 +49,7 @@ function create_registry() { aws ecr create-repository --repository-name=cortexlabs/python-packager --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true + aws ecr create-repository --repository-name=cortexlabs/manager --region=$REGISTRY_REGION || true } ### HELPERS ### @@ -133,6 +134,7 @@ elif [ "$cmd" = "update" ]; then build_and_push $ROOT/images/tf-serve tf-serve latest build_and_push $ROOT/images/tf-serve-gpu tf-serve-gpu latest build_and_push $ROOT/images/python-packager python-packager latest + build_and_push $ROOT/images/manager manager latest fi build_and_push $ROOT/images/spark spark latest diff --git a/docs/cluster/config.md b/docs/cluster/config.md index 898c5b3150..feee822ac3 100644 --- a/docs/cluster/config.md +++ b/docs/cluster/config.md @@ -1,6 +1,6 @@ # Config -These environment variables can be modified and exported before running `cortex-installer.sh` commands. Alternatively, a configuration file may be provided to `cortex-installer.sh` via the `--config` flag (e.g. `cortex-installer.sh --config=./config.sh install operator`). Default values are shown. +These environment variables can be modified and exported before running `cortex.sh` commands. Alternatively, a configuration file may be provided to `cortex.sh` via the `--config` flag (e.g. `cortex.sh --config=./config.sh install`). Default values are shown. 
@@ -20,12 +20,21 @@ export CORTEX_BUCKET="cortex-[RANDOM_ID]" # The AWS region Cortex will use export CORTEX_REGION="us-west-2" +# The name of the EKS cluster Cortex will use +export CORTEX_CLUSTER="cortex" + +# The AWS node type Cortex will use +export CORTEX_NODE_TYPE="t3.medium" + +# Minimum number of nodes in the cluster +export CORTEX_NODES_MIN=2 + +# Maximum number of nodes in the cluster +export CORTEX_NODES_MAX=5 + # The name of the Kubernetes namespace Cortex will use export CORTEX_NAMESPACE="cortex" -# Flag to enable collecting error reports and usage stats. If flag is not set to either "true" or "false", you will be prompted. -export CORTEX_ENABLE_TELEMETRY="" - # Image paths export CORTEX_IMAGE_ARGO_CONTROLLER="cortexlabs/argo-controller:master" export CORTEX_IMAGE_ARGO_EXECUTOR="cortexlabs/argo-executor:master" @@ -41,4 +50,7 @@ export CORTEX_IMAGE_TF_API="cortexlabs/tf-api:master" export CORTEX_IMAGE_TF_TRAIN_GPU="cortexlabs/tf-train-gpu:master" export CORTEX_IMAGE_TF_SERVE_GPU="cortexlabs/tf-serve-gpu:master" export CORTEX_IMAGE_PYTHON_PACKAGER="cortexlabs/python-packager:master" + +# Flag to enable collecting error reports and usage stats. If flag is not set to either "true" or "false", you will be prompted. +export CORTEX_ENABLE_TELEMETRY="" ``` diff --git a/docs/cluster/install.md b/docs/cluster/install.md index 20ed12f141..563bc228a1 100644 --- a/docs/cluster/install.md +++ b/docs/cluster/install.md @@ -4,57 +4,33 @@ 1. [AWS credentials](aws.md) -## Download the install script +## Spin up Cortex in your AWS account ```bash # Download -curl -O https://raw.githubusercontent.com/cortexlabs/cortex/master/cortex-installer.sh +curl -O https://raw.githubusercontent.com/cortexlabs/cortex/master/cortex.sh # Change permissions -chmod +x cortex-installer.sh +chmod +x cortex.sh # Set AWS credentials export AWS_ACCESS_KEY_ID=*** export AWS_SECRET_ACCESS_KEY=*** -``` - -## Kubernetes - -Cortex runs on Kubernetes and requires access to `kubectl`. 
If you don't already have a Kubernetes cluster, [eksctl](https://eksctl.io) is a simple tool to create and manage one. - -**We recommend a minimum cluster size of 2 [t3.medium](https://aws.amazon.com/ec2/instance-types) AWS instances. Cortex may not run successfully on clusters with less compute resources.** -```bash -# Install kubectl, eksctl, and aws-iam-authenticator -./cortex-installer.sh install kubernetes-tools - -# Spin up an EKS cluster (this takes ~20 minutes; see eksctl.io for more options) -eksctl create cluster --name=cortex --nodes=2 --node-type=t3.medium -``` - -This cluster configuration will cost about $0.29 per hour in AWS fees. - -## Install the operator - -The Cortex operator is a service that runs on Kubernetes, translates declarative configuration into workloads, and orchestrates those workloads on the cluster. Its installation is configurable. For a full list of configuration options please refer to the [operator config](config.md) documentation. - -```bash -# Install the Cortex operator -./cortex-installer.sh install operator +# Install Cortex +./cortex.sh install ``` ## Install the CLI -The CLI runs on developer machines (e.g. your laptop) and communicates with the operator. 
- ```bash # Install the Cortex CLI -./cortex-installer.sh install cli +./cortex.sh install cli # Get the operator endpoint -./cortex-installer.sh get endpoints +./cortex.sh info # Configure the CLI cortex configure diff --git a/docs/cluster/uninstall.md b/docs/cluster/uninstall.md index 3c5dd5ebac..312dcecb75 100644 --- a/docs/cluster/uninstall.md +++ b/docs/cluster/uninstall.md @@ -1,33 +1,29 @@ # Uninstall -## Download the uninstall script +## Uninstall Cortex ```bash # Download -curl -O https://raw.githubusercontent.com/cortexlabs/cortex/master/cortex-installer.sh +curl -O https://raw.githubusercontent.com/cortexlabs/cortex/master/cortex.sh # Change permissions -chmod +x cortex-installer.sh +chmod +x cortex.sh # Set AWS credentials export AWS_ACCESS_KEY_ID=*** export AWS_SECRET_ACCESS_KEY=*** -``` - -## Uninstall the operator -```bash -# Uninstall the Cortex operator -./cortex-installer.sh uninstall operator +# Uninstall +./cortex.sh uninstall ``` ## Uninstall the CLI ```bash # Uninstall the Cortex CLI -./cortex-installer.sh uninstall cli +./cortex.sh uninstall cli ``` ## Clean up AWS @@ -43,22 +39,4 @@ aws s3 rb --force s3:// # Delete the log group aws logs delete-log-group --log-group-name cortex --region us-west-2 - -# Uninstall the AWS CLI (if you used cortex-installer.sh to install it) -sudo rm -rf /usr/local/aws && sudo rm /usr/local/bin/aws && rm -rf ~/.aws -``` - -## Spin down Kubernetes - -If you used [`eksctl`](https://eksctl.io) to create your cluster, you can use it to spin the cluster down. 
- -**Make sure the Cortex operator is uninstalled to prevent AWS resource deletion deadlocks.** - -```bash -# Spin down an EKS cluster -eksctl delete cluster --name=cortex -# Confirm that both eksctl CloudFormation stacks have been deleted via the AWS console - -# Uninstall kubectl, eksctl, and aws-iam-authenticator -./cortex-installer.sh uninstall kubernetes-tools ``` diff --git a/images/manager/Dockerfile b/images/manager/Dockerfile new file mode 100644 index 0000000000..9eed3cc4a4 --- /dev/null +++ b/images/manager/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3.7-alpine3.10 + +WORKDIR /root + +ENV PATH /root/.local/bin:$PATH + +RUN pip3 install awscli --upgrade --user && \ + rm -rf /root/.cache/pip* + +RUN apk add --no-cache bash curl gettext jq + +RUN curl --location "https://github.com/weaveworks/eksctl/releases/download/0.1.38/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && \ + mv /tmp/eksctl /usr/local/bin + +RUN curl -o aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.13.7/2019-06-11/bin/linux/amd64/aws-iam-authenticator && \ + chmod +x ./aws-iam-authenticator && \ + mv ./aws-iam-authenticator /usr/local/bin/aws-iam-authenticator + +RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.15.0/bin/linux/amd64/kubectl && \ + chmod +x ./kubectl && \ + mv ./kubectl /usr/local/bin/kubectl + +COPY manager /root + +ENTRYPOINT ["/bin/bash"] diff --git a/manager/info.sh b/manager/info.sh new file mode 100755 index 0000000000..8cc88ae420 --- /dev/null +++ b/manager/info.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +function get_operator_endpoint() { + set -eo pipefail + kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-operator -o json | tr -d '[:space:]' | sed 's/.*{\"hostname\":\"\(.*\)\".*/\1/' +} + +function get_apis_endpoint() { + set -eo pipefail + kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-apis -o json | tr -d '[:space:]' | sed 's/.*{\"hostname\":\"\(.*\)\".*/\1/' +} + +echo +eksctl utils write-kubeconfig --name=$CORTEX_CLUSTER + +operator_endpoint=$(get_operator_endpoint) +apis_endpoint=$(get_apis_endpoint) + +echo +echo "Operator endpoint: $operator_endpoint" +echo "APIs endpoint: $apis_endpoint" diff --git a/manager/install_cortex.sh b/manager/install_cortex.sh new file mode 100755 index 0000000000..54087460dd --- /dev/null +++ b/manager/install_cortex.sh @@ -0,0 +1,172 @@ +#!/bin/bash + +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+set -e
+
+# Makes sure the S3 bucket named by CORTEX_BUCKET exists.
+#   - If CORTEX_BUCKET is empty, a name is derived from a hash of the AWS account id.
+#   - If head-bucket succeeds, the existing bucket is reused.
+#   - If the bucket does not exist, it is created in CORTEX_REGION.
+#   - If the bucket exists but is not accessible, the script exits with status 1.
+function setup_bucket() {
+  if [ "$CORTEX_BUCKET" == "" ]; then
+    account_id_hash=$(aws sts get-caller-identity | jq .Account | sha256sum | cut -f1 -d" " | cut -c -10)
+    CORTEX_BUCKET="cortex-${account_id_hash}"
+  fi
+
+  if ! aws s3api head-bucket --bucket "$CORTEX_BUCKET" --output json 2>/dev/null; then
+    if aws s3 ls "s3://$CORTEX_BUCKET" --output json 2>&1 | grep -q 'NoSuchBucket'; then
+      echo -e "\n✓ Creating an S3 bucket: $CORTEX_BUCKET"
+      if [ "$CORTEX_REGION" == "us-east-1" ]; then
+        # us-east-1 is the default S3 location: the API rejects it as an explicit
+        # LocationConstraint, so the bucket configuration must be omitted
+        aws s3api create-bucket --bucket "$CORTEX_BUCKET" \
+                                --region "$CORTEX_REGION" \
+                                >/dev/null
+      else
+        aws s3api create-bucket --bucket "$CORTEX_BUCKET" \
+                                --region "$CORTEX_REGION" \
+                                --create-bucket-configuration LocationConstraint="$CORTEX_REGION" \
+                                >/dev/null
+      fi
+    else
+      echo -e "\nA bucket named \"${CORTEX_BUCKET}\" already exists, but you do not have access to it"
+      exit 1
+    fi
+  else
+    echo -e "\n✓ Using an existing S3 bucket: $CORTEX_BUCKET"
+  fi
+}
+
+# Creates the CloudWatch log group CORTEX_LOG_GROUP in CORTEX_REGION unless
+# list-tags-log-group already returns a "tags" entry for it.
+function setup_cloudwatch_logs() {
+  if ! aws logs list-tags-log-group --log-group-name "$CORTEX_LOG_GROUP" --region "$CORTEX_REGION" --output json 2>&1 | grep -q "\"tags\":"; then
+    echo -e "\n✓ Creating a CloudWatch log group: $CORTEX_LOG_GROUP"
+    aws logs create-log-group --log-group-name "$CORTEX_LOG_GROUP" --region "$CORTEX_REGION"
+  else
+    echo -e "\n✓ Using an existing CloudWatch log group: $CORTEX_LOG_GROUP"
+  fi
+}
+
+# Creates or updates the "cortex-config" ConfigMap from the CORTEX_* environment.
+# Rendering with --dry-run and piping into `kubectl apply` makes the call repeatable.
+function setup_configmap() {
+  kubectl -n="$CORTEX_NAMESPACE" create configmap 'cortex-config' \
+    --from-literal='LOG_GROUP'="$CORTEX_LOG_GROUP" \
+    --from-literal='BUCKET'="$CORTEX_BUCKET" \
+    --from-literal='REGION'="$CORTEX_REGION" \
+    --from-literal='NAMESPACE'="$CORTEX_NAMESPACE" \
+    --from-literal='IMAGE_OPERATOR'="$CORTEX_IMAGE_OPERATOR" \
+    --from-literal='IMAGE_SPARK'="$CORTEX_IMAGE_SPARK" \
+    --from-literal='IMAGE_TF_TRAIN'="$CORTEX_IMAGE_TF_TRAIN" \
+    --from-literal='IMAGE_TF_SERVE'="$CORTEX_IMAGE_TF_SERVE" \
+    --from-literal='IMAGE_TF_API'="$CORTEX_IMAGE_TF_API" \
+    --from-literal='IMAGE_PYTHON_PACKAGER'="$CORTEX_IMAGE_PYTHON_PACKAGER" \
+    --from-literal='IMAGE_TF_TRAIN_GPU'="$CORTEX_IMAGE_TF_TRAIN_GPU" \
+    --from-literal='IMAGE_TF_SERVE_GPU'="$CORTEX_IMAGE_TF_SERVE_GPU" \
+    --from-literal='ENABLE_TELEMETRY'="$CORTEX_ENABLE_TELEMETRY" \
+    -o yaml --dry-run | kubectl apply -f - >/dev/null
+}
+
+# Creates or updates the "aws-credentials" Secret from the AWS credentials in the environment.
+function setup_secrets() {
+  kubectl -n="$CORTEX_NAMESPACE" create secret generic 'aws-credentials' \
+    --from-literal='AWS_ACCESS_KEY_ID'="$AWS_ACCESS_KEY_ID" \
+    --from-literal='AWS_SECRET_ACCESS_KEY'="$AWS_SECRET_ACCESS_KEY" \
+    -o yaml --dry-run | kubectl apply -f - >/dev/null
+}
+
+# Polls the cluster every 5 seconds until, in this order:
+#   1. both nginx-controller services report a load balancer ingress,
+#   2. the operator endpoint answers a curl request,
+#   3. the newest operator pod has reported ready on 3 consecutive polls.
+# Exits with status 1 if the operator container is not ready and has restarted at least twice.
+function validate_cortex() {
+  # Failing probes are expected while the cluster converges, so do not abort on them
+  set +e
+
+  echo -en "\nWaiting for Cortex to be ready "
+
+  operator_load_balancer="waiting"
+  api_load_balancer="waiting"
+  operator_endpoint_reachable="waiting"
+  operator_pod_ready_cycles=0
+  operator_endpoint=""
+
+  while true; do
+    echo -n "."
+    sleep 5
+
+    # Track readiness of the most recently created operator pod; any non-ready poll resets the streak
+    operator_pod_name=$(kubectl -n=$CORTEX_NAMESPACE get pods -o=name --sort-by=.metadata.creationTimestamp | grep "^pod/operator-" | tail -1)
+    if [ "$operator_pod_name" == "" ]; then
+      operator_pod_ready_cycles=0
+    else
+      is_ready=$(kubectl -n=$CORTEX_NAMESPACE get "$operator_pod_name" -o jsonpath='{.status.containerStatuses[0].ready}')
+      if [ "$is_ready" == "true" ]; then
+        ((operator_pod_ready_cycles++))
+      else
+        operator_pod_ready_cycles=0
+      fi
+    fi
+
+    if [ "$operator_load_balancer" != "ready" ]; then
+      out=$(kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-operator -o json | tr -d '[:space:]')
+      if [[ $out != *'"loadBalancer":{"ingress":[{"'* ]]; then
+        continue
+      fi
+      operator_load_balancer="ready"
+    fi
+
+    if [ "$api_load_balancer" != "ready" ]; then
+      out=$(kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-apis -o json | tr -d '[:space:]')
+      if [[ $out != *'"loadBalancer":{"ingress":[{"'* ]]; then
+        continue
+      fi
+      api_load_balancer="ready"
+    fi
+
+    # Extract the load balancer hostname from the service JSON (looked up once, then cached)
+    if [ "$operator_endpoint" = "" ]; then
+      operator_endpoint=$(kubectl -n=$CORTEX_NAMESPACE get service nginx-controller-operator -o json | tr -d '[:space:]' | sed 's/.*{\"hostname\":\"\(.*\)\".*/\1/')
+    fi
+
+    if [ "$operator_endpoint_reachable" != "ready" ]; then
+      if ! curl $operator_endpoint >/dev/null 2>&1; then
+        continue
+      fi
+      operator_endpoint_reachable="ready"
+    fi
+
+    # A pod that exists, is not ready, and keeps restarting indicates a startup failure
+    if [ "$operator_pod_ready_cycles" == "0" ] && [ "$operator_pod_name" != "" ]; then
+      num_restart=$(kubectl -n=$CORTEX_NAMESPACE get "$operator_pod_name" -o jsonpath='{.status.containerStatuses[0].restartCount}')
+      if [[ $num_restart -ge 2 ]]; then
+        echo -e "\n\nAn error occurred when starting the Cortex operator. View the logs with:"
+        echo "  kubectl logs $operator_pod_name --namespace=$CORTEX_NAMESPACE"
+        exit 1
+      fi
+      continue
+    fi
+
+    if [[ $operator_pod_ready_cycles -lt 3 ]]; then
+      continue
+    fi
+
+    break
+  done
+
+  echo -e "\n\n✓ Cortex is ready!"
+
+  if command -v cortex >/dev/null; then
+    echo -e "\nPlease run \`cortex configure\` to make sure your CLI is configured correctly"
+  fi
+}
+
+# Point kubectl at the EKS cluster before touching any Kubernetes resources
+echo
+eksctl utils write-kubeconfig --name=$CORTEX_CLUSTER
+
+echo -e "\nInstalling Cortex ..."
+
+setup_bucket
+setup_cloudwatch_logs
+
+# The namespace must exist before the ConfigMap and Secret are created in it
+envsubst < manifests/namespace.yaml | kubectl apply -f - >/dev/null
+
+setup_configmap
+setup_secrets
+
+# Each manifest is rendered with the current environment by envsubst, then applied
+envsubst < manifests/spark.yaml | kubectl apply -f - >/dev/null
+envsubst < manifests/argo.yaml | kubectl apply -f - >/dev/null
+envsubst < manifests/nginx.yaml | kubectl apply -f - >/dev/null
+envsubst < manifests/fluentd.yaml | kubectl apply -f - >/dev/null
+envsubst < manifests/operator.yaml | kubectl apply -f - >/dev/null
+
+validate_cortex
diff --git a/manager/install_eks.sh b/manager/install_eks.sh
new file mode 100755
index 0000000000..23e797e51f
--- /dev/null
+++ b/manager/install_eks.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Copyright 2019 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+# Creates the EKS cluster CORTEX_CLUSTER, sized by CORTEX_NODE_TYPE / CORTEX_NODES_MIN / CORTEX_NODES_MAX
+echo -e "\nSpinning up the cluster ... (this will take about 15 minutes)"
+
+echo
+eksctl create cluster --name="$CORTEX_CLUSTER" --asg-access --node-type="$CORTEX_NODE_TYPE" --nodes-min="$CORTEX_NODES_MIN" --nodes-max="$CORTEX_NODES_MAX"
+
+echo -e "\n✓ Spun up the cluster"
diff --git a/manager/manifests/argo.yaml b/manager/manifests/argo.yaml
new file mode 100644
index 0000000000..b4f6271583
--- /dev/null
+++ b/manager/manifests/argo.yaml
@@ -0,0 +1,132 @@
+# Copyright 2019 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: argo-executor + namespace: $CORTEX_NAMESPACE +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: argo-executor + namespace: $CORTEX_NAMESPACE +subjects: +- kind: ServiceAccount + name: argo-executor + namespace: $CORTEX_NAMESPACE +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io +--- + +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: workflows.argoproj.io + namespace: $CORTEX_NAMESPACE +spec: + group: argoproj.io + names: + kind: Workflow + plural: workflows + shortNames: + - wf + scope: Namespaced + version: v1alpha1 +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: argo-controller + namespace: $CORTEX_NAMESPACE +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: argo-controller + namespace: $CORTEX_NAMESPACE +rules: +- apiGroups: [""] + resources: [pods, pods/exec] + verbs: [create, get, list, watch, update, patch, delete] +- apiGroups: [""] + resources: [configmaps] + verbs: [get, watch, list] +- apiGroups: [""] + resources: [persistentvolumeclaims] + verbs: [create, delete] +- apiGroups: [argoproj.io] + resources: [workflows, workflows/finalizers] + verbs: [get, list, watch, update, patch, delete] +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: argo + namespace: $CORTEX_NAMESPACE +subjects: +- kind: ServiceAccount + name: argo-controller + namespace: $CORTEX_NAMESPACE +roleRef: + kind: Role + name: argo-controller + apiGroup: rbac.authorization.k8s.io +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: argo-controller + namespace: $CORTEX_NAMESPACE +data: + config: | + namespace: $CORTEX_NAMESPACE +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: argo-controller + namespace: $CORTEX_NAMESPACE +spec: + selector: + matchLabels: + app: argo-controller + template: + metadata: + labels: + 
app: argo-controller + spec: + containers: + - args: + - --configmap + - argo-controller + - --executor-image + - $CORTEX_IMAGE_ARGO_EXECUTOR + - --executor-image-pull-policy + - Always + command: + - workflow-controller + image: $CORTEX_IMAGE_ARGO_CONTROLLER + imagePullPolicy: Always + name: argo-controller + serviceAccountName: argo-controller diff --git a/manager/manifests/fluentd.yaml b/manager/manifests/fluentd.yaml new file mode 100644 index 0000000000..9049e66b83 --- /dev/null +++ b/manager/manifests/fluentd.yaml @@ -0,0 +1,138 @@ +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Identity the fluentd DaemonSet runs as
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: fluentd
+  namespace: $CORTEX_NAMESPACE
+  labels:
+    app: fluentd
+---
+
+# Read-only access to pods in the Cortex namespace
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: fluentd
+  namespace: $CORTEX_NAMESPACE
+rules:
+- apiGroups: [""]
+  resources: [pods]
+  verbs: [get, list, watch]
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: fluentd
+  namespace: $CORTEX_NAMESPACE
+subjects:
+- kind: ServiceAccount
+  name: fluentd
+  namespace: $CORTEX_NAMESPACE
+roleRef:
+  kind: Role
+  name: fluentd
+  apiGroup: rbac.authorization.k8s.io
+---
+
+# fluent.conf: tail the container logs of the Cortex namespace and ship them to CloudWatch.
+# ${CORTEX_NAMESPACE} needs the braces: it is followed by "_", which envsubst would
+# otherwise read as part of the variable name.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fluentd
+  namespace: $CORTEX_NAMESPACE
+data:
+  fluent.conf: |
+    <match fluent.**>
+      @type null
+    </match>
+    <source>
+      @type tail
+      enable_stat_watcher false
+      path /var/log/containers/**_${CORTEX_NAMESPACE}_**.log
+      pos_file /var/log/fluentd-containers.log.pos
+      time_format %Y-%m-%dT%H:%M:%S.%NZ
+      tag *
+      format json
+      read_from_head true
+    </source>
+    <match **>
+      @type cloudwatch_logs
+      log_group_name "#{ENV['LOG_GROUP_NAME']}"
+      auto_create_stream true
+      use_tag_as_stream true
+    </match>
+---
+
+# One fluentd pod per node; the init container copies the ConfigMap into the
+# emptyDir volume that the fluentd container mounts at /fluentd/etc
+apiVersion: extensions/v1beta1
+kind: DaemonSet
+metadata:
+  name: fluentd
+  namespace: $CORTEX_NAMESPACE
+spec:
+  template:
+    metadata:
+      labels:
+        app: fluentd
+    spec:
+      serviceAccountName: fluentd
+      initContainers:
+        - name: copy-fluentd-config
+          image: busybox
+          command: ['sh', '-c', 'cp /config-volume/* /etc/fluentd']
+          volumeMounts:
+            - name: config-volume
+              mountPath: /config-volume
+            - name: config
+              mountPath: /etc/fluentd
+      containers:
+        - name: fluentd
+          image: $CORTEX_IMAGE_FLUENTD
+          imagePullPolicy: Always
+          env:
+            - name: AWS_REGION
+              value: $CORTEX_REGION
+            - name: LOG_GROUP_NAME
+              value: $CORTEX_LOG_GROUP
+            - name: AWS_ACCESS_KEY_ID
+              valueFrom:
+                secretKeyRef:
+                  name: aws-credentials
+                  key: AWS_ACCESS_KEY_ID
+            - name: AWS_SECRET_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: aws-credentials
+                  key: AWS_SECRET_ACCESS_KEY
+          volumeMounts:
+            - name: varlog
+              mountPath: /var/log
+            - name: varlibdockercontainers
+              mountPath: /var/lib/docker/containers
+              readOnly: true
+            - name: config
+              mountPath: /fluentd/etc
+      terminationGracePeriodSeconds: 30
+      volumes:
+        - name: varlog
+          hostPath:
+            path: /var/log
+        - name: varlibdockercontainers
+          hostPath:
+            path: /var/lib/docker/containers
+        - name: config
+          emptyDir: {}
+        - name: config-volume
+          configMap:
+            name: fluentd
diff --git a/manager/manifests/namespace.yaml b/manager/manifests/namespace.yaml
new file mode 100644
index 0000000000..2bb3d82f47
--- /dev/null
+++ b/manager/manifests/namespace.yaml
@@ -0,0 +1,18 @@
+# Copyright 2019 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: $CORTEX_NAMESPACE
diff --git a/manager/manifests/nginx.yaml b/manager/manifests/nginx.yaml
new file mode 100644
index 0000000000..c554bc4652
--- /dev/null
+++ b/manager/manifests/nginx.yaml
@@ -0,0 +1,418 @@
+# Copyright 2019 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Two independent ingress stacks are defined below, one per ingress class:
+#   "operator" -> nginx-backend-operator / nginx-controller-operator
+#   "apis"     -> nginx-backend-apis     / nginx-controller-apis
+# Each stack has a default backend (Deployment + Service) and a controller
+# (Deployment + LoadBalancer Service). Every Deployment needs its own
+# app.kubernetes.io/name label so that Services select only the intended pods.
+
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: nginx
+  namespace: $CORTEX_NAMESPACE
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: nginx
+  namespace: $CORTEX_NAMESPACE
+rules:
+  - apiGroups: [""]
+    resources: [endpoints, pods, secrets]
+    verbs: [list, watch]
+  - apiGroups: [""]
+    resources: [nodes, services, ingresses]
+    verbs: [get, list, watch]
+  - apiGroups: [""]
+    resources: [events]
+    verbs: [create, patch]
+  - apiGroups: ["extensions"]
+    resources: [ingresses]
+    verbs: [get, list, watch]
+  - apiGroups: ["extensions"]
+    resources: [ingresses/status]
+    verbs: [update]
+  - apiGroups: [""]
+    resources: [pods, secrets, namespaces, endpoints]
+    verbs: [get]
+  - apiGroups: [""]
+    resources: [configmaps]
+    resourceNames:
+      # Defaults to "<election-id>-<ingress-class>"
+      # Here: "ingress-controller-leader-<ingress-class>", one entry per class (operator, apis)
+      # This has to be adapted if you change either parameter
+      # when launching the nginx-ingress-controller.
+      - "ingress-controller-leader-operator"
+      - "ingress-controller-leader-apis"
+    verbs: [get, update]
+  - apiGroups: [""]
+    resources: [configmaps]
+    verbs: [get, list, watch, create]
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: nginx
+  namespace: $CORTEX_NAMESPACE
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: nginx
+subjects:
+  - kind: ServiceAccount
+    name: nginx
+    namespace: $CORTEX_NAMESPACE
+---
+
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: nginx-configuration
+  namespace: $CORTEX_NAMESPACE
+data:
+  use-proxy-protocol: "true"
+---
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx-backend-operator
+  labels:
+    app.kubernetes.io/name: nginx-backend-operator
+    app.kubernetes.io/part-of: ingress-nginx
+  namespace: $CORTEX_NAMESPACE
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: nginx-backend-operator
+      app.kubernetes.io/part-of: ingress-nginx
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: nginx-backend-operator
+        app.kubernetes.io/part-of: ingress-nginx
+    spec:
+      terminationGracePeriodSeconds: 60
+      containers:
+        - name: nginx-backend-operator
+          # Any image is permissible as long as:
+          # 1. It serves a 404 page at /
+          # 2. It serves 200 on a /healthz endpoint
+          image: $CORTEX_IMAGE_NGINX_BACKEND
+          imagePullPolicy: Always
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 8080
+              scheme: HTTP
+            initialDelaySeconds: 30
+            timeoutSeconds: 5
+          ports:
+            - containerPort: 8080
+          resources:
+            limits:
+              cpu: 10m
+              memory: 20Mi
+            requests:
+              cpu: 10m
+              memory: 20Mi
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: nginx-backend-operator
+  namespace: $CORTEX_NAMESPACE
+  labels:
+    app.kubernetes.io/name: nginx-backend-operator
+    app.kubernetes.io/part-of: ingress-nginx
+spec:
+  ports:
+    - port: 80
+      targetPort: 8080
+  selector:
+    app.kubernetes.io/name: nginx-backend-operator
+    app.kubernetes.io/part-of: ingress-nginx
+---
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx-controller-operator
+  namespace: $CORTEX_NAMESPACE
+  labels:
+    app.kubernetes.io/name: nginx-controller-operator
+    app.kubernetes.io/part-of: ingress-nginx
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: nginx-controller-operator
+      app.kubernetes.io/part-of: ingress-nginx
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: nginx-controller-operator
+        app.kubernetes.io/part-of: ingress-nginx
+    spec:
+      serviceAccountName: nginx
+      containers:
+        - name: nginx-controller
+          image: $CORTEX_IMAGE_NGINX_CONTROLLER
+          imagePullPolicy: Always
+          args:
+            - /nginx-ingress-controller
+            - --watch-namespace=$CORTEX_NAMESPACE
+            - --default-backend-service=$CORTEX_NAMESPACE/nginx-backend-operator
+            - --configmap=$CORTEX_NAMESPACE/nginx-configuration
+            - --publish-service=$CORTEX_NAMESPACE/nginx-controller-operator
+            - --annotations-prefix=nginx.ingress.kubernetes.io
+            - --ingress-class=operator
+          securityContext:
+            capabilities:
+              drop:
+                - ALL
+              add:
+                - NET_BIND_SERVICE
+            # www-data -> 33
+            runAsUser: 33
+          env:
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: POD_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          ports:
+            - name: http
+              containerPort: 80
+            - name: https
+              containerPort: 443
+          livenessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz
+              port: 10254
+              scheme: HTTP
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
+          readinessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz
+              port: 10254
+              scheme: HTTP
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
+---
+
+kind: Service
+apiVersion: v1
+metadata:
+  name: nginx-controller-operator
+  namespace: $CORTEX_NAMESPACE
+  labels:
+    app.kubernetes.io/name: nginx-controller-operator
+    app.kubernetes.io/part-of: ingress-nginx
+  annotations:
+    # Enable PROXY protocol
+    service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: '*'
+    # Ensure the ELB idle timeout is less than nginx keep-alive timeout. By default,
+    # NGINX keep-alive is set to 75s. If using WebSockets, the value will need to be
+    # increased to '3600' to avoid any potential issues.
+    service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: '60'
+spec:
+  type: LoadBalancer
+  selector:
+    app.kubernetes.io/name: nginx-controller-operator
+    app.kubernetes.io/part-of: ingress-nginx
+  ports:
+    - name: http
+      port: 80
+      targetPort: http
+    - name: https
+      port: 443
+      targetPort: https
+---
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx-backend-apis
+  labels:
+    app.kubernetes.io/name: nginx-backend-apis
+    app.kubernetes.io/part-of: ingress-nginx
+  namespace: $CORTEX_NAMESPACE
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: nginx-backend-apis
+      app.kubernetes.io/part-of: ingress-nginx
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: nginx-backend-apis
+        app.kubernetes.io/part-of: ingress-nginx
+    spec:
+      terminationGracePeriodSeconds: 60
+      containers:
+        - name: nginx-backend-apis
+          # Any image is permissible as long as:
+          # 1. It serves a 404 page at /
+          # 2. It serves 200 on a /healthz endpoint
+          image: $CORTEX_IMAGE_NGINX_BACKEND
+          imagePullPolicy: Always
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 8080
+              scheme: HTTP
+            initialDelaySeconds: 30
+            timeoutSeconds: 5
+          ports:
+            - containerPort: 8080
+          resources:
+            limits:
+              cpu: 10m
+              memory: 20Mi
+            requests:
+              cpu: 10m
+              memory: 20Mi
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: nginx-backend-apis
+  namespace: $CORTEX_NAMESPACE
+  labels:
+    app.kubernetes.io/name: nginx-backend-apis
+    app.kubernetes.io/part-of: ingress-nginx
+spec:
+  ports:
+    - port: 80
+      targetPort: 8080
+  selector:
+    app.kubernetes.io/name: nginx-backend-apis
+    app.kubernetes.io/part-of: ingress-nginx
+---
+
+# The controller is labelled nginx-controller-apis (not nginx-backend-apis) so that
+# the nginx-backend-apis Service and Deployment do not also select controller pods.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx-controller-apis
+  namespace: $CORTEX_NAMESPACE
+  labels:
+    app.kubernetes.io/name: nginx-controller-apis
+    app.kubernetes.io/part-of: ingress-nginx
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: nginx-controller-apis
+      app.kubernetes.io/part-of: ingress-nginx
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: nginx-controller-apis
+        app.kubernetes.io/part-of: ingress-nginx
+    spec:
+      serviceAccountName: nginx
+      containers:
+        - name: nginx-controller
+          image: $CORTEX_IMAGE_NGINX_CONTROLLER
+          imagePullPolicy: Always
+          args:
+            - /nginx-ingress-controller
+            - --watch-namespace=$CORTEX_NAMESPACE
+            - --default-backend-service=$CORTEX_NAMESPACE/nginx-backend-apis
+            - --configmap=$CORTEX_NAMESPACE/nginx-configuration
+            # Publish the address of this controller's own LoadBalancer Service
+            - --publish-service=$CORTEX_NAMESPACE/nginx-controller-apis
+            - --annotations-prefix=nginx.ingress.kubernetes.io
+            - --ingress-class=apis
+          securityContext:
+            capabilities:
+              drop:
+                - ALL
+              add:
+                - NET_BIND_SERVICE
+            # www-data -> 33
+            runAsUser: 33
+          env:
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: POD_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          ports:
+            - name: http
+              containerPort: 80
+            - name: https
+              containerPort: 443
+          livenessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz
+              port: 10254
+              scheme: HTTP
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
+          readinessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz
+              port: 10254
+              scheme: HTTP
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
+---
+
+kind: Service
+apiVersion: v1
+metadata:
+  name: nginx-controller-apis
+  namespace: $CORTEX_NAMESPACE
+  labels:
+    app.kubernetes.io/name: nginx-controller-apis
+    app.kubernetes.io/part-of: ingress-nginx
+  annotations:
+    # Enable PROXY protocol
+    service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: '*'
+    # Ensure the ELB idle timeout is less than nginx keep-alive timeout. By default,
+    # NGINX keep-alive is set to 75s. If using WebSockets, the value will need to be
+    # increased to '3600' to avoid any potential issues.
+    service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: '60'
+spec:
+  type: LoadBalancer
+  selector:
+    app.kubernetes.io/name: nginx-controller-apis
+    app.kubernetes.io/part-of: ingress-nginx
+  ports:
+    - name: http
+      port: 80
+      targetPort: http
+    - name: https
+      port: 443
+      targetPort: https
diff --git a/manager/manifests/operator.yaml b/manager/manifests/operator.yaml
new file mode 100644
index 0000000000..16669dd2b1
--- /dev/null
+++ b/manager/manifests/operator.yaml
@@ -0,0 +1,111 @@
+# Copyright 2019 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: operator + namespace: $CORTEX_NAMESPACE +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: operator + namespace: $CORTEX_NAMESPACE +subjects: +- kind: ServiceAccount + name: operator + namespace: $CORTEX_NAMESPACE +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: operator + namespace: $CORTEX_NAMESPACE + labels: + workloadType: operator +spec: + replicas: 1 + selector: + matchLabels: + workloadId: operator + template: + metadata: + labels: + workloadId: operator + workloadType: operator + spec: + containers: + - name: operator + image: $CORTEX_IMAGE_OPERATOR + imagePullPolicy: Always + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_SECRET_ACCESS_KEY + volumeMounts: + - name: cortex-config + mountPath: /configs/cortex + volumes: + - name: cortex-config + configMap: + name: cortex-config + serviceAccountName: operator +--- + +kind: Service +apiVersion: v1 +metadata: + name: operator + namespace: $CORTEX_NAMESPACE + labels: + workloadType: operator +spec: + selector: + workloadId: operator + ports: + - port: 8888 + targetPort: 8888 +--- + +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: operator + namespace: $CORTEX_NAMESPACE + labels: + workloadType: operator + annotations: + kubernetes.io/ingress.class: operator +spec: + rules: + - http: + paths: + - path: / + backend: + serviceName: operator + servicePort: 8888 diff --git a/manager/manifests/spark.yaml b/manager/manifests/spark.yaml new file mode 100644 index 0000000000..f15a393cf8 --- /dev/null +++ b/manager/manifests/spark.yaml @@ -0,0 +1,331 @@ +# Copyright 2019 Cortex Labs, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spark-operator + namespace: $CORTEX_NAMESPACE +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spark-operator + namespace: $CORTEX_NAMESPACE +rules: +- apiGroups: [""] + resources: [pods] + verbs: ["*"] +- apiGroups: [""] + resources: [services, configmaps, secrets] + verbs: [create, get, delete] +- apiGroups: [extensions] + resources: [ingresses] + verbs: [create, get, delete] +- apiGroups: [""] + resources: [nodes] + verbs: [get] +- apiGroups: [""] + resources: [events] + verbs: [create, update, patch] +- apiGroups: [apiextensions.k8s.io] + resources: [customresourcedefinitions] + verbs: [create, get, update, delete] +- apiGroups: [admissionregistration.k8s.io] + resources: [mutatingwebhookconfigurations] + verbs: [create, get, update, delete] +- apiGroups: [sparkoperator.k8s.io] + resources: [sparkapplications, scheduledsparkapplications] + verbs: ["*"] +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spark-operator + namespace: $CORTEX_NAMESPACE +subjects: + - kind: ServiceAccount + name: spark-operator + namespace: $CORTEX_NAMESPACE +roleRef: + kind: Role + name: spark-operator + apiGroup: rbac.authorization.k8s.io +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: spark-operator + namespace: $CORTEX_NAMESPACE + labels: + app.kubernetes.io/name: spark-operator + 
app.kubernetes.io/version: v2.4.0-v1alpha1 +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: spark-operator + app.kubernetes.io/version: v2.4.0-v1alpha1 + strategy: + type: Recreate + template: + metadata: + labels: + app.kubernetes.io/name: spark-operator + app.kubernetes.io/version: v2.4.0-v1alpha1 + initializers: + pending: [] + spec: + serviceAccountName: spark-operator + containers: + - name: spark-operator + image: $CORTEX_IMAGE_SPARK_OPERATOR + imagePullPolicy: Always + command: ["/usr/bin/spark-operator"] + args: + - -namespace=$CORTEX_NAMESPACE + - -install-crds=false + - -logtostderr +--- + +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: sparkapplications.sparkoperator.k8s.io +spec: + group: sparkoperator.k8s.io + names: + kind: SparkApplication + listKind: SparkApplicationList + plural: sparkapplications + shortNames: + - sparkapp + singular: sparkapplication + scope: Namespaced + validation: + openAPIV3Schema: + properties: + spec: + properties: + deps: + properties: + downloadTimeout: + minimum: 1 + type: integer + maxSimultaneousDownloads: + minimum: 1 + type: integer + driver: + properties: + cores: + exclusiveMinimum: true + minimum: 0 + type: number + podName: + pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' + executor: + properties: + cores: + exclusiveMinimum: true + minimum: 0 + type: number + instances: + minimum: 1 + type: integer + mode: + enum: + - cluster + - client + monitoring: + properties: + prometheus: + properties: + port: + maximum: 49151 + minimum: 1024 + type: integer + pythonVersion: + enum: + - "2" + - "3" + restartPolicy: + properties: + onFailureRetries: + minimum: 0 + type: integer + onFailureRetryInterval: + minimum: 1 + type: integer + onSubmissionFailureRetries: + minimum: 0 + type: integer + onSubmissionFailureRetryInterval: + minimum: 1 + type: integer + type: + enum: + - Never + - OnFailure + - Always + type: + enum: + - Java 
+ - Scala + - Python + - R + version: v1alpha1 +--- + +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: scheduledsparkapplications.sparkoperator.k8s.io +spec: + group: sparkoperator.k8s.io + names: + kind: ScheduledSparkApplication + listKind: ScheduledSparkApplicationList + plural: scheduledsparkapplications + shortNames: + - scheduledsparkapp + singular: scheduledsparkapplication + scope: Namespaced + validation: + openAPIV3Schema: + properties: + spec: + properties: + concurrencyPolicy: + enum: + - Allow + - Forbid + - Replace + failedRunHistoryLimit: + minimum: 1 + type: integer + schedule: + type: string + successfulRunHistoryLimit: + minimum: 1 + type: integer + template: + properties: + deps: + properties: + downloadTimeout: + minimum: 1 + type: integer + maxSimultaneousDownloads: + minimum: 1 + type: integer + driver: + properties: + cores: + exclusiveMinimum: true + minimum: 0 + type: number + podName: + pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' + executor: + properties: + cores: + exclusiveMinimum: true + minimum: 0 + type: number + instances: + minimum: 1 + type: integer + mode: + enum: + - cluster + - client + monitoring: + properties: + prometheus: + properties: + port: + maximum: 49151 + minimum: 1024 + type: integer + pythonVersion: + enum: + - "2" + - "3" + restartPolicy: + properties: + onFailureRetries: + minimum: 0 + type: integer + onFailureRetryInterval: + minimum: 1 + type: integer + onSubmissionFailureRetries: + minimum: 0 + type: integer + onSubmissionFailureRetryInterval: + minimum: 1 + type: integer + type: + enum: + - Never + - OnFailure + - Always + type: + enum: + - Java + - Scala + - Python + - R + version: v1alpha1 +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spark + namespace: $CORTEX_NAMESPACE +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spark + namespace: $CORTEX_NAMESPACE +rules: +- apiGroups: + - "" # 
"" indicates the core API group + resources: [pods] + verbs: ["*"] +- apiGroups: + - "" # "" indicates the core API group + resources: [services] + verbs: ["*"] +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spark + namespace: $CORTEX_NAMESPACE +subjects: +- kind: ServiceAccount + name: spark + namespace: $CORTEX_NAMESPACE +roleRef: + kind: Role + name: spark + apiGroup: rbac.authorization.k8s.io diff --git a/manager/uninstall_cortex.sh b/manager/uninstall_cortex.sh new file mode 100755 index 0000000000..692c7ceaea --- /dev/null +++ b/manager/uninstall_cortex.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +echo +eksctl utils write-kubeconfig --name=$CORTEX_CLUSTER + +echo -e "\nUninstalling Cortex ..." 
+ +# Remove finalizers on sparkapplications (they sometimes create deadlocks) +if kubectl get namespace $CORTEX_NAMESPACE >/dev/null 2>&1 && kubectl get customresourcedefinition sparkapplications.sparkoperator.k8s.io >/dev/null 2>&1; then + set +e + kubectl -n=$CORTEX_NAMESPACE get sparkapplications.sparkoperator.k8s.io -o name | xargs -L1 \ + kubectl -n=$CORTEX_NAMESPACE patch -p '{"metadata":{"finalizers": []}}' --type=merge >/dev/null 2>&1 + set -e +fi + +kubectl delete --ignore-not-found=true customresourcedefinition scheduledsparkapplications.sparkoperator.k8s.io >/dev/null 2>&1 +kubectl delete --ignore-not-found=true customresourcedefinition sparkapplications.sparkoperator.k8s.io >/dev/null 2>&1 +kubectl delete --ignore-not-found=true customresourcedefinition workflows.argoproj.io >/dev/null 2>&1 +kubectl delete --ignore-not-found=true namespace $CORTEX_NAMESPACE >/dev/null 2>&1 + +echo "✓ Uninstalled Cortex" diff --git a/manager/uninstall_eks.sh b/manager/uninstall_eks.sh new file mode 100755 index 0000000000..c61369e61c --- /dev/null +++ b/manager/uninstall_eks.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +echo -e "\nSpinning down the cluster ... 
(this will take a few minutes)" + +echo +eksctl delete cluster --name=$CORTEX_CLUSTER + +echo -e "\n✓ Spun down the cluster" diff --git a/manager/uninstall_operator.sh b/manager/uninstall_operator.sh new file mode 100755 index 0000000000..f7719039dd --- /dev/null +++ b/manager/uninstall_operator.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +# Note: if namespace is changed, the old namespace will not be deleted + +echo +eksctl utils write-kubeconfig --name=$CORTEX_CLUSTER + +echo -e "\nUninstalling the Cortex operator ..." 
+ +kubectl -n=$CORTEX_NAMESPACE delete --ignore-not-found=true deployment operator >/dev/null 2>&1 +kubectl -n=$CORTEX_NAMESPACE delete --ignore-not-found=true daemonset fluentd >/dev/null 2>&1 # Pods in DaemonSets cannot be modified + +echo "✓ Uninstalled the Cortex operator" diff --git a/pkg/operator/endpoints/errors.go b/pkg/operator/endpoints/errors.go index 938df26c52..fa93f656c1 100644 --- a/pkg/operator/endpoints/errors.go +++ b/pkg/operator/endpoints/errors.go @@ -118,7 +118,7 @@ func ErrorAuthHeaderMalformed() error { func ErrorAuthAPIError() error { return Error{ Kind: ErrAuthAPIError, - message: "the operator is unable to verify user's credentials using AWS STS; export AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY, and run `./cortex-installer.sh update operator` to update the operator's AWS credentials", + message: "the operator is unable to verify user's credentials using AWS STS; export AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY, and run `./cortex.sh update operator` to update the operator's AWS credentials", } } diff --git a/pkg/operator/workloads/errors.go b/pkg/operator/workloads/errors.go index 1a952ad377..612d898f30 100644 --- a/pkg/operator/workloads/errors.go +++ b/pkg/operator/workloads/errors.go @@ -107,7 +107,7 @@ func ErrorWorkflowAppMismatch() error { func ErrorCortexInstallationBroken() error { return Error{ Kind: ErrCortexInstallationBroken, - message: "cortex is out of date, or not installed properly on your cluster; run `./cortex-installer.sh uninstall operator && ./cortex-installer.sh install operator`", + message: "cortex is out of date, or not installed properly on your cluster; run `./cortex.sh update`", } }