From f113a3510815ce3b2714825c317fb369618939a4 Mon Sep 17 00:00:00 2001 From: vishal Date: Mon, 29 Mar 2021 13:47:39 -0400 Subject: [PATCH 1/2] Generate ami mapping and use it in cluster up --- build/generate_ami_mapping.go | 296 ++++++++++++++++++++++++++++++++++ manager/generate_eks.py | 27 +++- manager/install.sh | 2 +- manager/manifests/ami.json | 88 ++++++++++ 4 files changed, 406 insertions(+), 7 deletions(-) create mode 100644 build/generate_ami_mapping.go create mode 100644 manager/manifests/ami.json diff --git a/build/generate_ami_mapping.go b/build/generate_ami_mapping.go new file mode 100644 index 0000000000..541e6b14ee --- /dev/null +++ b/build/generate_ami_mapping.go @@ -0,0 +1,296 @@ +/* +Copyright 2021 Cortex Labs, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "log" + "os" + "sort" + "time" + + "github.com/pkg/errors" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" +) + +// copied from https://github.com/weaveworks/eksctl/blob/c211e68d3c8cf3c7f800768bfa0251dda17e011c/pkg/apis/eksctl.io/v1alpha5/types.go +// most of this code can be removed once eksctl can be imported: https://github.com/weaveworks/eksctl/issues/813 +const ( + eksResourceAccountStandard = "602401143452" + + // eksResourceAccountAPEast1 defines the AWS EKS account ID that provides node resources in ap-east-1 region + eksResourceAccountAPEast1 = "800184023465" + + // eksResourceAccountMESouth1 defines the AWS EKS account ID that provides node resources in me-south-1 region + eksResourceAccountMESouth1 = "558608220178" + + // eksResourceAccountCNNorthWest1 defines the AWS EKS account ID that provides node resources in cn-northwest-1 region + eksResourceAccountCNNorthWest1 = "961992271922" + + // eksResourceAccountCNNorth1 defines the AWS EKS account ID that provides node resources in cn-north-1 + eksResourceAccountCNNorth1 = "918309763551" + + // eksResourceAccountAFSouth1 defines the AWS EKS account ID that provides node resources in af-south-1 + eksResourceAccountAFSouth1 = "877085696533" + + // eksResourceAccountEUSouth1 defines the AWS EKS account ID that provides node resources in eu-south-1 + eksResourceAccountEUSouth1 = "590381155156" + + // eksResourceAccountUSGovWest1 defines the AWS EKS account ID that provides node resources in us-gov-west-1 + eksResourceAccountUSGovWest1 = "013241004608" + + // eksResourceAccountUSGovEast1 defines the AWS EKS account ID that provides node resources in us-gov-east-1 + eksResourceAccountUSGovEast1 = "151742754352" +) + +// Regions +const ( + // RegionUSWest1 represents the US West Region North California + RegionUSWest1 = 
"us-west-1" + + // RegionUSWest2 represents the US West Region Oregon + RegionUSWest2 = "us-west-2" + + // RegionUSEast1 represents the US East Region North Virginia + RegionUSEast1 = "us-east-1" + + // RegionUSEast2 represents the US East Region Ohio + RegionUSEast2 = "us-east-2" + + // RegionCACentral1 represents the Canada Central Region + RegionCACentral1 = "ca-central-1" + + // RegionEUWest1 represents the EU West Region Ireland + RegionEUWest1 = "eu-west-1" + + // RegionEUWest2 represents the EU West Region London + RegionEUWest2 = "eu-west-2" + + // RegionEUWest3 represents the EU West Region Paris + RegionEUWest3 = "eu-west-3" + + // RegionEUNorth1 represents the EU North Region Stockholm + RegionEUNorth1 = "eu-north-1" + + // RegionEUCentral1 represents the EU Central Region Frankfurt + RegionEUCentral1 = "eu-central-1" + + // RegionEUSouth1 represents the EU South Region Milan + RegionEUSouth1 = "eu-south-1" + + // RegionAPNorthEast1 represents the Asia-Pacific North East Region Tokyo + RegionAPNorthEast1 = "ap-northeast-1" + + // RegionAPNorthEast2 represents the Asia-Pacific North East Region Seoul + RegionAPNorthEast2 = "ap-northeast-2" + + // RegionAPNorthEast3 represents the Asia-Pacific North East region Osaka + RegionAPNorthEast3 = "ap-northeast-3" + + // RegionAPSouthEast1 represents the Asia-Pacific South East Region Singapore + RegionAPSouthEast1 = "ap-southeast-1" + + // RegionAPSouthEast2 represents the Asia-Pacific South East Region Sydney + RegionAPSouthEast2 = "ap-southeast-2" + + // RegionAPSouth1 represents the Asia-Pacific South Region Mumbai + RegionAPSouth1 = "ap-south-1" + + // RegionAPEast1 represents the Asia Pacific Region Hong Kong + RegionAPEast1 = "ap-east-1" + + // RegionMESouth1 represents the Middle East Region Bahrain + RegionMESouth1 = "me-south-1" + + // RegionSAEast1 represents the South America Region Sao Paulo + RegionSAEast1 = "sa-east-1" + + // RegionAFSouth1 represents the Africa Region Cape Town + RegionAFSouth1 = 
"af-south-1" + + // RegionCNNorthwest1 represents the China region Ningxia + RegionCNNorthwest1 = "cn-northwest-1" + + // RegionCNNorth1 represents the China region Beijing + RegionCNNorth1 = "cn-north-1" + + // RegionUSGovWest1 represents the region GovCloud (US-West) + RegionUSGovWest1 = "us-gov-west-1" + + // RegionUSGovEast1 represents the region GovCloud (US-East) + RegionUSGovEast1 = "us-gov-east-1" + + // DefaultRegion defines the default region, where to deploy the EKS cluster + DefaultRegion = RegionUSWest2 +) + +// SupportedRegions are the regions where EKS is available +func SupportedRegions() []string { + return []string{ + RegionUSWest1, + RegionUSWest2, + RegionUSEast1, + RegionUSEast2, + RegionCACentral1, + RegionEUWest1, + RegionEUWest2, + RegionEUWest3, + RegionEUNorth1, + RegionEUCentral1, + RegionEUSouth1, + RegionAPNorthEast1, + RegionAPNorthEast2, + RegionAPNorthEast3, + RegionAPSouthEast1, + RegionAPSouthEast2, + RegionAPSouth1, + RegionAPEast1, + RegionMESouth1, + RegionSAEast1, + RegionAFSouth1, + // RegionCNNorthwest1, + // RegionCNNorth1, + // RegionUSGovWest1, + // RegionUSGovEast1, + } +} + +func EKSResourceAccountID(region string) string { + switch region { + case RegionAPEast1: + return eksResourceAccountAPEast1 + case RegionMESouth1: + return eksResourceAccountMESouth1 + case RegionCNNorthwest1: + return eksResourceAccountCNNorthWest1 + case RegionCNNorth1: + return eksResourceAccountCNNorth1 + case RegionUSGovWest1: + return eksResourceAccountUSGovWest1 + case RegionUSGovEast1: + return eksResourceAccountUSGovEast1 + case RegionAFSouth1: + return eksResourceAccountAFSouth1 + case RegionEUSouth1: + return eksResourceAccountEUSouth1 + default: + return eksResourceAccountStandard + } +} + +func main() { + destFile := mustExtractArg() + k8sVersion := "1.18" + regions := map[string]map[string]string{} + for _, region := range SupportedRegions() { + fmt.Print(region) + sess := session.New(&aws.Config{Region: aws.String(region)}) + svc := 
ec2.New(sess) + cpuAMI, err := FindImage(svc, EKSResourceAccountID(region), fmt.Sprintf("amazon-eks-node-%s-v*", k8sVersion)) + if err != nil { + log.Fatal(err.Error()) + } + acceleratedAMI, err := FindImage(svc, EKSResourceAccountID(region), fmt.Sprintf("amazon-eks-gpu-node-%s-v*", k8sVersion)) + if err != nil { + log.Fatal(err.Error()) + } + regions[region] = map[string]string{ + "cpu": cpuAMI, + "accelerated": acceleratedAMI, + } + fmt.Println(" ✓") + } + + k8sVersionMap := map[string]interface{}{} + k8sVersionMap[k8sVersion] = regions + marshalledBytes, err := json.MarshalIndent(k8sVersionMap, "", "\t") + if err != nil { + log.Fatal(err.Error()) + } + + err = ioutil.WriteFile(destFile, marshalledBytes, 0664) + if err != nil { + log.Fatal(err.Error()) + } +} + +func FindImage(ec2api ec2iface.EC2API, ownerAccount, namePattern string) (string, error) { + input := &ec2.DescribeImagesInput{ + Owners: []*string{&ownerAccount}, + Filters: []*ec2.Filter{ + { + Name: aws.String("name"), + Values: []*string{&namePattern}, + }, + { + Name: aws.String("virtualization-type"), + Values: []*string{aws.String("hvm")}, + }, + { + Name: aws.String("root-device-type"), + Values: []*string{aws.String("ebs")}, + }, + { + Name: aws.String("is-public"), + Values: []*string{aws.String("true")}, + }, + { + Name: aws.String("state"), + Values: []*string{aws.String("available")}, + }, + }, + } + + output, err := ec2api.DescribeImages(input) + if err != nil { + return "", errors.Wrapf(err, "error querying AWS for images") + } + + if len(output.Images) < 1 { + return "", nil + } + + if len(output.Images) == 1 { + return *output.Images[0].ImageId, nil + } + + // Sort images so newest is first + sort.Slice(output.Images, func(i, j int) bool { + //nolint:gosec + creationLeft, _ := time.Parse(time.RFC3339, *output.Images[i].CreationDate) + //nolint:gosec + creationRight, _ := time.Parse(time.RFC3339, *output.Images[j].CreationDate) + return creationLeft.After(creationRight) + }) + + return 
*output.Images[0].ImageId, nil +} + +func mustExtractArg() string { + if len(os.Args) != 2 { + fmt.Println("usage: go run generate_ami_mapping.go ") + os.Exit(1) + } + + return os.Args[1] +} diff --git a/manager/generate_eks.py b/manager/generate_eks.py index 3dd59df684..8ddef0973f 100644 --- a/manager/generate_eks.py +++ b/manager/generate_eks.py @@ -15,13 +15,14 @@ import sys import yaml +import json +K8S_VERSION = "1.18" # kubelet config schema: # https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/kubelet/config/v1beta1/types.go def default_nodegroup(cluster_config): return { - "ami": "auto", "iam": { "withAddonPolicies": {"autoScaler": True}, "attachPolicyARNs": [ @@ -161,10 +162,12 @@ def get_inf_resources(instance_type): return num_chips, f"{128 * num_chips}Mi" -def get_all_worker_nodegroups(cluster_config: dict) -> list: +def get_all_worker_nodegroups(ami_map: dict, cluster_config: dict) -> list: worker_nodegroups = [] for ng in cluster_config["node_groups"]: worker_nodegroup = default_nodegroup(cluster_config) + worker_nodegroup["ami"] = get_ami(ami_map, ng["instance_type"]) + apply_worker_settings(worker_nodegroup, ng) apply_clusterconfig(worker_nodegroup, ng) @@ -182,12 +185,24 @@ def get_all_worker_nodegroups(cluster_config: dict) -> list: return worker_nodegroups -def generate_eks(cluster_config_path): +def get_ami(ami_map: dict, instance_type: str) -> str: + if is_gpu(instance_type) or is_inf(instance_type): + return ami_map["accelerated"] + return ami_map["cpu"] + + +def generate_eks(cluster_config_path, ami_json_path): with open(cluster_config_path, "r") as f: cluster_config = yaml.safe_load(f) + region = cluster_config["region"] + + with open(ami_json_path, "r") as f: + ami_map = json.load(f)[K8S_VERSION][region] + operator_nodegroup = default_nodegroup(cluster_config) operator_settings = { + "ami": get_ami(ami_map, "t3.medium"), "name": "cx-operator", "instanceType": "t3.medium", "minSize": 2, @@ -196,7 +211,7 @@ def 
generate_eks(cluster_config_path): } operator_nodegroup = merge_override(operator_nodegroup, operator_settings) - worker_nodegroups = get_all_worker_nodegroups(cluster_config) + worker_nodegroups = get_all_worker_nodegroups(ami_map, cluster_config) nat_gateway = "Disable" if cluster_config["nat_gateway"] == "single": @@ -210,7 +225,7 @@ def generate_eks(cluster_config_path): "metadata": { "name": cluster_config["cluster_name"], "region": cluster_config["region"], - "version": "1.18", + "version": K8S_VERSION, "tags": cluster_config["tags"], }, "vpc": {"nat": {"gateway": nat_gateway}}, @@ -252,4 +267,4 @@ def ignore_aliases(self, data): if __name__ == "__main__": - generate_eks(cluster_config_path=sys.argv[1]) + generate_eks(cluster_config_path=sys.argv[1], ami_json_path=sys.argv[2]) diff --git a/manager/install.sh b/manager/install.sh index 2206d4a88c..f623938a34 100755 --- a/manager/install.sh +++ b/manager/install.sh @@ -147,7 +147,7 @@ function create_eks() { fi echo -e "○ spinning up the cluster (this will take about 25 minutes) ...\n" - python generate_eks.py $CORTEX_CLUSTER_CONFIG_FILE > /workspace/eks.yaml + python generate_eks.py $CORTEX_CLUSTER_CONFIG_FILE manifests/ami.json > /workspace/eks.yaml eksctl create cluster --timeout=$EKSCTL_TIMEOUT --install-neuron-plugin=false --install-nvidia-plugin=false -f /workspace/eks.yaml echo diff --git a/manager/manifests/ami.json b/manager/manifests/ami.json new file mode 100644 index 0000000000..d090b81550 --- /dev/null +++ b/manager/manifests/ami.json @@ -0,0 +1,88 @@ +{ + "1.18": { + "af-south-1": { + "accelerated": "ami-0449e43458923407e", + "cpu": "ami-0114a922955a0b9aa" + }, + "ap-east-1": { + "accelerated": "ami-097271e14e95f0068", + "cpu": "ami-0ba7eff4ab9afaaca" + }, + "ap-northeast-1": { + "accelerated": "ami-06a0016bf0b1dc9fb", + "cpu": "ami-0c4f7ed0f96c7d333" + }, + "ap-northeast-2": { + "accelerated": "ami-03e9c915844970121", + "cpu": "ami-0471e35b27cfeef14" + }, + "ap-northeast-3": { + "accelerated": 
"ami-039479e380a32721b", + "cpu": "ami-086e0e9bd40db60f5" + }, + "ap-south-1": { + "accelerated": "ami-0cb31a5b6c3ac4cb6", + "cpu": "ami-0c9005c1080938019" + }, + "ap-southeast-1": { + "accelerated": "ami-0a52f540d258f9673", + "cpu": "ami-001789b7865972490" + }, + "ap-southeast-2": { + "accelerated": "ami-053dfd9f640f65168", + "cpu": "ami-00fad97745b2d2c64" + }, + "ca-central-1": { + "accelerated": "ami-035ba6d09c540290f", + "cpu": "ami-0ca9d348267a8c65d" + }, + "eu-central-1": { + "accelerated": "ami-0d42212d35cc8d237", + "cpu": "ami-0f85d2eeb0bea62a7" + }, + "eu-north-1": { + "accelerated": "ami-016de826d1d553d1b", + "cpu": "ami-05dc6dcd932a8159e" + }, + "eu-south-1": { + "accelerated": "ami-0ce76bb81c438e3b9", + "cpu": "ami-0ca535994b7de3e69" + }, + "eu-west-1": { + "accelerated": "ami-023b929963e20eff3", + "cpu": "ami-016c2a4f422f847a2" + }, + "eu-west-2": { + "accelerated": "ami-0ccc26465f00e100d", + "cpu": "ami-08b9b0904b1d53903" + }, + "eu-west-3": { + "accelerated": "ami-0ca7fdd012fc03b73", + "cpu": "ami-09198b441d6e41f77" + }, + "me-south-1": { + "accelerated": "ami-0d109448b9811cc5d", + "cpu": "ami-0801d12e832ce09f7" + }, + "sa-east-1": { + "accelerated": "ami-069a705625dac028c", + "cpu": "ami-01a65422c4c57f063" + }, + "us-east-1": { + "accelerated": "ami-089f9a2a14c92f7e3", + "cpu": "ami-09ba20e934af98ddf" + }, + "us-east-2": { + "accelerated": "ami-0da728680ca5ee572", + "cpu": "ami-0576aabae1709e005" + }, + "us-west-1": { + "accelerated": "ami-085fdad868b9007dd", + "cpu": "ami-09bec0a8c8d4925a6" + }, + "us-west-2": { + "accelerated": "ami-0039397c9cf7f00ce", + "cpu": "ami-01f2f1180914aad2b" + } + } +} \ No newline at end of file From daa54b64a61dc3647cf90c45f67e8b5ae9ea787d Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Mon, 29 Mar 2021 12:02:53 -0700 Subject: [PATCH 2/2] Misc --- build/generate_ami_mapping.go | 3 +++ build/lint.sh | 13 +++++++++++++ manager/manifests/ami.json | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git 
a/build/generate_ami_mapping.go b/build/generate_ami_mapping.go index 541e6b14ee..d3f38f027a 100644 --- a/build/generate_ami_mapping.go +++ b/build/generate_ami_mapping.go @@ -33,6 +33,7 @@ import ( "github.com/aws/aws-sdk-go/service/ec2/ec2iface" ) +// run with `go run build/generate_ami_mapping.go manager/manifests/ami.json` // copied from https://github.com/weaveworks/eksctl/blob/c211e68d3c8cf3c7f800768bfa0251dda17e011c/pkg/apis/eksctl.io/v1alpha5/types.go // most of this code can be removed once eksctl can be imported: https://github.com/weaveworks/eksctl/issues/813 const ( @@ -228,6 +229,8 @@ func main() { log.Fatal(err.Error()) } + marshalledBytes = append(marshalledBytes, []byte("\n")...) + err = ioutil.WriteFile(destFile, marshalledBytes, 0664) if err != nil { log.Fatal(err.Error()) diff --git a/build/lint.sh b/build/lint.sh index bbdc70bc13..442a4a01d5 100755 --- a/build/lint.sh +++ b/build/lint.sh @@ -85,6 +85,8 @@ output=$(cd "$ROOT" && find . -type f \ ! -path "**/.idea/*" \ ! -path "**/.history/*" \ ! -path "**/__pycache__/*" \ +! -path "**/.pytest_cache/*" \ +! -path "**.egg-info/*" \ ! -path "./test/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ @@ -94,6 +96,7 @@ output=$(cd "$ROOT" && find . -type f \ ! -name "*requirements.txt" \ ! -name "go.*" \ ! -name "*.md" \ +! -name "*.json" \ ! -name ".*" \ ! -name "*.bin" \ ! -name "Dockerfile" \ @@ -113,6 +116,8 @@ if [ "$is_release_branch" = "true" ]; then ! -path "**/.idea/*" \ ! -path "**/.history/*" \ ! -path "**/__pycache__/*" \ + ! -path "**/.pytest_cache/*" \ + ! -path "**.egg-info/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ ! -path "./.git/*" \ @@ -134,6 +139,8 @@ output=$(cd "$ROOT" && find . -type f \ ! -path "**/.history/*" \ ! -path "**/.vscode/*" \ ! -path "**/__pycache__/*" \ +! -path "**/.pytest_cache/*" \ +! -path "**.egg-info/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ ! -path "./.git/*" \ @@ -154,6 +161,8 @@ output=$(cd "$ROOT" && find . -type f \ ! 
-path "**/.history/*" \ ! -path "**/.vscode/*" \ ! -path "**/__pycache__/*" \ +! -path "**/.pytest_cache/*" \ +! -path "**.egg-info/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ ! -path "./.git/*" \ @@ -174,6 +183,8 @@ output=$(cd "$ROOT" && find . -type f \ ! -path "**/.idea/*" \ ! -path "**/.history/*" \ ! -path "**/__pycache__/*" \ +! -path "**/.pytest_cache/*" \ +! -path "**.egg-info/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ ! -path "./.git/*" \ @@ -194,6 +205,8 @@ output=$(cd "$ROOT" && find . -type f \ ! -path "**/.history/*" \ ! -path "**/.vscode/*" \ ! -path "**/__pycache__/*" \ +! -path "**/.pytest_cache/*" \ +! -path "**.egg-info/*" \ ! -path "./dev/config/*" \ ! -path "./bin/*" \ ! -path "./.git/*" \ diff --git a/manager/manifests/ami.json b/manager/manifests/ami.json index d090b81550..41e72375ed 100644 --- a/manager/manifests/ami.json +++ b/manager/manifests/ami.json @@ -85,4 +85,4 @@ "cpu": "ami-01f2f1180914aad2b" } } -} \ No newline at end of file +}