diff --git a/tests/assets/karpenter/ai-ml-inference-nodepool-xlarge.yaml b/tests/assets/karpenter/ai-ml-inference-nodepool-xlarge.yaml
new file mode 100644
index 00000000..a3572c54
--- /dev/null
+++ b/tests/assets/karpenter/ai-ml-inference-nodepool-xlarge.yaml
@@ -0,0 +1,60 @@
+# NodePool pinned to zone ${AZ}: on-demand amd64/linux xlarge nodes (m/r, generation > 6).
+# ${AZ} is a placeholder; presumably substituted before the manifest is applied.
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: ai-ml-inference-xlarge-${AZ}
+spec:
+  disruption:
+    budgets:
+    - nodes: 10%
+    consolidateAfter: 0s
+    consolidationPolicy: WhenEmptyOrUnderutilized
+  replicas: 10
+  template:
+    metadata:
+      labels:
+        purpose: ml-xlarge
+    spec:
+      expireAfter: 720h
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: ai-training
+      requirements:
+      - key: topology.kubernetes.io/zone
+        operator: In
+        values:
+        - ${AZ}
+      - key: kubernetes.io/arch
+        operator: In
+        values:
+        - amd64
+      - key: kubernetes.io/os
+        operator: In
+        values:
+        - linux
+      - key: karpenter.sh/capacity-type
+        operator: In
+        values:
+        - on-demand
+      # Instance category is selected through the karpenter.k8s.aws/ label domain.
+      - key: karpenter.k8s.aws/instance-category
+        operator: In
+        values:
+        - m
+        - r
+      - key: karpenter.k8s.aws/instance-size
+        operator: In
+        values:
+        - xlarge
+      - key: karpenter.k8s.aws/instance-generation
+        operator: Gt
+        values:
+        - "6"
+      - key: node.kubernetes.io/instance-type
+        operator: NotIn
+        values:
+        - c7i-flex.xlarge
+        - c7i.xlarge
+        - c7a.xlarge
diff --git a/tests/assets/karpenter/ai-ml-monitoring-24xlarge.yaml b/tests/assets/karpenter/ai-ml-monitoring-24xlarge.yaml
new file mode 100644
index 00000000..3484e947
--- /dev/null
+++ b/tests/assets/karpenter/ai-ml-monitoring-24xlarge.yaml
@@ -0,0 +1,66 @@
+# NodePool pinned to zone ${AZ}: on-demand amd64/linux 24xlarge nodes (c/m/r, generation > 4),
+# tainted monitoring=true:NoSchedule. ${AZ} is a placeholder, presumably substituted before apply.
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: ai-ml-monitoring-24xlarge-${AZ}
+spec:
+  disruption:
+    budgets:
+    - nodes: 100%
+      reasons:
+      - Empty
+    - nodes: 10%
+      reasons:
+      - Drifted
+      - Underutilized
+    consolidateAfter: 0s
+    consolidationPolicy: WhenEmpty
+  limits:
+    nodes: "26400"
+  replicas: 4
+  template:
+    metadata:
+      labels:
+        purpose: ml-24xlarge
+    spec:
+      expireAfter: 720h0m0s
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: ai-training
+      requirements:
+      - key: topology.kubernetes.io/zone
+        operator: In
+        values:
+        - ${AZ}
+      - key: kubernetes.io/arch
+        operator: In
+        values:
+        - amd64
+      - key: kubernetes.io/os
+        operator: In
+        values:
+        - linux
+      - key: karpenter.sh/capacity-type
+        operator: In
+        values:
+        - on-demand
+      - key: karpenter.k8s.aws/instance-category
+        operator: In
+        values:
+        - c
+        - m
+        - r
+      - key: karpenter.k8s.aws/instance-size
+        operator: In
+        values:
+        - 24xlarge
+      - key: karpenter.k8s.aws/instance-generation
+        operator: Gt
+        values:
+        - "4"
+      taints:
+      - effect: NoSchedule
+        key: monitoring
+        value: "true"
diff --git a/tests/assets/karpenter/ai-ml-operator-12xlarge.yaml b/tests/assets/karpenter/ai-ml-operator-12xlarge.yaml
new file mode 100644
index 00000000..856cb41b
--- /dev/null
+++ b/tests/assets/karpenter/ai-ml-operator-12xlarge.yaml
@@ -0,0 +1,61 @@
+# NodePool pinned to zone ${AZ}: on-demand amd64/linux 12xlarge nodes (c/m/r, generation > 6).
+# ${AZ} is a placeholder; presumably substituted before the manifest is applied.
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: ai-ml-operator-12xlarge-${AZ}
+spec:
+  disruption:
+    budgets:
+    - nodes: 100%
+      reasons:
+      - Empty
+    - nodes: 10%
+      reasons:
+      - Drifted
+      - Underutilized
+    consolidateAfter: 0s
+    consolidationPolicy: WhenEmpty
+  replicas: 5
+  template:
+    metadata:
+      labels:
+        purpose: ml-12xlarge
+    spec:
+      expireAfter: 720h
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: ai-training
+      requirements:
+      - key: topology.kubernetes.io/zone
+        operator: In
+        values:
+        - ${AZ}
+      - key: kubernetes.io/arch
+        operator: In
+        values:
+        - amd64
+      - key: kubernetes.io/os
+        operator: In
+        values:
+        - linux
+      - key: karpenter.sh/capacity-type
+        operator: In
+        values:
+        - on-demand
+      # Instance category is selected through the karpenter.k8s.aws/ label domain.
+      - key: karpenter.k8s.aws/instance-category
+        operator: In
+        values:
+        - c
+        - m
+        - r
+      - key: karpenter.k8s.aws/instance-size
+        operator: In
+        values:
+        - 12xlarge
+      - key: karpenter.k8s.aws/instance-generation
+        operator: Gt
+        values:
+        - "6"
diff --git a/tests/assets/karpenter/ai-ml-training-large.yaml b/tests/assets/karpenter/ai-ml-training-large.yaml
new file mode 100644
index 00000000..4151d82d
--- /dev/null
+++ b/tests/assets/karpenter/ai-ml-training-large.yaml
@@ -0,0 +1,68 @@
+# NodePool pinned to zone ${AZ}: on-demand amd64/linux medium/large nodes (c/m/r, generation > 4),
+# excluding c7a.medium. ${AZ} is a placeholder, presumably substituted before apply.
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: ai-ml-training-large-${AZ}
+spec:
+  disruption:
+    budgets:
+    - nodes: 100%
+      reasons:
+      - Empty
+    - nodes: 10%
+      reasons:
+      - Drifted
+      - Underutilized
+    consolidateAfter: 0s
+    consolidationPolicy: WhenEmpty
+  limits:
+    nodes: "26400"
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        drift: drifting-test
+        purpose: ml-large
+    spec:
+      expireAfter: 720h0m0s
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: ai-training
+      requirements:
+      - key: topology.kubernetes.io/zone
+        operator: In
+        values:
+        - ${AZ}
+      - key: kubernetes.io/arch
+        operator: In
+        values:
+        - amd64
+      - key: kubernetes.io/os
+        operator: In
+        values:
+        - linux
+      - key: karpenter.sh/capacity-type
+        operator: In
+        values:
+        - on-demand
+      - key: karpenter.k8s.aws/instance-category
+        operator: In
+        values:
+        - c
+        - m
+        - r
+      - key: karpenter.k8s.aws/instance-size
+        operator: In
+        values:
+        - medium
+        - large
+      - key: karpenter.k8s.aws/instance-generation
+        operator: Gt
+        values:
+        - "4"
+      - key: node.kubernetes.io/instance-type
+        operator: NotIn
+        values:
+        - c7a.medium
diff --git a/tests/assets/karpenter/eks-perflab-ai-training-nodeclass.yaml b/tests/assets/karpenter/eks-perflab-ai-training-nodeclass.yaml
new file mode 100644
index 00000000..2e84688e
--- /dev/null
+++ b/tests/assets/karpenter/eks-perflab-ai-training-nodeclass.yaml
@@ -0,0 +1,75 @@
+# EC2NodeClass "ai-training": AL2023 AMI alias, 70Gi gp3 volume on /dev/xvda, httpTokens required.
+# ${...} tokens are placeholders; presumably substituted before the manifest is applied.
+apiVersion: karpenter.k8s.aws/v1
+kind: EC2NodeClass
+metadata:
+  name: ai-training
+spec:
+  amiFamily: AL2023
+  amiSelectorTerms:
+  - alias: "al2023@${ALIAS_VERSION}"
+  blockDeviceMappings:
+  - deviceName: /dev/xvda
+    ebs:
+      deleteOnTermination: true
+      volumeSize: 70Gi
+      volumeType: gp3
+  kubelet:
+    evictionHard:
+      memory.available: 5%
+      nodefs.available: 10%
+      nodefs.inodesFree: 10%
+    kubeReserved:
+      cpu: 100m
+      ephemeral-storage: 1Gi
+      memory: 100Mi
+    maxPods: 110
+    systemReserved:
+      cpu: 100m
+      ephemeral-storage: 1Gi
+      memory: 100Mi
+  metadataOptions:
+    httpEndpoint: enabled
+    httpProtocolIPv6: disabled
+    httpPutResponseHopLimit: 1
+    httpTokens: required
+  role: KarpenterNodeRole-${CLUSTER_NAME}
+  securityGroupSelectorTerms:
+  - tags:
+      karpenter.sh/discovery: "${CLUSTER_NAME}"
+  - tags:
+      aws:cloudformation:stack-name: "${CLUSTER_NAME}"
+  - tags:
+      kubernetes.io/cluster/${CLUSTER_NAME}: owned
+  subnetSelectorTerms:
+  - tags:
+      karpenter.sh/discovery: "${CLUSTER_NAME}"
+  - tags:
+      aws:cloudformation:stack-name: "${CLUSTER_NAME}"
+  # NOTE(review): the kubelet flags below hard-code karpenter.sh/nodepool=titan-pool;
+  # confirm that label is intended for nodes launched from this ai-training class.
+  userData: |
+    MIME-Version: 1.0
+    Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+    --BOUNDARY
+    Content-Type: application/node.eks.aws
+
+    apiVersion: node.eks.aws/v1alpha1
+    kind: NodeConfig
+    spec:
+      cluster:
+        name: ${CLUSTER_NAME}
+        apiServerEndpoint: ${CLUSTER_ENDPOINT} # Using the actual cluster endpoint
+        certificateAuthority: ${CLUSTER_CA}
+        cidr: "172.20.0.0/16"
+      kubelet:
+        config:
+          nodeStatusReportFrequency: "60m"
+          nodeLeaseDurationSeconds: 120
+          maxPods: 110
+          clusterDNS: ["172.20.0.10"]
+        flags:
+        - --node-labels=karpenter.sh/capacity-type=on-demand,karpenter.sh/nodepool=titan-pool
+        - --register-with-taints=karpenter.sh/unregistered:NoExecute
+    --BOUNDARY--
diff --git a/tests/assets/karpenter/eks-perflab-titan-class-nodeclass.yaml b/tests/assets/karpenter/eks-perflab-titan-class-nodeclass.yaml
new file mode 100644
index 00000000..de900c33
--- /dev/null
+++ b/tests/assets/karpenter/eks-perflab-titan-class-nodeclass.yaml
@@ -0,0 +1,67 @@
+# EC2NodeClass "titan-class": Custom AMI family with an al2023 alias and an explicit instance profile.
+# ${...} tokens are placeholders; presumably substituted before the manifest is applied.
+apiVersion: karpenter.k8s.aws/v1
+kind: EC2NodeClass
+metadata:
+  name: titan-class
+spec:
+  amiFamily: Custom
+  amiSelectorTerms:
+  - alias: "al2023@${ALIAS_VERSION}"
+  instanceProfile: KarpenterNodeInstanceProfile-${CLUSTER_NAME}
+  kubelet:
+    evictionHard:
+      memory.available: 5%
+      nodefs.available: 10%
+      nodefs.inodesFree: 10%
+    kubeReserved:
+      cpu: 100m
+      ephemeral-storage: 1Gi
+      memory: 100Mi
+    maxPods: 110
+    systemReserved:
+      cpu: 100m
+      ephemeral-storage: 1Gi
+      memory: 100Mi
+  metadataOptions:
+    httpEndpoint: enabled
+    httpProtocolIPv6: disabled
+    httpPutResponseHopLimit: 1
+    httpTokens: required
+  securityGroupSelectorTerms:
+  - tags:
+      karpenter.sh/discovery: "${CLUSTER_NAME}"
+  - tags:
+      aws:cloudformation:stack-name: "${CLUSTER_NAME}"
+  - tags:
+      kubernetes.io/cluster/${CLUSTER_NAME}: owned
+  subnetSelectorTerms:
+  - tags:
+      karpenter.sh/discovery: "${CLUSTER_NAME}"
+  - tags:
+      aws:cloudformation:stack-name: "${CLUSTER_NAME}"
+  userData: |
+    MIME-Version: 1.0
+    Content-Type: multipart/mixed; boundary="BOUNDARY"
+
+    --BOUNDARY
+    Content-Type: application/node.eks.aws
+
+    apiVersion: node.eks.aws/v1alpha1
+    kind: NodeConfig
+    spec:
+      cluster:
+        name: ${CLUSTER_NAME}
+        apiServerEndpoint: ${CLUSTER_ENDPOINT} # Using the actual cluster endpoint
+        certificateAuthority: ${CLUSTER_CA}
+        cidr: "172.20.0.0/16"
+      kubelet:
+        config:
+          nodeStatusReportFrequency: "60m"
+          nodeLeaseDurationSeconds: 120
+          maxPods: 110
+          clusterDNS: ["172.20.0.10"]
+        flags:
+        - --node-labels=karpenter.sh/capacity-type=on-demand,karpenter.sh/nodepool=titan-pool
+        - --register-with-taints=karpenter.sh/unregistered:NoExecute
+    --BOUNDARY--
diff --git a/tests/assets/karpenter/titan-pool-large.yaml b/tests/assets/karpenter/titan-pool-large.yaml
new file mode 100644
index 00000000..69d1ff1c
--- /dev/null
+++ b/tests/assets/karpenter/titan-pool-large.yaml
@@ -0,0 +1,49 @@
+# NodePool pinned to zone ${AZ}: on-demand amd64/linux large nodes from the c/m/r/t categories.
+# ${AZ} is a placeholder; presumably substituted before the manifest is applied.
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: titan-pool-large-${AZ}
+spec:
+  disruption:
+    budgets:
+    - nodes: 10%
+    consolidateAfter: 0s
+    consolidationPolicy: WhenEmptyOrUnderutilized
+  replicas: 10
+  template:
+    spec:
+      expireAfter: 720h
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: titan-class
+      requirements:
+      - key: topology.kubernetes.io/zone
+        operator: In
+        values:
+        - ${AZ}
+      - key: kubernetes.io/arch
+        operator: In
+        values:
+        - amd64
+      - key: kubernetes.io/os
+        operator: In
+        values:
+        - linux
+      - key: karpenter.sh/capacity-type
+        operator: In
+        values:
+        - on-demand
+      # Instance category is selected through the karpenter.k8s.aws/ label domain.
+      - key: karpenter.k8s.aws/instance-category
+        operator: In
+        values:
+        - c
+        - m
+        - r
+        - t
+      - key: karpenter.k8s.aws/instance-size
+        operator: In
+        values:
+        - large
diff --git a/tests/tekton-resources/pipelines/eks/karpenter-ultra.yaml b/tests/tekton-resources/pipelines/eks/karpenter-ultra.yaml
index bbaec91b..a6b90bfc 100644
--- a/tests/tekton-resources/pipelines/eks/karpenter-ultra.yaml
+++ b/tests/tekton-resources/pipelines/eks/karpenter-ultra.yaml
@@ -80,6 +80,9 @@ spec:
   - default: ""
     name: aws-account-id
     type: string
+  - default: ""
+    name: slack-hook
+    type: string
   tasks:
   - name: awscli-vpc-create
     params: