---
# Tekton Task: runs clusterloader2 (CL2) networking/DNS load tests against an
# existing EKS cluster and uploads the results to S3. Optionally wires the CL2
# Prometheus stack to an AMP workspace via a SigV4 proxy sidecar.
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
  name: load-networking
  namespace: scalability
spec:
  description: "clusterloader2 task to run various types of cl2 tests on a given cluster."
  params:
    - name: giturl
      description: "git url to clone the package"
      # NOTE(review): default points at a personal fork (mengqiy), not
      # kubernetes/perf-tests — confirm this is intentional.
      default: "https://github.com/mengqiy/perf-tests.git"
    - name: cl2-branch
      description: "The branch of clusterloader2 you want to use"
      default: "master"
    - name: nodes-per-namespace
      description: "nodes per namespace to get created for load test "
      default: "100"
    - name: cl2-load-test-throughput
      description: " throughput used for mutate operations"
      default: "15"
    - name: pods-per-node
      description: "pod density"
      default: "10"
    - name: nodes
      description: "number of dataplane nodes to run the load test against"
      default: "1000"
    - name: results-bucket
      # No default: the caller must always supply the destination bucket/path.
      description: "Results bucket with path of s3 to upload results"
    - name: region
      default: "us-west-2"
      description: The region where the cluster is in.
    - name: cluster-name
      description: The name of the EKS cluster you want to spin.
    - name: amp-workspace-id
      description: The AMP workspace ID where remote write needs to happen.
      # Empty string disables the Prometheus/AMP remote-write wiring below.
      default: ""
    - name: networking-test-config-url
      default: "https://raw.githubusercontent.com/awslabs/kubernetes-iteration-toolkit/main/tests/assets/eks-networking/config-eks-networking.yaml"
    - name: networking-svc-test-config-url
      default: "https://raw.githubusercontent.com/awslabs/kubernetes-iteration-toolkit/main/tests/assets/eks-networking/test-svc.yaml"
  results:
    - name: datapoint
      description: Stores the CL2 result that can be consumed by other tasks (e.g. cloudwatch)
    - name: s3_result
      description: Stores the S3 result path after compute
  workspaces:
    - name: source
      mountPath: /src/k8s.io/
    - name: results
    - name: config
      mountPath: /config/
  stepTemplate:
    env:
      # Shared by every step; the `config` workspace must contain the
      # kubeconfig for the target cluster at this path.
      - name: KUBECONFIG
        value: /config/kubeconfig
  steps:
    # Clone the perf-tests repo at the requested branch/tag.
    - name: git-clone
      image: alpine/git
      workingDir: $(workspaces.source.path)
      script: |
        git clone $(params.giturl)
        cd $(workspaces.source.path)/perf-tests/
        git fetch origin --verbose --tags
        git checkout $(params.cl2-branch)
        git branch
    # Generate the CL2 overrides file, optionally patch the bundled Prometheus
    # manifests for AMP remote-write, and build the clusterloader binary.
    - name: prepare-loadtest
      image: golang:1.24
      workingDir: $(workspaces.source.path)
      script: |
        S3_RESULT_PATH=$(params.results-bucket)
        echo $S3_RESULT_PATH > $(results.s3_result.path)
        echo "S3 Path: $S3_RESULT_PATH"
        cat > "$(workspaces.source.path)/overrides.yaml" <<EOL
        NODES_PER_NAMESPACE: $(params.nodes-per-namespace)
        PODS_PER_NODE: $(params.pods-per-node)
        NODE_MODE: master
        # DNS test settings
        CL2_ENABLE_DNSTESTS: true
        CL2_USE_ADVANCED_DNSTEST: true

        # Note: default setting for dns client pod
        # bydefault the tester create 5 dns client pods
        # additionally, 1 extra DNS client pod is created for every 100 nodes in the cluster
        CL2_DNS_K8S_HOSTNAMES_PER_CLIENT_QPS: 10
        CL2_DNS_K8S_HOSTNAMES_CLIENT_PODS_FACTOR: 1
        CL2_DNS_K8S_HOSTNAMES_TEST_MINUTES: 1

        # DNS SLOs, ignore the DNS error for now since the dnsperfgo contains non-existing FQDNs
        CL2_DNS_LOOKUP_LATENCY_99_THRESHOLD: 60
        DNS_ERROR_PERC_THRESHOLD: 100

        # KubeProxy SLOs
        CL2_ENABLE_VIOLATIONS_FOR_KUBEPROXY_PROGRAMMING_LATENCIES: true
        CL2_NETWORK_LATENCY_THRESHOLD: 20s
        CL2_NETWORK_PROGRAMMING_LATENCY_THRESHOLD: 300s

        CL2_PROMETHEUS_NODE_SELECTOR: "eks.amazonaws.com/nodegroup: monitoring-$(params.cluster-name)-nodes-1"
        CL2_PROMETHEUS_MEMORY_SCALE_FACTOR: 4
        EOL
        cat $(workspaces.source.path)/overrides.yaml
        cp $(workspaces.source.path)/overrides.yaml $(workspaces.results.path)/overrides.yaml

        # Enable Prometheus if the remote workspace id is provided
        if [ -n "$(params.amp-workspace-id)" ]; then
        # NOTE(review): the heredoc bodies below are appended verbatim to the CL2
        # Prometheus manifests, so their leading spaces must match the nesting of
        # the target YAML documents — confirm against the upstream manifests.
        cat << EOF >> $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/prometheus-prometheus.yaml
          containers:
          - name: aws-sigv4-proxy-sidecar
            image: public.ecr.aws/aws-observability/aws-sigv4-proxy:1.0
            args:
            - --name
            - aps
            - --region
            - $(params.region)
            - --host
            - aps-workspaces.$(params.region).amazonaws.com
            - --port
            - :8005
            ports:
            - name: aws-sigv4-proxy
              containerPort: 8005
          remoteWrite:
          - url: http://localhost:8005/workspaces/$(params.amp-workspace-id)/api/v1/remote_write
            queueConfig:
              capacity: 2500
              maxSamplesPerSend: 1000
              maxShards: 200
          externalLabels:
            cluster_name: $(params.cluster-name)
            s3_path: $S3_RESULT_PATH
        EOF
        cat $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/prometheus-prometheus.yaml
        cat << EOF >> $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/0prometheus-operator-deployment.yaml
              tolerations:
              - key: monitoring
                operator: Exists
                effect: NoSchedule
        EOF
        cat $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/0prometheus-operator-deployment.yaml
        # schedule kube-state-pod onto the same node as prometheus
        cat $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/exporters/kube-state-metrics/deployment.yaml
        cat << EOF >> $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/exporters/kube-state-metrics/deployment.yaml
              tolerations:
              - key: monitoring
                operator: Exists
                effect: NoSchedule
        EOF
        cat $(workspaces.source.path)/perf-tests/clusterloader2/pkg/prometheus/manifests/exporters/kube-state-metrics/deployment.yaml

        fi
        # Building clusterloader2 binary
        cd $(workspaces.source.path)/perf-tests/clusterloader2/
        GOOS=linux CGO_ENABLED=0 go build -v -o ./clusterloader ./cmd
    # Execute the CL2 suite; records 1/0 in the `datapoint` result so a later
    # step/task can emit a pass/fail metric even though onError is `continue`.
    - name: run-loadtest
      image: alpine/k8s:1.30.2
      onError: continue
      script: |
        #!/bin/bash
        if [ -n "$(params.amp-workspace-id)" ]; then
        # Enable prometheus flags
        export ENABLE_PROMETHEUS_SERVER=true
        export PROMETHEUS_PVC_STORAGE_CLASS=gp2
        export PROMETHEUS_SCRAPE_KUBE_PROXY=true
        export PROMETHEUS_SCRAPE_APISERVER_ONLY=true
        export PROMETHEUS_SCRAPE_KUBE_STATE_METRICS=false
        export PROMETHEUS_KUBE_PROXY_SELECTOR_KEY=k8s-app
        export PROMETHEUS_MEMORY_REQUEST=16Gi
        fi

        # prepare eks networking load test config
        # copy networking config under cl2 folder since the test needs to access the modules under the same folder
        curl -s $(params.networking-test-config-url) -o $(workspaces.source.path)/perf-tests/clusterloader2/testing/load/config-eks-networking.yaml
        curl -s $(params.networking-svc-test-config-url) -o $(workspaces.source.path)/perf-tests/clusterloader2/testing/load/test-svc.yaml
        cat $(workspaces.source.path)/perf-tests/clusterloader2/testing/load/config-eks-networking.yaml
        cat $(workspaces.source.path)/perf-tests/clusterloader2/testing/load/test-svc.yaml
        cd $(workspaces.source.path)/perf-tests/clusterloader2/

        # create the service backed by 5k pods to test kubeproxy network programming performance
        # we can tune the scale of pods later
        kubectl apply -f $(workspaces.source.path)/perf-tests/clusterloader2/testing/load/test-svc.yaml
        kubectl rollout status deployment/test-svc-deployment -n test-svc --timeout=300s

        # run the CL2 test suite for dns performance test
        ENABLE_EXEC_SERVICE=false ./clusterloader --kubeconfig=$KUBECONFIG --testconfig=$(workspaces.source.path)/perf-tests/clusterloader2/testing/load/config-eks-networking.yaml --testoverrides=$(workspaces.source.path)/overrides.yaml --nodes=$(params.nodes) --provider=eks --report-dir=$(workspaces.results.path) --alsologtostderr --v=2
        exit_code=$?
        if [ $exit_code -eq 0 ]; then
        echo "1" | tee $(results.datapoint.path)
        else
        echo "0" | tee $(results.datapoint.path)
        fi
        exit $exit_code
      timeout: 30000s
    # Ship everything CL2 wrote into the `results` workspace up to S3.
    - name: upload-results
      image: amazon/aws-cli
      workingDir: $(workspaces.results.path)
      script: |
        S3_RESULT_PATH=$(cat $(results.s3_result.path))
        echo "S3 Path: $S3_RESULT_PATH"
        aws sts get-caller-identity
        # we expect to see all files from loadtest that clusterloader2 outputs here in this dir
        ls -larth
        aws s3 cp . s3://$S3_RESULT_PATH/ --recursive