From 57a4073a795852473869a3be78d832016684c890 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 8 Jul 2024 13:12:57 -0700 Subject: [PATCH 01/42] Add code implementation of resource deploying scripts --- .../suite/manifests/reconfig/cafe-routes.yaml | 57 +++++++ tests/suite/manifests/reconfig/cafe.yaml | 65 ++++++++ .../certificate-ns-and-cafe-secret.yaml | 14 ++ tests/suite/manifests/reconfig/gateway.yaml | 25 +++ .../manifests/reconfig/reference-grant.yaml | 14 ++ tests/suite/reconfig_test.go | 145 ++++++++++++++++++ 6 files changed, 320 insertions(+) create mode 100644 tests/suite/manifests/reconfig/cafe-routes.yaml create mode 100644 tests/suite/manifests/reconfig/cafe.yaml create mode 100644 tests/suite/manifests/reconfig/certificate-ns-and-cafe-secret.yaml create mode 100644 tests/suite/manifests/reconfig/gateway.yaml create mode 100644 tests/suite/manifests/reconfig/reference-grant.yaml create mode 100644 tests/suite/reconfig_test.go diff --git a/tests/suite/manifests/reconfig/cafe-routes.yaml b/tests/suite/manifests/reconfig/cafe-routes.yaml new file mode 100644 index 0000000000..006a8eba92 --- /dev/null +++ b/tests/suite/manifests/reconfig/cafe-routes.yaml @@ -0,0 +1,57 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: cafe-tls-redirect +spec: + parentRefs: + - name: gateway + namespace: default + sectionName: http + hostnames: + - "cafe.example.com" + rules: + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + port: 443 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: coffee +spec: + parentRefs: + - name: gateway + namespace: default + sectionName: https + hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /coffee + backendRefs: + - name: coffee + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: tea +spec: + parentRefs: + - name: gateway + sectionName: https + namespace: default + 
hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /tea + backendRefs: + - name: tea + port: 80 diff --git a/tests/suite/manifests/reconfig/cafe.yaml b/tests/suite/manifests/reconfig/cafe.yaml new file mode 100644 index 0000000000..2d03ae59ff --- /dev/null +++ b/tests/suite/manifests/reconfig/cafe.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: coffee +spec: + replicas: 1 + selector: + matchLabels: + app: coffee + template: + metadata: + labels: + app: coffee + spec: + containers: + - name: coffee + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: coffee +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: coffee +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tea +spec: + replicas: 1 + selector: + matchLabels: + app: tea + template: + metadata: + labels: + app: tea + spec: + containers: + - name: tea + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: tea +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: tea diff --git a/tests/suite/manifests/reconfig/certificate-ns-and-cafe-secret.yaml b/tests/suite/manifests/reconfig/certificate-ns-and-cafe-secret.yaml new file mode 100644 index 0000000000..d4037e2d67 --- /dev/null +++ b/tests/suite/manifests/reconfig/certificate-ns-and-cafe-secret.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: certificate +--- +apiVersion: v1 +kind: Secret +metadata: + name: cafe-secret + namespace: certificate +type: kubernetes.io/tls +data: + tls.crt: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzakNDQVpvQ0NRQzdCdVdXdWRtRkNEQU5CZ2txaGtpRzl3MEJBUXNGQURBYk1Sa3dGd1lEVlFRRERCQmoKWVdabExtVjRZVzF3YkdVdVkyOXRNQjRYRFRJeU1EY3hOREl4TlRJek9Wb1hEVEl6TURjeE5ESXhOVEl6T1ZvdwpHekVaTUJjR0ExVUVBd3dRWTJGbVpTNWxlR0Z0Y0d4bExtTnZiVENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFECmdnRVBBRENDQVFvQ2dnRUJBTHFZMnRHNFc5aStFYzJhdnV4Q2prb2tnUUx1ek10U1Rnc1RNaEhuK3ZRUmxIam8KVzFLRnMvQVdlS25UUStyTWVKVWNseis4M3QwRGtyRThwUisxR2NKSE50WlNMb0NEYUlRN0Nhck5nY1daS0o4Qgo1WDNnVS9YeVJHZjI2c1REd2xzU3NkSEQ1U2U3K2Vab3NPcTdHTVF3K25HR2NVZ0VtL1Q1UEMvY05PWE0zZWxGClRPL051MStoMzROVG9BbDNQdTF2QlpMcDNQVERtQ0thaEROV0NWbUJQUWpNNFI4VERsbFhhMHQ5Z1o1MTRSRzUKWHlZWTNtdzZpUzIrR1dYVXllMjFuWVV4UEhZbDV4RHY0c0FXaGRXbElweHlZQlNCRURjczN6QlI2bFF1OWkxZAp0R1k4dGJ3blVmcUVUR3NZdWxzc05qcU95V1VEcFdJelhibHhJZVVDQXdFQUFUQU5CZ2txaGtpRzl3MEJBUXNGCkFBT0NBUUVBcjkrZWJ0U1dzSnhLTGtLZlRkek1ISFhOd2Y5ZXFVbHNtTXZmMGdBdWVKTUpUR215dG1iWjlpbXQKL2RnWlpYVE9hTElHUG9oZ3BpS0l5eVVRZVdGQ2F0NHRxWkNPVWRhbUloOGk0Q1h6QVJYVHNvcUNOenNNLzZMRQphM25XbFZyS2lmZHYrWkxyRi8vblc0VVNvOEoxaCtQeDljY0tpRDZZU0RVUERDRGh1RUtFWXcvbHpoUDJVOXNmCnl6cEJKVGQ4enFyM3paTjNGWWlITmgzYlRhQS82di9jU2lyamNTK1EwQXg4RWpzQzYxRjRVMTc4QzdWNWRCKzQKcmtPTy9QNlA0UFlWNTRZZHMvRjE2WkZJTHFBNENCYnExRExuYWRxamxyN3NPbzl2ZzNnWFNMYXBVVkdtZ2todAp6VlZPWG1mU0Z4OS90MDBHUi95bUdPbERJbWlXMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + tls.key: 
LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVit2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFv
R0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K diff --git a/tests/suite/manifests/reconfig/gateway.yaml b/tests/suite/manifests/reconfig/gateway.yaml new file mode 100644 index 0000000000..fd9d52675b --- /dev/null +++ b/tests/suite/manifests/reconfig/gateway.yaml @@ -0,0 +1,25 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: gateway +spec: + gatewayClassName: nginx + listeners: + - name: http + port: 80 + protocol: HTTP + allowedRoutes: + namespaces: + from: "All" + - name: https + port: 443 + protocol: HTTPS + allowedRoutes: + namespaces: + from: "All" + tls: + mode: Terminate + certificateRefs: + - kind: Secret + name: cafe-secret + namespace: certificate diff --git a/tests/suite/manifests/reconfig/reference-grant.yaml b/tests/suite/manifests/reconfig/reference-grant.yaml new file mode 100644 index 0000000000..053bbbdcc2 --- /dev/null +++ b/tests/suite/manifests/reconfig/reference-grant.yaml @@ -0,0 +1,14 @@ +apiVersion: gateway.networking.k8s.io/v1beta1 +kind: ReferenceGrant +metadata: + name: access-to-cafe-secret + namespace: certificate +spec: + to: + - group: "" + kind: Secret + name: cafe-secret # if you omit this name, then Gateways in default ns can access all Secrets in the certificate ns + from: + - group: gateway.networking.k8s.io + kind: Gateway + namespace: default diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go new file mode 100644 index 0000000000..42097cfa24 --- /dev/null +++ b/tests/suite/reconfig_test.go @@ -0,0 +1,145 @@ +package suite + +import ( + "bytes" + "context" + "fmt" + "os/exec" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + + "sigs.k8s.io/controller-runtime/pkg/client" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ = Describe("Reconfiguration Performance Testing", Label("reconfiguration"), func() { + var () + + BeforeEach(func() { + }) + + AfterEach(func() { + teardown(releaseName) + }) + + It("test 1", func() { + Expect(createResourcesGWLast(30)).To(Succeed()) + }) + + It("test 2", func() { + Expect(createResourcesRoutesLast(30)).To(Succeed()) + }) +}) + +func createResourcesGWLast(resourceCount int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + for i := 1; i <= resourceCount; i++ { + ns := v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "namespace" + strconv.Itoa(i), + }, + } + Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) + } + + ns := v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "reconfig", + }, + } + Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles( + []string{ + "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/reference-grant.yaml", + }, + ns.Name)).To(Succeed()) + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) + + time.Sleep(60 * time.Second) + + Expect(resourceManager.ApplyFromFiles([]string{"reconfig/gateway.yaml"}, ns.Name)).To(Succeed()) + + return nil +} + +func createResourcesRoutesLast(resourceCount int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + for i := 1; i <= resourceCount; i++ { + ns := v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "namespace" + strconv.Itoa(i), + }, + } + Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) + } + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) + + 
time.Sleep(60 * time.Second) + + ns := v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "reconfig", + }, + } + Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles( + []string{ + "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/reference-grant.yaml", + "reconfig/gateway.yaml", + }, + ns.Name)).To(Succeed()) + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) + + return nil +} + +func createUniqueResources(resourceCount int, fileName string) error { + for i := 1; i <= resourceCount; i++ { + nsName := "namespace" + strconv.Itoa(i) + // Command to run sed and capture its output + //nolint:gosec + sedCmd := exec.Command("sed", + "-e", + "s/coffee/coffee"+nsName+"/g", + "-e", + "s/tea/tea"+nsName+"/g", + fileName, + ) + // Command to apply using kubectl + kubectlCmd := exec.Command("kubectl", "apply", "-n", nsName, "-f", "-") + + sedOutput, err := sedCmd.Output() + if err != nil { + fmt.Println(err.Error() + ": " + string(sedOutput)) + return err + } + kubectlCmd.Stdin = bytes.NewReader(sedOutput) + + output, err := kubectlCmd.CombinedOutput() + if err != nil { + fmt.Println(err.Error() + ": " + string(output)) + return err + } + } + return nil +} + +// function to confirm resources were created + +// function to delete resources From 5e12b365f7d7994149d41e1a4a28f692f97f1246 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 10 Jul 2024 11:00:05 -0700 Subject: [PATCH 02/42] Add functions to check for resources and clean them up --- tests/suite/reconfig_test.go | 63 ++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 42097cfa24..382e1e4e3a 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -10,7 +10,8 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - v1 "k8s.io/api/core/v1" + core "k8s.io/api/core/v1" + v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -21,6 +22,10 @@ var _ = Describe("Reconfiguration Performance Testing", Label("reconfiguration") var () BeforeEach(func() { + // possibly instead of teardown, can scale to 0 replicas. + teardown(releaseName) + + setup(getDefaultSetupCfg()) }) AfterEach(func() { @@ -29,10 +34,14 @@ var _ = Describe("Reconfiguration Performance Testing", Label("reconfiguration") It("test 1", func() { Expect(createResourcesGWLast(30)).To(Succeed()) + Expect(checkResourceCreation(30)).To(Succeed()) + cleanupResources(30) }) It("test 2", func() { Expect(createResourcesRoutesLast(30)).To(Succeed()) + Expect(checkResourceCreation(30)).To(Succeed()) + cleanupResources(30) }) }) @@ -41,7 +50,7 @@ func createResourcesGWLast(resourceCount int) error { defer cancel() for i := 1; i <= resourceCount; i++ { - ns := v1.Namespace{ + ns := core.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: "namespace" + strconv.Itoa(i), }, @@ -49,7 +58,7 @@ func createResourcesGWLast(resourceCount int) error { Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) } - ns := v1.Namespace{ + ns := core.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: "reconfig", }, @@ -78,7 +87,7 @@ func createResourcesRoutesLast(resourceCount int) error { defer cancel() for i := 1; i <= resourceCount; i++ { - ns := v1.Namespace{ + ns := core.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: "namespace" + strconv.Itoa(i), }, @@ -90,7 +99,7 @@ func createResourcesRoutesLast(resourceCount int) error { time.Sleep(60 * time.Second) - ns := v1.Namespace{ + ns := core.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: "reconfig", }, @@ -140,6 +149,46 @@ func createUniqueResources(resourceCount int, fileName string) error { return nil } -// function to confirm resources were created +func checkResourceCreation(resourceCount int) error { + ctx, cancel := 
context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var namespaces core.NamespaceList + if err := k8sClient.List(ctx, &namespaces); err != nil { + return fmt.Errorf("error getting namespaces: %w", err) + } + Expect(len(namespaces.Items)).To(BeNumerically(">=", resourceCount)) + + var routes v1.HTTPRouteList + if err := k8sClient.List(ctx, &routes); err != nil { + return fmt.Errorf("error getting HTTPRoutes: %w", err) + } + Expect(len(routes.Items)).To(BeNumerically(">=", resourceCount*3)) + + var pods core.PodList + if err := k8sClient.List(ctx, &pods); err != nil { + return fmt.Errorf("error getting Pods: %w", err) + } + Expect(len(pods.Items)).To(BeNumerically(">=", resourceCount*2)) -// function to delete resources + return nil +} + +func cleanupResources(resourceCount int) { + for i := 1; i <= resourceCount; i++ { + nsName := "namespace" + strconv.Itoa(i) + Expect(resourceManager.DeleteNamespace(nsName)).To(Succeed()) + } + + ns := core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "reconfig", + }, + } + + Expect(resourceManager.DeleteFromFiles([]string{ + "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/reference-grant.yaml", + "reconfig/gateway.yaml", + }, ns.Name)).To(Succeed()) +} From 8a04efc38706e703e76ac5efe7168f21485b94c9 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 10 Jul 2024 11:55:13 -0700 Subject: [PATCH 03/42] Add results template --- tests/suite/reconfig_test.go | 80 ++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 382e1e4e3a..627c5f510a 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -4,8 +4,11 @@ import ( "bytes" "context" "fmt" + "io" "os/exec" + "path/filepath" "strconv" + "text/template" "time" . 
"github.com/onsi/ginkgo/v2" @@ -16,11 +19,24 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) -var _ = Describe("Reconfiguration Performance Testing", Label("reconfiguration"), func() { +var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration"), func() { var () + BeforeAll(func() { + resultsDir, err := framework.CreateResultsDir("reconfig", version) + Expect(err).ToNot(HaveOccurred()) + + filename := filepath.Join(resultsDir, framework.CreateResultsFilename("md", version, *plusEnabled)) + outFile, err := framework.CreateResultsFile(filename) + Expect(err).ToNot(HaveOccurred()) + Expect(framework.WriteSystemInfoToFile(outFile, clusterInfo, *plusEnabled)).To(Succeed()) + + }) + BeforeEach(func() { // possibly instead of teardown, can scale to 0 replicas. teardown(releaseName) @@ -37,12 +53,12 @@ var _ = Describe("Reconfiguration Performance Testing", Label("reconfiguration") Expect(checkResourceCreation(30)).To(Succeed()) cleanupResources(30) }) - - It("test 2", func() { - Expect(createResourcesRoutesLast(30)).To(Succeed()) - Expect(checkResourceCreation(30)).To(Succeed()) - cleanupResources(30) - }) + // + //It("test 2", func() { + // Expect(createResourcesRoutesLast(30)).To(Succeed()) + // Expect(checkResourceCreation(30)).To(Succeed()) + // cleanupResources(30) + //}) }) func createResourcesGWLast(resourceCount int) error { @@ -191,4 +207,54 @@ func cleanupResources(resourceCount int) { "reconfig/reference-grant.yaml", "reconfig/gateway.yaml", }, ns.Name)).To(Succeed()) + +} + +type reconfigTestResults struct { + Name string + EventsBuckets []bucket + ReloadBuckets []bucket + NumResources int + TimeToReadyTotal int + TimeToReadyAvgSingle int + NGINXReloads int + NGINXReloadAvgTime int + EventsCount int + EventsAvgTime int +} + +const reconfigResultTemplate = ` +## Test {{ .Name }} + +- NumResources: {{ .NumResources 
}} + +### Reloads and Time to Ready + +- TimeToReadyTotal: {{ .TimeToReadyTotal }} +- TimeToReadyAvgSingle: {{ .TimeToReadyAvgSingle }} +- NGINX Reloads: {{ .NGINXReloads }} +- NGINX Reload Average Time: {{ .NGINXReloadAvgTime }} +- Reload distribution: +{{- range .ReloadBuckets }} + - {{ .Le }}ms: {{ .Val }} +{{- end }} + +### Event Batch Processing + +- Event Batch Total: {{ .EventsCount }} +- Event Batch Processing Average Time: {{ .EventsAvgTime }}ms +- Event Batch Processing distribution: +{{- range .EventsBuckets }} + - {{ .Le }}ms: {{ .Val }} +{{- end }} + +` + +func writeReconfigResults(dest io.Writer, results reconfigTestResults) error { + tmpl, err := template.New("results").Parse(reconfigResultTemplate) + if err != nil { + return err + } + + return tmpl.Execute(dest, results) } From d233c927832512432c7f358d91c0a25b4705c865 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 12 Jul 2024 11:27:50 -0700 Subject: [PATCH 04/42] Add basic prometheus metrics --- tests/suite/reconfig_test.go | 321 +++++++++++++++++++++-------------- 1 file changed, 197 insertions(+), 124 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 627c5f510a..1c20c9213e 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -14,6 +14,7 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" core "k8s.io/api/core/v1" + ctlr "sigs.k8s.io/controller-runtime" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -24,7 +25,13 @@ import ( ) var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration"), func() { - var () + var ( + scrapeInterval = 15 * time.Second + promInstance framework.PrometheusInstance + promPortForwardStopCh = make(chan struct{}) + + //ngfPodName string + ) BeforeAll(func() { resultsDir, err := framework.CreateResultsDir("reconfig", version) @@ -35,180 +42,246 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(err).ToNot(HaveOccurred()) Expect(framework.WriteSystemInfoToFile(outFile, clusterInfo, *plusEnabled)).To(Succeed()) + promCfg := framework.PrometheusConfig{ + ScrapeInterval: scrapeInterval, + } + + promInstance, err = framework.InstallPrometheus(resourceManager, promCfg) + Expect(err).ToNot(HaveOccurred()) + + k8sConfig := ctlr.GetConfigOrDie() + + if !clusterInfo.IsGKE { + Expect(promInstance.PortForward(k8sConfig, promPortForwardStopCh)).To(Succeed()) + } }) BeforeEach(func() { // possibly instead of teardown, can scale to 0 replicas. 
teardown(releaseName) - setup(getDefaultSetupCfg()) + //setup(getDefaultSetupCfg()) + + //podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + //Expect(err).ToNot(HaveOccurred()) + //Expect(podNames).To(HaveLen(1)) + //ngfPodName = podNames[0] }) AfterEach(func() { teardown(releaseName) + }) - It("test 1", func() { - Expect(createResourcesGWLast(30)).To(Succeed()) - Expect(checkResourceCreation(30)).To(Succeed()) - cleanupResources(30) + AfterAll(func() { + close(promPortForwardStopCh) + Expect(framework.UninstallPrometheus(resourceManager)).To(Succeed()) }) - // - //It("test 2", func() { - // Expect(createResourcesRoutesLast(30)).To(Succeed()) - // Expect(checkResourceCreation(30)).To(Succeed()) - // cleanupResources(30) - //}) -}) -func createResourcesGWLast(resourceCount int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - defer cancel() + createUniqueResources := func(resourceCount int, fileName string) error { + for i := 1; i <= resourceCount; i++ { + nsName := "namespace" + strconv.Itoa(i) + // Command to run sed and capture its output + //nolint:gosec + sedCmd := exec.Command("sed", + "-e", + "s/coffee/coffee"+nsName+"/g", + "-e", + "s/tea/tea"+nsName+"/g", + fileName, + ) + // Command to apply using kubectl + kubectlCmd := exec.Command("kubectl", "apply", "-n", nsName, "-f", "-") + + sedOutput, err := sedCmd.Output() + if err != nil { + fmt.Println(err.Error() + ": " + string(sedOutput)) + return err + } + kubectlCmd.Stdin = bytes.NewReader(sedOutput) + + output, err := kubectlCmd.CombinedOutput() + if err != nil { + fmt.Println(err.Error() + ": " + string(output)) + return err + } + } + return nil + } + + createResourcesGWLast := func(resourceCount int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + for i := 1; i <= resourceCount; i++ { + ns := core.Namespace{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "namespace" + strconv.Itoa(i), + }, + } + Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) + } - for i := 1; i <= resourceCount; i++ { ns := core.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: "namespace" + strconv.Itoa(i), + Name: "reconfig", }, } - Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) - } + Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles( + []string{ + "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/reference-grant.yaml", + }, + ns.Name)).To(Succeed()) + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) + + time.Sleep(60 * time.Second) + + Expect(resourceManager.ApplyFromFiles([]string{"reconfig/gateway.yaml"}, ns.Name)).To(Succeed()) - ns := core.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "reconfig", - }, + return nil } - Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) - Expect(resourceManager.ApplyFromFiles( - []string{ - "reconfig/certificate-ns-and-cafe-secret.yaml", - "reconfig/reference-grant.yaml", - }, - ns.Name)).To(Succeed()) - Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) + //createResourcesRoutesLast := func(resourceCount int) error { + // ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + // defer cancel() + // + // for i := 1; i <= resourceCount; i++ { + // ns := core.Namespace{ + // ObjectMeta: metav1.ObjectMeta{ + // Name: "namespace" + strconv.Itoa(i), + // }, + // } + // Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) + // } + // + // Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) + // + // time.Sleep(60 * time.Second) + // + // ns := core.Namespace{ + // ObjectMeta: metav1.ObjectMeta{ + // Name: "reconfig", + // }, + // } + // 
Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) + // Expect(resourceManager.ApplyFromFiles( + // []string{ + // "reconfig/certificate-ns-and-cafe-secret.yaml", + // "reconfig/reference-grant.yaml", + // "reconfig/gateway.yaml", + // }, + // ns.Name)).To(Succeed()) + // + // Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) + // + // return nil + //} + + checkResourceCreation := func(resourceCount int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() - Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) + var namespaces core.NamespaceList + if err := k8sClient.List(ctx, &namespaces); err != nil { + return fmt.Errorf("error getting namespaces: %w", err) + } + Expect(len(namespaces.Items)).To(BeNumerically(">=", resourceCount)) - time.Sleep(60 * time.Second) + var routes v1.HTTPRouteList + if err := k8sClient.List(ctx, &routes); err != nil { + return fmt.Errorf("error getting HTTPRoutes: %w", err) + } + Expect(len(routes.Items)).To(BeNumerically(">=", resourceCount*3)) - Expect(resourceManager.ApplyFromFiles([]string{"reconfig/gateway.yaml"}, ns.Name)).To(Succeed()) + var pods core.PodList + if err := k8sClient.List(ctx, &pods); err != nil { + return fmt.Errorf("error getting Pods: %w", err) + } + Expect(len(pods.Items)).To(BeNumerically(">=", resourceCount*2)) - return nil -} + return nil + } -func createResourcesRoutesLast(resourceCount int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - defer cancel() + cleanupResources := func(resourceCount int) { + for i := 1; i <= resourceCount; i++ { + nsName := "namespace" + strconv.Itoa(i) + Expect(resourceManager.DeleteNamespace(nsName)).To(Succeed()) + } - for i := 1; i <= resourceCount; i++ { ns := core.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: "namespace" + strconv.Itoa(i), + Name: "reconfig", }, 
} - Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) - } - - Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) - time.Sleep(60 * time.Second) - - ns := core.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "reconfig", - }, - } - Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) - Expect(resourceManager.ApplyFromFiles( - []string{ + Expect(resourceManager.DeleteFromFiles([]string{ "reconfig/certificate-ns-and-cafe-secret.yaml", "reconfig/reference-grant.yaml", "reconfig/gateway.yaml", - }, - ns.Name)).To(Succeed()) + }, ns.Name)).To(Succeed()) + } - Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) + getFirstValueOfVector := func(query string) float64 { + result, err := promInstance.Query(query) + Expect(err).ToNot(HaveOccurred()) - return nil -} + val, err := framework.GetFirstValueOfPrometheusVector(result) + Expect(err).ToNot(HaveOccurred()) -func createUniqueResources(resourceCount int, fileName string) error { - for i := 1; i <= resourceCount; i++ { - nsName := "namespace" + strconv.Itoa(i) - // Command to run sed and capture its output - //nolint:gosec - sedCmd := exec.Command("sed", - "-e", - "s/coffee/coffee"+nsName+"/g", - "-e", - "s/tea/tea"+nsName+"/g", - fileName, - ) - // Command to apply using kubectl - kubectlCmd := exec.Command("kubectl", "apply", "-n", nsName, "-f", "-") + return val + } - sedOutput, err := sedCmd.Output() - if err != nil { - fmt.Println(err.Error() + ": " + string(sedOutput)) - return err - } - kubectlCmd.Stdin = bytes.NewReader(sedOutput) + runTestWithMetrics := func(resourceCount int, test func(resourceCount int) error, startWithNGFSetup bool) { + if startWithNGFSetup { + setup(getDefaultSetupCfg()) - output, err := kubectlCmd.CombinedOutput() - if err != nil { - fmt.Println(err.Error() + ": " + string(output)) - return err + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, 
timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).To(HaveLen(1)) + } else { + output, err := framework.InstallGatewayAPI(getDefaultSetupCfg().gwAPIVersion) + Expect(err).ToNot(HaveOccurred(), string(output)) } - } - return nil -} -func checkResourceCreation(resourceCount int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - defer cancel() + Expect(test(resourceCount)).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - var namespaces core.NamespaceList - if err := k8sClient.List(ctx, &namespaces); err != nil { - return fmt.Errorf("error getting namespaces: %w", err) - } - Expect(len(namespaces.Items)).To(BeNumerically(">=", resourceCount)) + if !startWithNGFSetup { + setup(getDefaultSetupCfg()) - var routes v1.HTTPRouteList - if err := k8sClient.List(ctx, &routes); err != nil { - return fmt.Errorf("error getting HTTPRoutes: %w", err) - } - Expect(len(routes.Items)).To(BeNumerically(">=", resourceCount*3)) + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).To(HaveLen(1)) + } - var pods core.PodList - if err := k8sClient.List(ctx, &pods); err != nil { - return fmt.Errorf("error getting Pods: %w", err) - } - Expect(len(pods.Items)).To(BeNumerically(">=", resourceCount*2)) + // lowest scrape interval on prometheus is 10 seconds, so we sleep here to make sure we get at least a single + // scrape + time.Sleep(2 * scrapeInterval) - return nil -} - -func cleanupResources(resourceCount int) { - for i := 1; i <= resourceCount; i++ { - nsName := "namespace" + strconv.Itoa(i) - Expect(resourceManager.DeleteNamespace(nsName)).To(Succeed()) - } + reloadCount := getFirstValueOfVector( + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_milliseconds_count`), + ) - ns := core.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "reconfig", - }, + 
fmt.Println(reloadCount) + cleanupResources(30) } - Expect(resourceManager.DeleteFromFiles([]string{ - "reconfig/certificate-ns-and-cafe-secret.yaml", - "reconfig/reference-grant.yaml", - "reconfig/gateway.yaml", - }, ns.Name)).To(Succeed()) + It("test 1", func() { + //Skip("no") + runTestWithMetrics(30, createResourcesGWLast, true) + }) -} + //It("test 2", func() { + // Expect(createResourcesRoutesLast(30)).To(Succeed()) + // Expect(checkResourceCreation(30)).To(Succeed()) + // cleanupResources(30) + //}) +}) type reconfigTestResults struct { Name string From d2ad309df4e6be66979f62a077f6b017468028ed Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 12 Jul 2024 15:39:13 -0700 Subject: [PATCH 05/42] Add queries --- tests/framework/queries.go | 211 +++++++++++++++++++++++++++++++++++ tests/suite/reconfig_test.go | 79 ++++++++++--- 2 files changed, 277 insertions(+), 13 deletions(-) create mode 100644 tests/framework/queries.go diff --git a/tests/framework/queries.go b/tests/framework/queries.go new file mode 100644 index 0000000000..d57635da08 --- /dev/null +++ b/tests/framework/queries.go @@ -0,0 +1,211 @@ +package framework + +import ( + "errors" + "fmt" + "time" + + . 
"github.com/onsi/gomega" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" +) + +func getFirstValueOfVector(query string, promInstance PrometheusInstance) float64 { + result, err := promInstance.Query(query) + Expect(err).ToNot(HaveOccurred()) + + val, err := GetFirstValueOfPrometheusVector(result) + Expect(err).ToNot(HaveOccurred()) + + return val +} + +func getBuckets(query string, promInstance PrometheusInstance) []bucket { + result, err := promInstance.Query(query) + Expect(err).ToNot(HaveOccurred()) + + res, ok := result.(model.Vector) + Expect(ok).To(BeTrue()) + + buckets := make([]bucket, 0, len(res)) + + for _, sample := range res { + le := sample.Metric["le"] + val := float64(sample.Value) + bucket := bucket{ + Le: string(le), + Val: int(val), + } + buckets = append(buckets, bucket) + } + + return buckets +} + +func GetReloadCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) + + //return getFirstValueOfVector( + // fmt.Sprintf( + // `nginx_gateway_fabric_nginx_reloads_milliseconds_count{pod="%[1]s"}`+ + // ` - `+ + // `nginx_gateway_fabric_nginx_reloads_milliseconds_count{pod="%[1]s"} @ %d`, + // ngfPodName, + // startTime.Unix(), + // ), + // promInstance, + //) +} + +func getReloadErrsCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +func getReloadAvgTime(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { + return 
getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +func getReloadBuckets(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) []bucket { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +func getEventsCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +func getEventsAvtTime(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { + return getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +func getEventsBuckets(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) []bucket { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + 
`nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +func CreateMetricExistChecker(promInstance PrometheusInstance, query string, getTime func() time.Time, modifyTime func()) func() error { + return func() error { + queryWithTimestamp := fmt.Sprintf("%s @ %d", query, getTime().Unix()) + + result, err := promInstance.Query(queryWithTimestamp) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + modifyTime() + return errors.New("empty result") + } + + return nil + } +} + +func CreateEndTimeFinder(promInstance PrometheusInstance, query string, startTime time.Time, t *time.Time, queryRangeStep time.Duration) func() error { + return func() error { + result, err := promInstance.QueryRange(query, v1.Range{ + Start: startTime, + End: *t, + Step: queryRangeStep, + }) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + *t = time.Now() + return errors.New("empty result") + } + + return nil + } +} + +func createResponseChecker(url, address string, requestTimeout time.Duration) func() error { + return func() error { + status, _, err := Get(url, address, requestTimeout) + if err != nil { + return fmt.Errorf("bad response: %w", err) + } + + if status != 200 { + return fmt.Errorf("unexpected status code: %d", status) + } + + return nil + } +} + +type bucket struct { + Le string + Val int +} diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 1c20c9213e..6f1c457531 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -27,6 +27,7 @@ import ( var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration"), func() { var ( scrapeInterval = 15 * time.Second + queryRangeStep = 15 * time.Second promInstance framework.PrometheusInstance promPortForwardStopCh = make(chan struct{}) @@ -226,28 
+227,50 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }, ns.Name)).To(Succeed()) } - getFirstValueOfVector := func(query string) float64 { - result, err := promInstance.Query(query) - Expect(err).ToNot(HaveOccurred()) + runTestWithMetrics := func(resourceCount int, test func(resourceCount int) error, startWithNGFSetup bool) { - val, err := framework.GetFirstValueOfPrometheusVector(result) - Expect(err).ToNot(HaveOccurred()) + var ( + metricExistTimeout = 2 * time.Minute + metricExistPolling = 1 * time.Second + ngfPodName string + ) - return val - } + startTime := time.Now() + + getStartTime := func() time.Time { return startTime } + modifyStartTime := func() { startTime = startTime.Add(500 * time.Millisecond) } - runTestWithMetrics := func(resourceCount int, test func(resourceCount int) error, startWithNGFSetup bool) { if startWithNGFSetup { setup(getDefaultSetupCfg()) podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).To(HaveLen(1)) + ngfPodName = podNames[0] + startTime = time.Now() } else { output, err := framework.InstallGatewayAPI(getDefaultSetupCfg().gwAPIVersion) Expect(err).ToNot(HaveOccurred(), string(output)) } + queries := []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + framework.CreateMetricExistChecker( + promInstance, + q, + getStartTime, + modifyStartTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } + Expect(test(resourceCount)).To(Succeed()) 
Expect(checkResourceCreation(resourceCount)).To(Succeed()) @@ -257,15 +280,45 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).To(HaveLen(1)) + ngfPodName = podNames[0] + startTime = time.Now() } - // lowest scrape interval on prometheus is 10 seconds, so we sleep here to make sure we get at least a single - // scrape time.Sleep(2 * scrapeInterval) - reloadCount := getFirstValueOfVector( - fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_milliseconds_count`), - ) + endTime := time.Now() + + Eventually( + framework.CreateEndTimeFinder( + promInstance, + fmt.Sprintf(`rate(container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}[2m])`, ngfPodName), + startTime, + &endTime, + queryRangeStep, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + + getEndTime := func() time.Time { return endTime } + noOpModifier := func() {} + + queries = []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + framework.CreateMetricExistChecker( + promInstance, + q, + getEndTime, + noOpModifier, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } + + reloadCount := framework.GetReloadCount(promInstance, ngfPodName, startTime) fmt.Println(reloadCount) cleanupResources(30) From 9358660fea9e790f43b2951277f65ba7a05a6ce8 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 17 Jul 2024 09:20:45 -0700 Subject: [PATCH 06/42] Add queries and results --- tests/framework/queries.go | 165 ++++++++++++-------------- 
tests/suite/reconfig_test.go | 219 ++++++++++++++++++++++------------- 2 files changed, 212 insertions(+), 172 deletions(-) diff --git a/tests/framework/queries.go b/tests/framework/queries.go index d57635da08..d45786cd7a 100644 --- a/tests/framework/queries.go +++ b/tests/framework/queries.go @@ -5,154 +5,135 @@ import ( "fmt" "time" - . "github.com/onsi/gomega" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" ) -func getFirstValueOfVector(query string, promInstance PrometheusInstance) float64 { +// TODO: having gomega and regular queries here is bad + +func getFirstValueOfVector(query string, promInstance PrometheusInstance) (float64, error) { result, err := promInstance.Query(query) - Expect(err).ToNot(HaveOccurred()) + if err != nil { + return 0, err + } val, err := GetFirstValueOfPrometheusVector(result) - Expect(err).ToNot(HaveOccurred()) + if err != nil { + return 0, err + } - return val + return val, nil } -func getBuckets(query string, promInstance PrometheusInstance) []bucket { +func getBuckets(query string, promInstance PrometheusInstance) ([]Bucket, error) { result, err := promInstance.Query(query) - Expect(err).ToNot(HaveOccurred()) + if err != nil { + return nil, err + } res, ok := result.(model.Vector) - Expect(ok).To(BeTrue()) + if !ok { + return nil, errors.New("could not convert result to vector") + } - buckets := make([]bucket, 0, len(res)) + buckets := make([]Bucket, 0, len(res)) for _, sample := range res { le := sample.Metric["le"] val := float64(sample.Value) - bucket := bucket{ + bucket := Bucket{ Le: string(le), Val: int(val), } buckets = append(buckets, bucket) } - return buckets -} - -func GetReloadCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, - ngfPodName, - 
startTime.Unix(), - ), - promInstance, - ) - - //return getFirstValueOfVector( - // fmt.Sprintf( - // `nginx_gateway_fabric_nginx_reloads_milliseconds_count{pod="%[1]s"}`+ - // ` - `+ - // `nginx_gateway_fabric_nginx_reloads_milliseconds_count{pod="%[1]s"} @ %d`, - // ngfPodName, - // startTime.Unix(), - // ), - // promInstance, - //) + return buckets, nil } -func getReloadErrsCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { +func GetReloadCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { return getFirstValueOfVector( fmt.Sprintf( - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`, ngfPodName, - startTime.Unix(), ), promInstance, ) } -func getReloadAvgTime(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { +//func getReloadErrsCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { +// return getFirstValueOfVector( +// fmt.Sprintf( +// `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ +// ` - `+ +// `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, +// ngfPodName, +// startTime.Unix(), +// ), +// promInstance, +// ) +//} + +func GetReloadAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { return getFirstValueOfVector( fmt.Sprintf( - `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ ` / `+ - `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`, ngfPodName, - startTime.Unix(), ), promInstance, ) } -func 
getReloadBuckets(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) []bucket { +func GetReloadBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { return getBuckets( fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`, ngfPodName, - startTime.Unix(), ), promInstance, ) } -func getEventsCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { +func GetEventsCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { return getFirstValueOfVector( fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`, ngfPodName, - startTime.Unix(), ), promInstance, ) } -func getEventsAvtTime(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { +func GetEventsAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { return getFirstValueOfVector( fmt.Sprintf( - `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ ` / `+ - `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`, ngfPodName, - startTime.Unix(), ), promInstance, ) } -func getEventsBuckets(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) []bucket { +func 
GetEventsBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { return getBuckets( fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`, ngfPodName, - startTime.Unix(), ), promInstance, ) } -func CreateMetricExistChecker(promInstance PrometheusInstance, query string, getTime func() time.Time, modifyTime func()) func() error { +func CreateMetricExistChecker( + promInstance PrometheusInstance, + query string, + getTime func() time.Time, + modifyTime func(), +) func() error { return func() error { queryWithTimestamp := fmt.Sprintf("%s @ %d", query, getTime().Unix()) @@ -170,7 +151,13 @@ func CreateMetricExistChecker(promInstance PrometheusInstance, query string, get } } -func CreateEndTimeFinder(promInstance PrometheusInstance, query string, startTime time.Time, t *time.Time, queryRangeStep time.Duration) func() error { +func CreateEndTimeFinder( + promInstance PrometheusInstance, + query string, + startTime time.Time, + t *time.Time, + queryRangeStep time.Duration, +) func() error { return func() error { result, err := promInstance.QueryRange(query, v1.Range{ Start: startTime, @@ -190,22 +177,22 @@ func CreateEndTimeFinder(promInstance PrometheusInstance, query string, startTim } } -func createResponseChecker(url, address string, requestTimeout time.Duration) func() error { - return func() error { - status, _, err := Get(url, address, requestTimeout) - if err != nil { - return fmt.Errorf("bad response: %w", err) - } - - if status != 200 { - return fmt.Errorf("unexpected status code: %d", status) - } - - return nil - } -} - -type bucket struct { +//func createResponseChecker(url, address string, requestTimeout time.Duration) func() error { +// return func() error { +// status, _, err := Get(url, address, requestTimeout) +// if 
err != nil { +// return fmt.Errorf("bad response: %w", err) +// } +// +// if status != 200 { +// return fmt.Errorf("unexpected status code: %d", status) +// } +// +// return nil +// } +//} + +type Bucket struct { Le string Val int } diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 6f1c457531..d029e78910 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "io" + "os" "os/exec" "path/filepath" "strconv" @@ -27,11 +28,11 @@ import ( var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration"), func() { var ( scrapeInterval = 15 * time.Second - queryRangeStep = 15 * time.Second + queryRangeStep = 5 * time.Second promInstance framework.PrometheusInstance promPortForwardStopCh = make(chan struct{}) - //ngfPodName string + outFile *os.File ) BeforeAll(func() { @@ -39,7 +40,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(err).ToNot(HaveOccurred()) filename := filepath.Join(resultsDir, framework.CreateResultsFilename("md", version, *plusEnabled)) - outFile, err := framework.CreateResultsFile(filename) + outFile, err = framework.CreateResultsFile(filename) Expect(err).ToNot(HaveOccurred()) Expect(framework.WriteSystemInfoToFile(outFile, clusterInfo, *plusEnabled)).To(Succeed()) @@ -58,25 +59,15 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }) BeforeEach(func() { - // possibly instead of teardown, can scale to 0 replicas. 
teardown(releaseName) - - //setup(getDefaultSetupCfg()) - - //podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - //Expect(err).ToNot(HaveOccurred()) - //Expect(podNames).To(HaveLen(1)) - //ngfPodName = podNames[0] - }) - - AfterEach(func() { - teardown(releaseName) - }) AfterAll(func() { + teardown(releaseName) close(promPortForwardStopCh) Expect(framework.UninstallPrometheus(resourceManager)).To(Succeed()) + + // might want to call cleanupResources here with 150 or the max resources. }) createUniqueResources := func(resourceCount int, fileName string) error { @@ -147,41 +138,41 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig return nil } - //createResourcesRoutesLast := func(resourceCount int) error { - // ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - // defer cancel() - // - // for i := 1; i <= resourceCount; i++ { - // ns := core.Namespace{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: "namespace" + strconv.Itoa(i), - // }, - // } - // Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) - // } - // - // Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) - // - // time.Sleep(60 * time.Second) - // - // ns := core.Namespace{ - // ObjectMeta: metav1.ObjectMeta{ - // Name: "reconfig", - // }, - // } - // Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) - // Expect(resourceManager.ApplyFromFiles( - // []string{ - // "reconfig/certificate-ns-and-cafe-secret.yaml", - // "reconfig/reference-grant.yaml", - // "reconfig/gateway.yaml", - // }, - // ns.Name)).To(Succeed()) - // - // Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) - // - // return nil - //} + createResourcesRoutesLast := func(resourceCount int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + for i := 1; i 
<= resourceCount; i++ { + ns := core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "namespace" + strconv.Itoa(i), + }, + } + Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) + } + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) + + time.Sleep(60 * time.Second) + + ns := core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "reconfig", + }, + } + Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles( + []string{ + "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/reference-grant.yaml", + "reconfig/gateway.yaml", + }, + ns.Name)).To(Succeed()) + + Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) + + return nil + } checkResourceCreation := func(resourceCount int) error { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) @@ -227,16 +218,19 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }, ns.Name)).To(Succeed()) } - runTestWithMetrics := func(resourceCount int, test func(resourceCount int) error, startWithNGFSetup bool) { - + runTestWithMetrics := func( + testName string, + resourceCount int, + test func(resourceCount int) error, + startWithNGFSetup bool, + ) { var ( metricExistTimeout = 2 * time.Minute metricExistPolling = 1 * time.Second ngfPodName string + startTime time.Time ) - startTime := time.Now() - getStartTime := func() time.Time { return startTime } modifyStartTime := func() { startTime = startTime.Add(500 * time.Millisecond) } @@ -248,29 +242,29 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(podNames).To(HaveLen(1)) ngfPodName = podNames[0] startTime = time.Now() + + queries := []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), + 
// We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + framework.CreateMetricExistChecker( + promInstance, + q, + getStartTime, + modifyStartTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } } else { output, err := framework.InstallGatewayAPI(getDefaultSetupCfg().gwAPIVersion) Expect(err).ToNot(HaveOccurred(), string(output)) } - queries := []string{ - fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), - fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), - // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time - fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), - } - - for _, q := range queries { - Eventually( - framework.CreateMetricExistChecker( - promInstance, - q, - getStartTime, - modifyStartTime, - ), - ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) - } - Expect(test(resourceCount)).To(Succeed()) Expect(checkResourceCreation(resourceCount)).To(Succeed()) @@ -282,6 +276,25 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(podNames).To(HaveLen(1)) ngfPodName = podNames[0] startTime = time.Now() + + // if i do a new instance of NGF each time, I might not need start time and can just do the endtime. 
+ queries := []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + framework.CreateMetricExistChecker( + promInstance, + q, + getStartTime, + modifyStartTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } } time.Sleep(2 * scrapeInterval) @@ -301,7 +314,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig getEndTime := func() time.Time { return endTime } noOpModifier := func() {} - queries = []string{ + queries := []string{ fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), @@ -318,15 +331,57 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) } - reloadCount := framework.GetReloadCount(promInstance, ngfPodName, startTime) - + reloadCount, err := framework.GetReloadCount(promInstance, ngfPodName) + Expect(err).ToNot(HaveOccurred()) fmt.Println(reloadCount) + + reloadAvgTime, err := framework.GetReloadAvgTime(promInstance, ngfPodName) + Expect(err).ToNot(HaveOccurred()) + fmt.Println(reloadAvgTime) + + reloadBuckets, err := framework.GetReloadBuckets(promInstance, ngfPodName) + Expect(err).ToNot(HaveOccurred()) + fmt.Println(reloadBuckets) + + eventsCount, err := framework.GetEventsCount(promInstance, ngfPodName) + Expect(err).ToNot(HaveOccurred()) + fmt.Println(eventsCount) 
+ + eventsAvgTime, err := framework.GetEventsAvgTime(promInstance, ngfPodName) + Expect(err).ToNot(HaveOccurred()) + fmt.Println(eventsAvgTime) + + eventsBuckets, err := framework.GetEventsBuckets(promInstance, ngfPodName) + Expect(err).ToNot(HaveOccurred()) + fmt.Println(eventsBuckets) + + results := reconfigTestResults{ + Name: testName, + EventsBuckets: eventsBuckets, + ReloadBuckets: reloadBuckets, + NumResources: resourceCount, + NGINXReloads: int(reloadCount), + NGINXReloadAvgTime: int(reloadAvgTime), + EventsCount: int(eventsCount), + EventsAvgTime: int(eventsAvgTime), + } + + err = writeReconfigResults(outFile, results) + Expect(err).ToNot(HaveOccurred()) + cleanupResources(30) } It("test 1", func() { - //Skip("no") - runTestWithMetrics(30, createResourcesGWLast, true) + runTestWithMetrics("1", 30, createResourcesGWLast, false) + }) + + It("test 2", func() { + runTestWithMetrics("2", 30, createResourcesRoutesLast, true) + }) + + It("test 3", func() { + runTestWithMetrics("3", 30, createResourcesGWLast, true) }) //It("test 2", func() { @@ -338,8 +393,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig type reconfigTestResults struct { Name string - EventsBuckets []bucket - ReloadBuckets []bucket + EventsBuckets []framework.Bucket + ReloadBuckets []framework.Bucket NumResources int TimeToReadyTotal int TimeToReadyAvgSingle int @@ -350,9 +405,7 @@ type reconfigTestResults struct { } const reconfigResultTemplate = ` -## Test {{ .Name }} - -- NumResources: {{ .NumResources }} +## Test {{ .Name }} NumResources {{ .NumResources }} ### Reloads and Time to Ready From 4da88d50dabb8af65d8864ffa9669e9195fe3cb1 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Jul 2024 16:00:12 -0700 Subject: [PATCH 07/42] Add time to ready total and average --- tests/suite/reconfig_test.go | 180 ++++++++++++++++++++++++++++++----- 1 file changed, 158 insertions(+), 22 deletions(-) diff --git a/tests/suite/reconfig_test.go 
b/tests/suite/reconfig_test.go index d029e78910..3d862644da 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -9,6 +9,7 @@ import ( "os/exec" "path/filepath" "strconv" + "strings" "text/template" "time" @@ -25,7 +26,7 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) -var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration"), func() { +var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration", "nfr"), func() { var ( scrapeInterval = 15 * time.Second queryRangeStep = 5 * time.Second @@ -66,8 +67,6 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig teardown(releaseName) close(promPortForwardStopCh) Expect(framework.UninstallPrometheus(resourceManager)).To(Succeed()) - - // might want to call cleanupResources here with 150 or the max resources. }) createUniqueResources := func(resourceCount int, fileName string) error { @@ -218,11 +217,115 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }, ns.Name)).To(Succeed()) } + getTimeStampFromLogLine := func(logLine string) string { + var timeStamp string + + timeStamp = strings.Split(logLine, "\"ts\":\"")[1] + // sometimes the log message will contain information on a "logger" followed by the "msg" + // while other times the "logger" will be omitted + timeStamp = strings.Split(timeStamp, "\",\"msg\"")[0] + timeStamp = strings.Split(timeStamp, "\",\"logger\"")[0] + + return timeStamp + } + + calculateTimeDifferenceBetweenLogLines := func(firstLine, secondLine string) (int, error) { + firstTS := getTimeStampFromLogLine(firstLine) + secondTS := getTimeStampFromLogLine(secondLine) + + // i might be able to just use the local constant timestamp layout + layout := "2006-01-02T15:04:05Z" + + parsedTS1, err := time.Parse(layout, firstTS) + if err != nil { + return 0, err + } + + parsedTS2, err := time.Parse(layout, secondTS) + if err != nil { + return 
0, err + } + + return int(parsedTS2.Sub(parsedTS1).Seconds()), nil + } + + calculateTimeToReadyAverage := func(ngfLogs string) (string, error) { + var reconcilingLine, nginxReloadLine string + const maxCount = 5 + + var times [maxCount]int + var count int + + for _, line := range strings.Split(ngfLogs, "\n") { + // can't just do this line, need to do gateway specific resources + if reconcilingLine == "" && + strings.Contains(line, "Reconciling the resource\",\"controller\"") && + strings.Contains(line, "\"controllerGroup\":\"gateway.networking.k8s.io\"") { + reconcilingLine = line + } + + if strings.Contains(line, "NGINX configuration was successfully updated") && reconcilingLine != "" { + nginxReloadLine = line + + timeDifference, err := calculateTimeDifferenceBetweenLogLines(reconcilingLine, nginxReloadLine) + if err != nil { + return "", err + } + reconcilingLine = "" + + times[count] = timeDifference + count++ + if count == maxCount-1 { + break + } + } + } + + var sum float64 + for _, time := range times { + sum += float64(time) + } + + avgTime := sum / float64(count+1) + + if avgTime < 1 { + return "< 1", nil + } + + return strconv.FormatFloat(avgTime, 'f', -1, 64), nil + } + + calculateTimeToReadyTotal := func(ngfLogs, startingLogSubstring string) (string, error) { + var firstLine, lastLine string + for _, line := range strings.Split(ngfLogs, "\n") { + if firstLine == "" && strings.Contains(line, startingLogSubstring) { + firstLine = line + } + + if strings.Contains(line, "NGINX configuration was successfully updated") { + lastLine = line + } + } + + timeToReadyTotal, err := calculateTimeDifferenceBetweenLogLines(firstLine, lastLine) + if err != nil { + return "", err + } + + stringTimeToReadyTotal := strconv.Itoa(timeToReadyTotal) + if stringTimeToReadyTotal == "0" { + stringTimeToReadyTotal = "< 1" + } + + return stringTimeToReadyTotal, nil + } + runTestWithMetrics := func( testName string, resourceCount int, test func(resourceCount int) error, 
startWithNGFSetup bool, + timeToReadyStartingLogSubstring string, ) { var ( metricExistTimeout = 2 * time.Minute @@ -331,6 +434,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) } + checkContainerLogsForErrors(ngfPodName, false) + reloadCount, err := framework.GetReloadCount(promInstance, ngfPodName) Expect(err).ToNot(HaveOccurred()) fmt.Println(reloadCount) @@ -355,15 +460,31 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(err).ToNot(HaveOccurred()) fmt.Println(eventsBuckets) + logs, err := resourceManager.GetPodLogs(ngfNamespace, ngfPodName, &core.PodLogOptions{ + Container: "nginx-gateway", + }) + Expect(err).ToNot(HaveOccurred()) + + timeToReadyTotal, err := calculateTimeToReadyTotal(logs, timeToReadyStartingLogSubstring) + Expect(err).ToNot(HaveOccurred()) + + fmt.Println(timeToReadyTotal) + + timeToReadyAvgSingle, err := calculateTimeToReadyAverage(logs) + Expect(err).ToNot(HaveOccurred()) + fmt.Println(timeToReadyAvgSingle) + results := reconfigTestResults{ - Name: testName, - EventsBuckets: eventsBuckets, - ReloadBuckets: reloadBuckets, - NumResources: resourceCount, - NGINXReloads: int(reloadCount), - NGINXReloadAvgTime: int(reloadAvgTime), - EventsCount: int(eventsCount), - EventsAvgTime: int(eventsAvgTime), + Name: testName, + EventsBuckets: eventsBuckets, + ReloadBuckets: reloadBuckets, + NumResources: resourceCount, + TimeToReadyTotal: timeToReadyTotal, + TimeToReadyAvgSingle: timeToReadyAvgSingle, + NGINXReloads: int(reloadCount), + NGINXReloadAvgTime: int(reloadAvgTime), + EventsCount: int(eventsCount), + EventsAvgTime: int(eventsAvgTime), } err = writeReconfigResults(outFile, results) @@ -373,31 +494,46 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } It("test 1", func() { - runTestWithMetrics("1", 30, createResourcesGWLast, false) + 
timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" + + runTestWithMetrics("1", + 30, + createResourcesGWLast, + false, + timeToReadyStartingLogSubstring, + ) }) It("test 2", func() { - runTestWithMetrics("2", 30, createResourcesRoutesLast, true) + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" + + runTestWithMetrics("2", + 30, + createResourcesRoutesLast, + true, + timeToReadyStartingLogSubstring, + ) }) It("test 3", func() { - runTestWithMetrics("3", 30, createResourcesGWLast, true) - }) + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" - //It("test 2", func() { - // Expect(createResourcesRoutesLast(30)).To(Succeed()) - // Expect(checkResourceCreation(30)).To(Succeed()) - // cleanupResources(30) - //}) + runTestWithMetrics("3", + 30, + createResourcesGWLast, + true, + timeToReadyStartingLogSubstring, + ) + }) }) type reconfigTestResults struct { Name string + TimeToReadyTotal string + TimeToReadyAvgSingle string EventsBuckets []framework.Bucket ReloadBuckets []framework.Bucket NumResources int - TimeToReadyTotal int - TimeToReadyAvgSingle int NGINXReloads int NGINXReloadAvgTime int EventsCount int From 946a547fee8dfdfb1163a6de334939846f01e783 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Jul 2024 16:04:46 -0700 Subject: [PATCH 08/42] Add logic documentation for time to ready average --- tests/suite/reconfig_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 3d862644da..4b10392159 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -256,6 +256,9 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig var times [maxCount]int var count int + // parse the logs until it reaches a reconciling log line for a gateway resource, then it compares that + // timestamp to the next NGINX configuration update. 
When it reaches the NGINX configuration update line, + // it will reset the reconciling log line and set it to the next reconciling log line. for _, line := range strings.Split(ngfLogs, "\n") { // can't just do this line, need to do gateway specific resources if reconcilingLine == "" && From 4c7aad137fbbb4ac5a5974b7cea856e3c38b74ed Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Jul 2024 10:57:30 -0700 Subject: [PATCH 09/42] Add test run on gke --- tests/results/reconfig/v1.3.0/1.3.0-oss.md | 101 +++++++++++++++++++++ tests/suite/reconfig_test.go | 39 +++++++- 2 files changed, 137 insertions(+), 3 deletions(-) create mode 100644 tests/results/reconfig/v1.3.0/1.3.0-oss.md diff --git a/tests/results/reconfig/v1.3.0/1.3.0-oss.md b/tests/results/reconfig/v1.3.0/1.3.0-oss.md new file mode 100644 index 0000000000..120fe199e7 --- /dev/null +++ b/tests/results/reconfig/v1.3.0/1.3.0-oss.md @@ -0,0 +1,101 @@ +# Results + +## Test environment + +NGINX Plus: false + +GKE Cluster: + +- Node count: 3 +- k8s version: v1.29.6-gke.1038001 +- vCPUs per node: 2 +- RAM per node: 4019168Ki +- Max pods per node: 110 +- Zone: us-central1-c +- Instance Type: e2-medium + +## Test 1 NumResources 30 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 1 +- TimeToReadyAvgSingle: < 1 +- NGINX Reloads: 2 +- NGINX Reload Average Time: 97 +- Reload distribution: + - 500ms: 2 + - 1000ms: 2 + - 5000ms: 2 + - 10000ms: 2 + - 30000ms: 2 + - +Infms: 2 + +### Event Batch Processing + +- Event Batch Total: 6 +- Event Batch Processing Average Time: 70ms +- Event Batch Processing distribution: + - 500ms: 6 + - 1000ms: 6 + - 5000ms: 6 + - 10000ms: 6 + - 30000ms: 6 + - +Infms: 6 + + +## Test 2 NumResources 30 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 10 +- TimeToReadyAvgSingle: < 1 +- NGINX Reloads: 63 +- NGINX Reload Average Time: 129 +- Reload distribution: + - 500ms: 63 + - 1000ms: 63 + - 5000ms: 63 + - 10000ms: 63 + - 30000ms: 63 + - +Infms: 63 + +### Event Batch Processing + 
+- Event Batch Total: 339 +- Event Batch Processing Average Time: 29ms +- Event Batch Processing distribution: + - 500ms: 339 + - 1000ms: 339 + - 5000ms: 339 + - 10000ms: 339 + - 30000ms: 339 + - +Infms: 339 + + +## Test 3 NumResources 30 + +### Reloads and Time to Ready + +- TimeToReadyTotal: < 1 +- TimeToReadyAvgSingle: < 1 +- NGINX Reloads: 66 +- NGINX Reload Average Time: 131 +- Reload distribution: + - 500ms: 66 + - 1000ms: 66 + - 5000ms: 66 + - 10000ms: 66 + - 30000ms: 66 + - +Infms: 66 + +### Event Batch Processing + +- Event Batch Total: 338 +- Event Batch Processing Average Time: 26ms +- Event Batch Processing distribution: + - 500ms: 338 + - 1000ms: 338 + - 5000ms: 338 + - 10000ms: 338 + - 30000ms: 338 + - +Infms: 338 diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 4b10392159..a223a70cf2 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -496,7 +496,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig cleanupResources(30) } - It("test 1", func() { + It("test 1 - 30 resources", func() { timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" runTestWithMetrics("1", @@ -507,7 +507,18 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ) }) - It("test 2", func() { + //It("test 1 - 150 resources", func() { + // timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" + // + // runTestWithMetrics("1", + // 150, + // createResourcesGWLast, + // false, + // timeToReadyStartingLogSubstring, + // ) + //}) + + It("test 2 - 30 resources", func() { timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" runTestWithMetrics("2", @@ -518,7 +529,18 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ) }) - It("test 3", func() { + //It("test 2 - 150 resources", func() { + // timeToReadyStartingLogSubstring := "Reconciling the 
resource\",\"controller\":\"httproute\"" + // + // runTestWithMetrics("2", + // 150, + // createResourcesRoutesLast, + // true, + // timeToReadyStartingLogSubstring, + // ) + //}) + + It("test 3 - 30 resources", func() { timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" runTestWithMetrics("3", @@ -528,6 +550,17 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig timeToReadyStartingLogSubstring, ) }) + + //It("test 3 - 150 resources", func() { + // timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" + // + // runTestWithMetrics("3", + // 150, + // createResourcesGWLast, + // true, + // timeToReadyStartingLogSubstring, + // ) + //}) }) type reconfigTestResults struct { From 6f94138e57c182da4f52fcc964b2bb523d691777 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Jul 2024 14:58:59 -0700 Subject: [PATCH 10/42] Add test description --- tests/suite/reconfig_test.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index a223a70cf2..06009a46cf 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -340,6 +340,9 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig getStartTime := func() time.Time { return startTime } modifyStartTime := func() { startTime = startTime.Add(500 * time.Millisecond) } + output, err := framework.InstallGatewayAPI(getDefaultSetupCfg().gwAPIVersion) + Expect(err).ToNot(HaveOccurred(), string(output)) + if startWithNGFSetup { setup(getDefaultSetupCfg()) @@ -366,9 +369,6 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ), ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) } - } else { - output, err := framework.InstallGatewayAPI(getDefaultSetupCfg().gwAPIVersion) - Expect(err).ToNot(HaveOccurred(), string(output)) } 
Expect(test(resourceCount)).To(Succeed()) @@ -493,9 +493,10 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig err = writeReconfigResults(outFile, results) Expect(err).ToNot(HaveOccurred()) - cleanupResources(30) + cleanupResources(resourceCount) } + // Test 1 - Resources exist before start-up It("test 1 - 30 resources", func() { timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" @@ -518,6 +519,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig // ) //}) + // Test 2 - Start NGF, deploy Gateway, create many resources attached to GW It("test 2 - 30 resources", func() { timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" @@ -540,6 +542,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig // ) //}) + // Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway It("test 3 - 30 resources", func() { timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" From 25e5502bad4aefc3da9057ae64254f571a5526d3 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Jul 2024 15:30:12 -0700 Subject: [PATCH 11/42] Remove completed TODO --- tests/framework/queries.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/framework/queries.go b/tests/framework/queries.go index d45786cd7a..7577d9ba5e 100644 --- a/tests/framework/queries.go +++ b/tests/framework/queries.go @@ -9,8 +9,6 @@ import ( "github.com/prometheus/common/model" ) -// TODO: having gomega and regular queries here is bad - func getFirstValueOfVector(query string, promInstance PrometheusInstance) (float64, error) { result, err := promInstance.Query(query) if err != nil { From 898b94c10bcdbd96adeb4ad3576d17c75565ab5e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 1 Aug 2024 11:38:39 -0700 Subject: [PATCH 12/42] Sort imports --- tests/suite/reconfig_test.go | 66 
+++++++++++++++++------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 06009a46cf..5328305a25 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -16,12 +16,10 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctlr "sigs.k8s.io/controller-runtime" - v1 "sigs.k8s.io/gateway-api/apis/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) @@ -508,16 +506,16 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ) }) - //It("test 1 - 150 resources", func() { - // timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" - // - // runTestWithMetrics("1", - // 150, - // createResourcesGWLast, - // false, - // timeToReadyStartingLogSubstring, - // ) - //}) + It("test 1 - 150 resources", func() { + timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" + + runTestWithMetrics("1", + 150, + createResourcesGWLast, + false, + timeToReadyStartingLogSubstring, + ) + }) // Test 2 - Start NGF, deploy Gateway, create many resources attached to GW It("test 2 - 30 resources", func() { @@ -531,16 +529,16 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ) }) - //It("test 2 - 150 resources", func() { - // timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" - // - // runTestWithMetrics("2", - // 150, - // createResourcesRoutesLast, - // true, - // timeToReadyStartingLogSubstring, - // ) - //}) + It("test 2 - 150 resources", func() { + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" + + runTestWithMetrics("2", + 150, + createResourcesRoutesLast, + true, + 
timeToReadyStartingLogSubstring, + ) + }) // Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway It("test 3 - 30 resources", func() { @@ -554,16 +552,16 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ) }) - //It("test 3 - 150 resources", func() { - // timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" - // - // runTestWithMetrics("3", - // 150, - // createResourcesGWLast, - // true, - // timeToReadyStartingLogSubstring, - // ) - //}) + It("test 3 - 150 resources", func() { + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" + + runTestWithMetrics("3", + 150, + createResourcesGWLast, + true, + timeToReadyStartingLogSubstring, + ) + }) }) type reconfigTestResults struct { From b99121f61af3d0b88b4aace13772e8727ece7aaf Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 1 Aug 2024 11:50:01 -0700 Subject: [PATCH 13/42] Change timeouts for creating resources --- tests/suite/reconfig_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 5328305a25..29fd457ed7 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -99,7 +99,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } createResourcesGWLast := func(resourceCount int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.CreateTimeout) defer cancel() for i := 1; i <= resourceCount; i++ { @@ -136,7 +136,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } createResourcesRoutesLast := func(resourceCount int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + ctx, cancel := context.WithTimeout(context.Background(), 
timeoutConfig.CreateTimeout) defer cancel() for i := 1; i <= resourceCount; i++ { From 0d0f29beace76c2dd7ede19887b33710f097dd5e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 1 Aug 2024 11:51:21 -0700 Subject: [PATCH 14/42] Remove debugging print lines --- tests/suite/reconfig_test.go | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 29fd457ed7..de0c8cfd16 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -84,14 +84,12 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig sedOutput, err := sedCmd.Output() if err != nil { - fmt.Println(err.Error() + ": " + string(sedOutput)) return err } kubectlCmd.Stdin = bytes.NewReader(sedOutput) - output, err := kubectlCmd.CombinedOutput() + _, err = kubectlCmd.CombinedOutput() if err != nil { - fmt.Println(err.Error() + ": " + string(output)) return err } } @@ -439,27 +437,21 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig reloadCount, err := framework.GetReloadCount(promInstance, ngfPodName) Expect(err).ToNot(HaveOccurred()) - fmt.Println(reloadCount) reloadAvgTime, err := framework.GetReloadAvgTime(promInstance, ngfPodName) Expect(err).ToNot(HaveOccurred()) - fmt.Println(reloadAvgTime) reloadBuckets, err := framework.GetReloadBuckets(promInstance, ngfPodName) Expect(err).ToNot(HaveOccurred()) - fmt.Println(reloadBuckets) eventsCount, err := framework.GetEventsCount(promInstance, ngfPodName) Expect(err).ToNot(HaveOccurred()) - fmt.Println(eventsCount) eventsAvgTime, err := framework.GetEventsAvgTime(promInstance, ngfPodName) Expect(err).ToNot(HaveOccurred()) - fmt.Println(eventsAvgTime) eventsBuckets, err := framework.GetEventsBuckets(promInstance, ngfPodName) Expect(err).ToNot(HaveOccurred()) - fmt.Println(eventsBuckets) logs, err := resourceManager.GetPodLogs(ngfNamespace, ngfPodName, &core.PodLogOptions{ Container: 
"nginx-gateway", @@ -469,11 +461,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig timeToReadyTotal, err := calculateTimeToReadyTotal(logs, timeToReadyStartingLogSubstring) Expect(err).ToNot(HaveOccurred()) - fmt.Println(timeToReadyTotal) - timeToReadyAvgSingle, err := calculateTimeToReadyAverage(logs) Expect(err).ToNot(HaveOccurred()) - fmt.Println(timeToReadyAvgSingle) results := reconfigTestResults{ Name: testName, From 107898bdf52b897af9af54a32b37dacf07537389 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 1 Aug 2024 12:55:49 -0700 Subject: [PATCH 15/42] Add test results from 150 numResource run --- tests/results/reconfig/v1.3.0/1.3.0-oss.md | 149 ++++++++++++++++----- 1 file changed, 118 insertions(+), 31 deletions(-) diff --git a/tests/results/reconfig/v1.3.0/1.3.0-oss.md b/tests/results/reconfig/v1.3.0/1.3.0-oss.md index 120fe199e7..cd08f3efd0 100644 --- a/tests/results/reconfig/v1.3.0/1.3.0-oss.md +++ b/tests/results/reconfig/v1.3.0/1.3.0-oss.md @@ -6,10 +6,10 @@ NGINX Plus: false GKE Cluster: -- Node count: 3 -- k8s version: v1.29.6-gke.1038001 +- Node count: 4 +- k8s version: v1.29.6-gke.1254000 - vCPUs per node: 2 -- RAM per node: 4019168Ki +- RAM per node: 4019160Ki - Max pods per node: 110 - Zone: us-central1-c - Instance Type: e2-medium @@ -18,10 +18,10 @@ GKE Cluster: ### Reloads and Time to Ready -- TimeToReadyTotal: 1 +- TimeToReadyTotal: 2 - TimeToReadyAvgSingle: < 1 - NGINX Reloads: 2 -- NGINX Reload Average Time: 97 +- NGINX Reload Average Time: 76 - Reload distribution: - 500ms: 2 - 1000ms: 2 @@ -33,7 +33,36 @@ GKE Cluster: ### Event Batch Processing - Event Batch Total: 6 -- Event Batch Processing Average Time: 70ms +- Event Batch Processing Average Time: 108ms +- Event Batch Processing distribution: + - 500ms: 5 + - 1000ms: 6 + - 5000ms: 6 + - 10000ms: 6 + - 30000ms: 6 + - +Infms: 6 + + +## Test 1 NumResources 150 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 4 +- TimeToReadyAvgSingle: < 1 
+- NGINX Reloads: 3 +- NGINX Reload Average Time: 120 +- Reload distribution: + - 500ms: 3 + - 1000ms: 3 + - 5000ms: 3 + - 10000ms: 3 + - 30000ms: 3 + - +Infms: 3 + +### Event Batch Processing + +- Event Batch Total: 6 +- Event Batch Processing Average Time: 111ms - Event Batch Processing distribution: - 500ms: 6 - 1000ms: 6 @@ -49,8 +78,66 @@ GKE Cluster: - TimeToReadyTotal: 10 - TimeToReadyAvgSingle: < 1 +- NGINX Reloads: 61 +- NGINX Reload Average Time: 127 +- Reload distribution: + - 500ms: 61 + - 1000ms: 61 + - 5000ms: 61 + - 10000ms: 61 + - 30000ms: 61 + - +Infms: 61 + +### Event Batch Processing + +- Event Batch Total: 336 +- Event Batch Processing Average Time: 30ms +- Event Batch Processing distribution: + - 500ms: 335 + - 1000ms: 336 + - 5000ms: 336 + - 10000ms: 336 + - 30000ms: 336 + - +Infms: 336 + + +## Test 2 NumResources 150 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 52 +- TimeToReadyAvgSingle: < 1 +- NGINX Reloads: 301 +- NGINX Reload Average Time: 131 +- Reload distribution: + - 500ms: 301 + - 1000ms: 301 + - 5000ms: 301 + - 10000ms: 301 + - 30000ms: 301 + - +Infms: 301 + +### Event Batch Processing + +- Event Batch Total: 1655 +- Event Batch Processing Average Time: 30ms +- Event Batch Processing distribution: + - 500ms: 1654 + - 1000ms: 1655 + - 5000ms: 1655 + - 10000ms: 1655 + - 30000ms: 1655 + - +Infms: 1655 + + +## Test 3 NumResources 30 + +### Reloads and Time to Ready + +- TimeToReadyTotal: < 1 +- TimeToReadyAvgSingle: < 1 - NGINX Reloads: 63 -- NGINX Reload Average Time: 129 +- NGINX Reload Average Time: 132 - Reload distribution: - 500ms: 63 - 1000ms: 63 @@ -61,41 +148,41 @@ GKE Cluster: ### Event Batch Processing -- Event Batch Total: 339 -- Event Batch Processing Average Time: 29ms +- Event Batch Total: 336 +- Event Batch Processing Average Time: 25ms - Event Batch Processing distribution: - - 500ms: 339 - - 1000ms: 339 - - 5000ms: 339 - - 10000ms: 339 - - 30000ms: 339 - - +Infms: 339 + - 500ms: 336 + - 1000ms: 336 + - 
5000ms: 336 + - 10000ms: 336 + - 30000ms: 336 + - +Infms: 336 -## Test 3 NumResources 30 +## Test 3 NumResources 150 ### Reloads and Time to Ready - TimeToReadyTotal: < 1 - TimeToReadyAvgSingle: < 1 -- NGINX Reloads: 66 +- NGINX Reloads: 318 - NGINX Reload Average Time: 131 - Reload distribution: - - 500ms: 66 - - 1000ms: 66 - - 5000ms: 66 - - 10000ms: 66 - - 30000ms: 66 - - +Infms: 66 + - 500ms: 318 + - 1000ms: 318 + - 5000ms: 318 + - 10000ms: 318 + - 30000ms: 318 + - +Infms: 318 ### Event Batch Processing -- Event Batch Total: 338 -- Event Batch Processing Average Time: 26ms +- Event Batch Total: 1669 +- Event Batch Processing Average Time: 25ms - Event Batch Processing distribution: - - 500ms: 338 - - 1000ms: 338 - - 5000ms: 338 - - 10000ms: 338 - - 30000ms: 338 - - +Infms: 338 + - 500ms: 1669 + - 1000ms: 1669 + - 5000ms: 1669 + - 10000ms: 1669 + - 30000ms: 1669 + - +Infms: 1669 From 7f38b8f6b1596afdb88aff5611e46454f497cad9 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Aug 2024 16:43:22 -0700 Subject: [PATCH 16/42] Remove certificate namespace --- ...certificate-ns-and-cafe-secret.yaml => cafe-secret.yaml} | 6 ------ tests/suite/manifests/reconfig/gateway.yaml | 1 - tests/suite/manifests/reconfig/reference-grant.yaml | 1 - tests/suite/reconfig_test.go | 6 +++--- 4 files changed, 3 insertions(+), 11 deletions(-) rename tests/suite/manifests/reconfig/{certificate-ns-and-cafe-secret.yaml => cafe-secret.yaml} (97%) diff --git a/tests/suite/manifests/reconfig/certificate-ns-and-cafe-secret.yaml b/tests/suite/manifests/reconfig/cafe-secret.yaml similarity index 97% rename from tests/suite/manifests/reconfig/certificate-ns-and-cafe-secret.yaml rename to tests/suite/manifests/reconfig/cafe-secret.yaml index d4037e2d67..4510460bba 100644 --- a/tests/suite/manifests/reconfig/certificate-ns-and-cafe-secret.yaml +++ b/tests/suite/manifests/reconfig/cafe-secret.yaml @@ -1,13 +1,7 @@ apiVersion: v1 -kind: Namespace -metadata: - name: certificate ---- -apiVersion: 
v1 kind: Secret metadata: name: cafe-secret - namespace: certificate type: kubernetes.io/tls data: tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzakNDQVpvQ0NRQzdCdVdXdWRtRkNEQU5CZ2txaGtpRzl3MEJBUXNGQURBYk1Sa3dGd1lEVlFRRERCQmoKWVdabExtVjRZVzF3YkdVdVkyOXRNQjRYRFRJeU1EY3hOREl4TlRJek9Wb1hEVEl6TURjeE5ESXhOVEl6T1ZvdwpHekVaTUJjR0ExVUVBd3dRWTJGbVpTNWxlR0Z0Y0d4bExtTnZiVENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFECmdnRVBBRENDQVFvQ2dnRUJBTHFZMnRHNFc5aStFYzJhdnV4Q2prb2tnUUx1ek10U1Rnc1RNaEhuK3ZRUmxIam8KVzFLRnMvQVdlS25UUStyTWVKVWNseis4M3QwRGtyRThwUisxR2NKSE50WlNMb0NEYUlRN0Nhck5nY1daS0o4Qgo1WDNnVS9YeVJHZjI2c1REd2xzU3NkSEQ1U2U3K2Vab3NPcTdHTVF3K25HR2NVZ0VtL1Q1UEMvY05PWE0zZWxGClRPL051MStoMzROVG9BbDNQdTF2QlpMcDNQVERtQ0thaEROV0NWbUJQUWpNNFI4VERsbFhhMHQ5Z1o1MTRSRzUKWHlZWTNtdzZpUzIrR1dYVXllMjFuWVV4UEhZbDV4RHY0c0FXaGRXbElweHlZQlNCRURjczN6QlI2bFF1OWkxZAp0R1k4dGJ3blVmcUVUR3NZdWxzc05qcU95V1VEcFdJelhibHhJZVVDQXdFQUFUQU5CZ2txaGtpRzl3MEJBUXNGCkFBT0NBUUVBcjkrZWJ0U1dzSnhLTGtLZlRkek1ISFhOd2Y5ZXFVbHNtTXZmMGdBdWVKTUpUR215dG1iWjlpbXQKL2RnWlpYVE9hTElHUG9oZ3BpS0l5eVVRZVdGQ2F0NHRxWkNPVWRhbUloOGk0Q1h6QVJYVHNvcUNOenNNLzZMRQphM25XbFZyS2lmZHYrWkxyRi8vblc0VVNvOEoxaCtQeDljY0tpRDZZU0RVUERDRGh1RUtFWXcvbHpoUDJVOXNmCnl6cEJKVGQ4enFyM3paTjNGWWlITmgzYlRhQS82di9jU2lyamNTK1EwQXg4RWpzQzYxRjRVMTc4QzdWNWRCKzQKcmtPTy9QNlA0UFlWNTRZZHMvRjE2WkZJTHFBNENCYnExRExuYWRxamxyN3NPbzl2ZzNnWFNMYXBVVkdtZ2todAp6VlZPWG1mU0Z4OS90MDBHUi95bUdPbERJbWlXMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== diff --git a/tests/suite/manifests/reconfig/gateway.yaml b/tests/suite/manifests/reconfig/gateway.yaml index fd9d52675b..ed6c91eb6a 100644 --- a/tests/suite/manifests/reconfig/gateway.yaml +++ b/tests/suite/manifests/reconfig/gateway.yaml @@ -22,4 +22,3 @@ spec: certificateRefs: - kind: Secret name: cafe-secret - namespace: certificate diff --git a/tests/suite/manifests/reconfig/reference-grant.yaml b/tests/suite/manifests/reconfig/reference-grant.yaml index 053bbbdcc2..e01df54009 100644 --- a/tests/suite/manifests/reconfig/reference-grant.yaml +++ 
b/tests/suite/manifests/reconfig/reference-grant.yaml @@ -2,7 +2,6 @@ apiVersion: gateway.networking.k8s.io/v1beta1 kind: ReferenceGrant metadata: name: access-to-cafe-secret - namespace: certificate spec: to: - group: "" diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index de0c8cfd16..ae24505b03 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -117,7 +117,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles( []string{ - "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/cafe-secret.yaml", "reconfig/reference-grant.yaml", }, ns.Name)).To(Succeed()) @@ -158,7 +158,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles( []string{ - "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/cafe-secret.yaml", "reconfig/reference-grant.yaml", "reconfig/gateway.yaml", }, @@ -207,7 +207,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } Expect(resourceManager.DeleteFromFiles([]string{ - "reconfig/certificate-ns-and-cafe-secret.yaml", + "reconfig/cafe-secret.yaml", "reconfig/reference-grant.yaml", "reconfig/gateway.yaml", }, ns.Name)).To(Succeed()) From 20a1f17ec6d3b50f3423c66a763026c6fc862c66 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Aug 2024 17:27:41 -0700 Subject: [PATCH 17/42] Delete entire namespace instead of specific manifest resources --- tests/suite/reconfig_test.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index ae24505b03..0cfef781a3 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -206,11 +206,7 @@ var _ = Describe("Reconfiguration Performance Testing", 
Ordered, Label("reconfig }, } - Expect(resourceManager.DeleteFromFiles([]string{ - "reconfig/cafe-secret.yaml", - "reconfig/reference-grant.yaml", - "reconfig/gateway.yaml", - }, ns.Name)).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) } getTimeStampFromLogLine := func(logLine string) string { From 43d2b8559085568ffb23a17a706abc0a4af847bd Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Aug 2024 09:24:46 -0700 Subject: [PATCH 18/42] Change comparison on HTTPRoutes to be equal operator --- tests/suite/reconfig_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 0cfef781a3..57bb1d6bdd 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -183,7 +183,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig if err := k8sClient.List(ctx, &routes); err != nil { return fmt.Errorf("error getting HTTPRoutes: %w", err) } - Expect(len(routes.Items)).To(BeNumerically(">=", resourceCount*3)) + Expect(len(routes.Items)).To(BeNumerically("==", resourceCount*3)) var pods core.PodList if err := k8sClient.List(ctx, &pods); err != nil { From 377778ea004a77512c3a1f4d366c26bc201aa4fd Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Aug 2024 09:53:14 -0700 Subject: [PATCH 19/42] Add units to results --- tests/suite/reconfig_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 57bb1d6bdd..eca0877051 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -567,10 +567,10 @@ const reconfigResultTemplate = ` ### Reloads and Time to Ready -- TimeToReadyTotal: {{ .TimeToReadyTotal }} -- TimeToReadyAvgSingle: {{ .TimeToReadyAvgSingle }} +- TimeToReadyTotal: {{ .TimeToReadyTotal }}s +- TimeToReadyAvgSingle: {{ .TimeToReadyAvgSingle }}s - NGINX Reloads: {{ .NGINXReloads }} -- NGINX Reload 
Average Time: {{ .NGINXReloadAvgTime }} +- NGINX Reload Average Time: {{ .NGINXReloadAvgTime }}ms - Reload distribution: {{- range .ReloadBuckets }} - {{ .Le }}ms: {{ .Val }} From bfe2f13e9780e9edec6af16813ff11069ff595a1 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Aug 2024 13:17:11 -0700 Subject: [PATCH 20/42] Refactor reconfig namespace to be declared in beforeeach --- tests/suite/reconfig_test.go | 39 ++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index eca0877051..7b66fd5775 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -31,6 +31,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig promInstance framework.PrometheusInstance promPortForwardStopCh = make(chan struct{}) + reconfigNamespace core.Namespace + outFile *os.File ) @@ -58,6 +60,15 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }) BeforeEach(func() { + // need to redeclare this variable to reset its resource version. The framework has some bugs where + // if we set and declare this as a global variable, even after deleting the namespace, when we try to + // recreate it, it will error saying the resource version has already been set. 
+ reconfigNamespace = core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "reconfig", + }, + } + teardown(releaseName) }) @@ -109,18 +120,13 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(k8sClient.Create(ctx, &ns)).To(Succeed()) } - ns := core.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "reconfig", - }, - } - Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) + Expect(resourceManager.Apply([]client.Object{&reconfigNamespace})).To(Succeed()) Expect(resourceManager.ApplyFromFiles( []string{ "reconfig/cafe-secret.yaml", "reconfig/reference-grant.yaml", }, - ns.Name)).To(Succeed()) + reconfigNamespace.Name)).To(Succeed()) Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) @@ -128,7 +134,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig time.Sleep(60 * time.Second) - Expect(resourceManager.ApplyFromFiles([]string{"reconfig/gateway.yaml"}, ns.Name)).To(Succeed()) + Expect(resourceManager.ApplyFromFiles([]string{"reconfig/gateway.yaml"}, reconfigNamespace.Name)).To(Succeed()) return nil } @@ -150,19 +156,14 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig time.Sleep(60 * time.Second) - ns := core.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "reconfig", - }, - } - Expect(resourceManager.Apply([]client.Object{&ns})).To(Succeed()) + Expect(resourceManager.Apply([]client.Object{&reconfigNamespace})).To(Succeed()) Expect(resourceManager.ApplyFromFiles( []string{ "reconfig/cafe-secret.yaml", "reconfig/reference-grant.yaml", "reconfig/gateway.yaml", }, - ns.Name)).To(Succeed()) + reconfigNamespace.Name)).To(Succeed()) Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) @@ -200,13 +201,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(resourceManager.DeleteNamespace(nsName)).To(Succeed()) } - ns := 
core.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: "reconfig", - }, - } - - Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) + Expect(resourceManager.DeleteNamespace(reconfigNamespace.Name)).To(Succeed()) } getTimeStampFromLogLine := func(logLine string) string { From e7709d7a05531f958b0bdab2a40d5751b3c272cf Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Aug 2024 14:07:01 -0700 Subject: [PATCH 21/42] Refactor cleanupResources and cleanup after each test --- tests/suite/reconfig_test.go | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 7b66fd5775..3acd994d60 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -30,6 +30,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig queryRangeStep = 5 * time.Second promInstance framework.PrometheusInstance promPortForwardStopCh = make(chan struct{}) + maxResourceCount = 150 reconfigNamespace core.Namespace @@ -72,12 +73,6 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig teardown(releaseName) }) - AfterAll(func() { - teardown(releaseName) - close(promPortForwardStopCh) - Expect(framework.UninstallPrometheus(resourceManager)).To(Succeed()) - }) - createUniqueResources := func(resourceCount int, fileName string) error { for i := 1; i <= resourceCount; i++ { nsName := "namespace" + strconv.Itoa(i) @@ -195,13 +190,20 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig return nil } - cleanupResources := func(resourceCount int) { - for i := 1; i <= resourceCount; i++ { + cleanupResources := func() error { + var err error + + for i := 1; i <= maxResourceCount; i++ { nsName := "namespace" + strconv.Itoa(i) - Expect(resourceManager.DeleteNamespace(nsName)).To(Succeed()) + resultError := resourceManager.DeleteNamespace(nsName) + if resultError != nil { + err = 
resultError + } } Expect(resourceManager.DeleteNamespace(reconfigNamespace.Name)).To(Succeed()) + + return err } getTimeStampFromLogLine := func(logLine string) string { @@ -470,8 +472,6 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig err = writeReconfigResults(outFile, results) Expect(err).ToNot(HaveOccurred()) - - cleanupResources(resourceCount) } // Test 1 - Resources exist before start-up @@ -542,6 +542,16 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig timeToReadyStartingLogSubstring, ) }) + + AfterEach(func() { + Expect(cleanupResources()).Should(Succeed()) + }) + + AfterAll(func() { + teardown(releaseName) + close(promPortForwardStopCh) + Expect(framework.UninstallPrometheus(resourceManager)).Should(Succeed()) + }) }) type reconfigTestResults struct { From 714be9fcc1fb4fc4112e2d594a0d97b5a469c72f Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Aug 2024 14:16:46 -0700 Subject: [PATCH 22/42] Add reconfiguration test to nfr checks in system suite test --- tests/suite/system_suite_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index f0f3aa6993..bb5168d551 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -274,6 +274,7 @@ var _ = BeforeSuite(func() { "longevity-teardown", // - running longevity teardown (deployment will already exist) "telemetry", // - running telemetry test (NGF will be deployed as part of the test) "scale", // - running scale test (this test will deploy its own version) + "reconfiguration", // - running reconfiguration test (test will deploy its own instances) } for _, s := range skipSubstrings { if strings.Contains(labelFilter, s) { @@ -317,7 +318,8 @@ func isNFR(labelFilter string) bool { strings.Contains(labelFilter, "longevity") || strings.Contains(labelFilter, "performance") || strings.Contains(labelFilter, "upgrade") || - 
strings.Contains(labelFilter, "scale") + strings.Contains(labelFilter, "scale") || + strings.Contains(labelFilter, "reconfiguration") } var _ = ReportAfterSuite("Print info on failure", func(report Report) { From bc4862dc1ddbeee0c090179547c02dc0e82afebc Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Aug 2024 14:51:26 -0700 Subject: [PATCH 23/42] Close out file and redeploy NGF for future tests --- tests/suite/reconfig_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 3acd994d60..61136c9095 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -69,8 +69,6 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Name: "reconfig", }, } - - teardown(releaseName) }) createUniqueResources := func(resourceCount int, fileName string) error { @@ -545,12 +543,18 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig AfterEach(func() { Expect(cleanupResources()).Should(Succeed()) + teardown(releaseName) }) AfterAll(func() { - teardown(releaseName) close(promPortForwardStopCh) Expect(framework.UninstallPrometheus(resourceManager)).Should(Succeed()) + Expect(outFile.Close()).To(Succeed()) + + // restoring NGF shared among tests in the suite + cfg := getDefaultSetupCfg() + cfg.nfr = true + setup(cfg) }) }) From 0ee057f4bbe7b58a103bb6f928536255d2b61c5f Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Aug 2024 15:40:22 -0700 Subject: [PATCH 24/42] Add constant maxResourceCount --- tests/suite/reconfig_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 61136c9095..5422317c7a 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -25,12 +25,14 @@ import ( ) var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration", "nfr"), func() { + // 
used for cleaning up resources + const maxResourceCount = 150 + var ( scrapeInterval = 15 * time.Second queryRangeStep = 5 * time.Second promInstance framework.PrometheusInstance promPortForwardStopCh = make(chan struct{}) - maxResourceCount = 150 reconfigNamespace core.Namespace From 97b819ab953bf3a154f920ef8cf160d7967a4771 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 6 Aug 2024 09:00:42 -0700 Subject: [PATCH 25/42] Refactor setup steps --- tests/suite/reconfig_test.go | 200 ++++++++++++++++++----------------- 1 file changed, 104 insertions(+), 96 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 5422317c7a..2cf648b5f4 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -27,6 +27,8 @@ import ( var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration", "nfr"), func() { // used for cleaning up resources const maxResourceCount = 150 + const metricExistTimeout = 2 * time.Minute + const metricExistPolling = 1 * time.Second var ( scrapeInterval = 15 * time.Second @@ -63,6 +65,9 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }) BeforeEach(func() { + output, err := framework.InstallGatewayAPI(getDefaultSetupCfg().gwAPIVersion) + Expect(err).ToNot(HaveOccurred(), string(output)) + // need to redeclare this variable to reset its resource version. The framework has some bugs where // if we set and declare this as a global variable, even after deleting the namespace, when we try to // recreate it, it will error saying the resource version has already been set. 
@@ -312,86 +317,47 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig return stringTimeToReadyTotal, nil } - runTestWithMetrics := func( - testName string, - resourceCount int, - test func(resourceCount int) error, - startWithNGFSetup bool, - timeToReadyStartingLogSubstring string, - ) { - var ( - metricExistTimeout = 2 * time.Minute - metricExistPolling = 1 * time.Second - ngfPodName string - startTime time.Time - ) + deployNGFReturnsNGFPodNameAndStartTime := func() (string, time.Time) { + var startTime time.Time getStartTime := func() time.Time { return startTime } modifyStartTime := func() { startTime = startTime.Add(500 * time.Millisecond) } - output, err := framework.InstallGatewayAPI(getDefaultSetupCfg().gwAPIVersion) - Expect(err).ToNot(HaveOccurred(), string(output)) - - if startWithNGFSetup { - setup(getDefaultSetupCfg()) - - podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(podNames).To(HaveLen(1)) - ngfPodName = podNames[0] - startTime = time.Now() - - queries := []string{ - fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), - fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), - // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time - fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), - } + setup(getDefaultSetupCfg()) + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).To(HaveLen(1)) + ngfPodName := podNames[0] + startTime = time.Now() - for _, q := range queries { - Eventually( - framework.CreateMetricExistChecker( - promInstance, - q, - getStartTime, - modifyStartTime, - ), - 
).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) - } + queries := []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), } - Expect(test(resourceCount)).To(Succeed()) - Expect(checkResourceCreation(resourceCount)).To(Succeed()) - - if !startWithNGFSetup { - setup(getDefaultSetupCfg()) - - podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(podNames).To(HaveLen(1)) - ngfPodName = podNames[0] - startTime = time.Now() - - // if i do a new instance of NGF each time, I might not need start time and can just do the endtime. - queries := []string{ - fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), - fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), - // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time - fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), - } - - for _, q := range queries { - Eventually( - framework.CreateMetricExistChecker( - promInstance, - q, - getStartTime, - modifyStartTime, - ), - ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) - } + for _, q := range queries { + Eventually( + framework.CreateMetricExistChecker( + promInstance, + q, + getStartTime, + modifyStartTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) } + return ngfPodName, startTime + } + + collectMetrics := func( + testName string, + resourceCount int, + 
timeToReadyStartingLogSubstring string, + ngfPodName string, + startTime time.Time, + ) { time.Sleep(2 * scrapeInterval) endTime := time.Now() @@ -476,70 +442,112 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig // Test 1 - Resources exist before start-up It("test 1 - 30 resources", func() { + resourceCount := 30 timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" + test := createResourcesGWLast(resourceCount) - runTestWithMetrics("1", - 30, - createResourcesGWLast, - false, + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) + + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + + collectMetrics("1", + resourceCount, timeToReadyStartingLogSubstring, + ngfPodName, + startTime, ) }) It("test 1 - 150 resources", func() { + resourceCount := 150 timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" + test := createResourcesGWLast(resourceCount) + + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) + + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - runTestWithMetrics("1", - 150, - createResourcesGWLast, - false, + collectMetrics("1", + resourceCount, timeToReadyStartingLogSubstring, + ngfPodName, + startTime, ) }) // Test 2 - Start NGF, deploy Gateway, create many resources attached to GW It("test 2 - 30 resources", func() { + resourceCount := 30 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" + test := createResourcesRoutesLast(resourceCount) - runTestWithMetrics("2", - 30, - createResourcesRoutesLast, - true, + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) + + collectMetrics("2", + resourceCount, timeToReadyStartingLogSubstring, + ngfPodName, + startTime, ) }) It("test 2 - 150 resources", func() { + resourceCount := 150 
timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" + test := createResourcesRoutesLast(resourceCount) + + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - runTestWithMetrics("2", - 150, - createResourcesRoutesLast, - true, + collectMetrics("2", + resourceCount, timeToReadyStartingLogSubstring, + ngfPodName, + startTime, ) }) - // Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway + // Test 3 - Start NGF, create many resources attached to a Gateway, deploy the Gateway It("test 3 - 30 resources", func() { + resourceCount := 30 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" + test := createResourcesGWLast(resourceCount) + + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - runTestWithMetrics("3", - 30, - createResourcesGWLast, - true, + collectMetrics("3", + resourceCount, timeToReadyStartingLogSubstring, + ngfPodName, + startTime, ) }) It("test 3 - 150 resources", func() { + resourceCount := 30 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" + test := createResourcesGWLast(resourceCount) + + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - runTestWithMetrics("3", - 150, - createResourcesGWLast, - true, + collectMetrics("3", + resourceCount, timeToReadyStartingLogSubstring, + ngfPodName, + startTime, ) }) From 531bc4e4f7fc8bab2e6dfc2089a0a5168cd82603 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 6 Aug 2024 11:57:26 -0700 Subject: [PATCH 26/42] Refactor ginkgo test specs --- tests/suite/reconfig_test.go | 177 ++++++++++++++++++----------------- 1 file changed, 90 
insertions(+), 87 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 2cf648b5f4..250c2fb9a9 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -440,115 +440,118 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(err).ToNot(HaveOccurred()) } - // Test 1 - Resources exist before start-up - It("test 1 - 30 resources", func() { - resourceCount := 30 - timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" - test := createResourcesGWLast(resourceCount) - - Expect(test).To(Succeed()) - Expect(checkResourceCreation(resourceCount)).To(Succeed()) - - ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - - collectMetrics("1", - resourceCount, - timeToReadyStartingLogSubstring, - ngfPodName, - startTime, - ) - }) + When("resources exist before startup", func() { + It("gathers metrics after creating 30 resources", func() { + resourceCount := 30 + timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" + test := createResourcesGWLast(resourceCount) + + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) + + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + + collectMetrics("1", + resourceCount, + timeToReadyStartingLogSubstring, + ngfPodName, + startTime, + ) + }) - It("test 1 - 150 resources", func() { - resourceCount := 150 - timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" - test := createResourcesGWLast(resourceCount) + It("gathers metrics after creating 150 resources", func() { + resourceCount := 150 + timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" + test := createResourcesGWLast(resourceCount) - Expect(test).To(Succeed()) - Expect(checkResourceCreation(resourceCount)).To(Succeed()) + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() 
+ ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - collectMetrics("1", - resourceCount, - timeToReadyStartingLogSubstring, - ngfPodName, - startTime, - ) + collectMetrics("1", + resourceCount, + timeToReadyStartingLogSubstring, + ngfPodName, + startTime, + ) + }) }) - // Test 2 - Start NGF, deploy Gateway, create many resources attached to GW - It("test 2 - 30 resources", func() { - resourceCount := 30 - timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" - test := createResourcesRoutesLast(resourceCount) + When("NGF and Gateway resource are deployed first", func() { + It("gathers metrics after creating 30 resources", func() { + resourceCount := 30 + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" + test := createResourcesRoutesLast(resourceCount) - ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - Expect(test).To(Succeed()) - Expect(checkResourceCreation(resourceCount)).To(Succeed()) + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("2", - resourceCount, - timeToReadyStartingLogSubstring, - ngfPodName, - startTime, - ) - }) + collectMetrics("2", + resourceCount, + timeToReadyStartingLogSubstring, + ngfPodName, + startTime, + ) + }) - It("test 2 - 150 resources", func() { - resourceCount := 150 - timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" - test := createResourcesRoutesLast(resourceCount) + It("gathers metrics after creating 150 resources", func() { + resourceCount := 150 + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" + test := createResourcesRoutesLast(resourceCount) - ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - 
Expect(test).To(Succeed()) - Expect(checkResourceCreation(resourceCount)).To(Succeed()) + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("2", - resourceCount, - timeToReadyStartingLogSubstring, - ngfPodName, - startTime, - ) + collectMetrics("2", + resourceCount, + timeToReadyStartingLogSubstring, + ngfPodName, + startTime, + ) + }) }) - // Test 3 - Start NGF, create many resources attached to a Gateway, deploy the Gateway - It("test 3 - 30 resources", func() { - resourceCount := 30 - timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" - test := createResourcesGWLast(resourceCount) + When("NGF and resources are deployed first", func() { + It("gathers metrics after creating 30 resources", func() { + resourceCount := 30 + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" + test := createResourcesGWLast(resourceCount) - ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - Expect(test).To(Succeed()) - Expect(checkResourceCreation(resourceCount)).To(Succeed()) + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("3", - resourceCount, - timeToReadyStartingLogSubstring, - ngfPodName, - startTime, - ) - }) + collectMetrics("3", + resourceCount, + timeToReadyStartingLogSubstring, + ngfPodName, + startTime, + ) + }) - It("test 3 - 150 resources", func() { - resourceCount := 30 - timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" - test := createResourcesGWLast(resourceCount) + It("gathers metrics after creating 150 resources", func() { + resourceCount := 150 + timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" + test := createResourcesGWLast(resourceCount) - ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() + 
ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - Expect(test).To(Succeed()) - Expect(checkResourceCreation(resourceCount)).To(Succeed()) + Expect(test).To(Succeed()) + Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("3", - resourceCount, - timeToReadyStartingLogSubstring, - ngfPodName, - startTime, - ) + collectMetrics("3", + resourceCount, + timeToReadyStartingLogSubstring, + ngfPodName, + startTime, + ) + }) }) AfterEach(func() { From 509cbdd44d0a13617a4c8452500d05d37c343b5e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 7 Aug 2024 13:56:51 -0700 Subject: [PATCH 27/42] Refactor scale test to include framework queries --- tests/framework/queries.go | 166 ++++++++++++++++++++++++++----- tests/suite/scale_test.go | 198 +++++-------------------------------- 2 files changed, 167 insertions(+), 197 deletions(-) diff --git a/tests/framework/queries.go b/tests/framework/queries.go index 7577d9ba5e..e8f9b55cfc 100644 --- a/tests/framework/queries.go +++ b/tests/framework/queries.go @@ -59,18 +59,39 @@ func GetReloadCount(promInstance PrometheusInstance, ngfPodName string) (float64 ) } -//func getReloadErrsCount(promInstance PrometheusInstance, ngfPodName string, startTime time.Time) float64 { -// return getFirstValueOfVector( -// fmt.Sprintf( -// `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ -// ` - `+ -// `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, -// ngfPodName, -// startTime.Unix(), -// ), -// promInstance, -// ) -//} +func GetReloadCountWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +func GetReloadErrsCountWithStartTime( + promInstance PrometheusInstance, + 
ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} func GetReloadAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { return getFirstValueOfVector( @@ -84,6 +105,27 @@ func GetReloadAvgTime(promInstance PrometheusInstance, ngfPodName string) (float ) } +func GetReloadAvgTimeWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + func GetReloadBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { return getBuckets( fmt.Sprintf( @@ -94,6 +136,23 @@ func GetReloadBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Buc ) } +func GetReloadBucketsWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) ([]Bucket, error) { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + func GetEventsCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { return getFirstValueOfVector( fmt.Sprintf( @@ -104,6 +163,23 @@ func GetEventsCount(promInstance PrometheusInstance, ngfPodName string) (float64 ) } +func 
GetEventsCountWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + func GetEventsAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { return getFirstValueOfVector( fmt.Sprintf( @@ -116,6 +192,27 @@ func GetEventsAvgTime(promInstance PrometheusInstance, ngfPodName string) (float ) } +func GetEventsAvgTimeWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + func GetEventsBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { return getBuckets( fmt.Sprintf( @@ -126,6 +223,23 @@ func GetEventsBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Buc ) } +func GetEventsBucketsWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) ([]Bucket, error) { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + func CreateMetricExistChecker( promInstance PrometheusInstance, query 
string, @@ -175,20 +289,20 @@ func CreateEndTimeFinder( } } -//func createResponseChecker(url, address string, requestTimeout time.Duration) func() error { -// return func() error { -// status, _, err := Get(url, address, requestTimeout) -// if err != nil { -// return fmt.Errorf("bad response: %w", err) -// } -// -// if status != 200 { -// return fmt.Errorf("unexpected status code: %d", status) -// } -// -// return nil -// } -//} +func CreateResponseChecker(url, address string, requestTimeout time.Duration) func() error { + return func() error { + status, _, err := Get(url, address, requestTimeout) + if err != nil { + return fmt.Errorf("bad response: %w", err) + } + + if status != 200 { + return fmt.Errorf("unexpected status code: %d", status) + } + + return nil + } +} type Bucket struct { Le string diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 053d575d26..b7a2a6d460 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -3,7 +3,6 @@ package main import ( "bytes" "context" - "errors" "fmt" "io" "os" @@ -17,7 +16,6 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" promv1 "github.com/prometheus/client_golang/api/prometheus/v1" - "github.com/prometheus/common/model" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -109,15 +107,10 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ngfPodName = podNames[0] }) - type bucket struct { - Le string - Val int - } - type scaleTestResults struct { Name string - EventsBuckets []bucket - ReloadBuckets []bucket + EventsBuckets []framework.Bucket + ReloadBuckets []framework.Bucket EventsAvgTime int EventsCount int NGFContainerRestarts int @@ -173,91 +166,6 @@ The logs are attached only if there are errors. 
return tmpl.Execute(dest, results) } - createResponseChecker := func(url, address string) func() error { - return func() error { - status, _, err := framework.Get(url, address, timeoutConfig.RequestTimeout) - if err != nil { - return fmt.Errorf("bad response: %w", err) - } - - if status != 200 { - return fmt.Errorf("unexpected status code: %d", status) - } - - return nil - } - } - - createMetricExistChecker := func(query string, getTime func() time.Time, modifyTime func()) func() error { - return func() error { - queryWithTimestamp := fmt.Sprintf("%s @ %d", query, getTime().Unix()) - - result, err := promInstance.Query(queryWithTimestamp) - if err != nil { - return fmt.Errorf("failed to query Prometheus: %w", err) - } - - if result.String() == "" { - modifyTime() - return errors.New("empty result") - } - - return nil - } - } - - createEndTimeFinder := func(query string, startTime time.Time, t *time.Time) func() error { - return func() error { - result, err := promInstance.QueryRange(query, promv1.Range{ - Start: startTime, - End: *t, - Step: queryRangeStep, - }) - if err != nil { - return fmt.Errorf("failed to query Prometheus: %w", err) - } - - if result.String() == "" { - *t = time.Now() - return errors.New("empty result") - } - - return nil - } - } - - getFirstValueOfVector := func(query string) float64 { - result, err := promInstance.Query(query) - Expect(err).ToNot(HaveOccurred()) - - val, err := framework.GetFirstValueOfPrometheusVector(result) - Expect(err).ToNot(HaveOccurred()) - - return val - } - - getBuckets := func(query string) []bucket { - result, err := promInstance.Query(query) - Expect(err).ToNot(HaveOccurred()) - - res, ok := result.(model.Vector) - Expect(ok).To(BeTrue()) - - buckets := make([]bucket, 0, len(res)) - - for _, sample := range res { - le := sample.Metric["le"] - val := float64(sample.Value) - bucket := bucket{ - Le: string(le), - Val: int(val), - } - buckets = append(buckets, bucket) - } - - return buckets - } - checkLogErrors := 
func( containerName string, substrings []string, @@ -323,7 +231,8 @@ The logs are attached only if there are errors. for _, q := range queries { Eventually( - createMetricExistChecker( + framework.CreateMetricExistChecker( + promInstance, q, getStartTime, modifyStartTime, @@ -345,10 +254,12 @@ The logs are attached only if there are errors. // the rate query may not return any data. // To ensure it returns data, we increase the startTime. Eventually( - createEndTimeFinder( + framework.CreateEndTimeFinder( + promInstance, fmt.Sprintf(`rate(container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}[2m])`, ngfPodName), startTime, &endTime, + queryRangeStep, ), ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) @@ -363,7 +274,8 @@ The logs are attached only if there are errors. for _, q := range queries { Eventually( - createMetricExistChecker( + framework.CreateMetricExistChecker( + promInstance, q, getEndTime, noOpModifier, @@ -414,82 +326,26 @@ The logs are attached only if there are errors. 
Expect(os.Remove(cpuCSV)).To(Succeed()) - reloadCount := getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - ) + reloadCount, err := framework.GetReloadCountWithStartTime(promInstance, ngfPodName, startTime) + Expect(err).ToNot(HaveOccurred()) - reloadErrsCount := getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - ) + reloadErrsCount, err := framework.GetReloadErrsCountWithStartTime(promInstance, ngfPodName, startTime) + Expect(err).ToNot(HaveOccurred()) - reloadAvgTime := getFirstValueOfVector( - fmt.Sprintf( - `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ - ` / `+ - `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, - ngfPodName, - startTime.Unix(), - )) - - reloadBuckets := getBuckets( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - ) + reloadAvgTime, err := framework.GetReloadAvgTimeWithStartTime(promInstance, ngfPodName, startTime) + Expect(err).ToNot(HaveOccurred()) - eventsCount := getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - ) + reloadBuckets, err := framework.GetReloadBucketsWithStartTime(promInstance, ngfPodName, startTime) + Expect(err).ToNot(HaveOccurred()) - 
eventsAvgTime := getFirstValueOfVector( - fmt.Sprintf( - `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ - ` / `+ - `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, - ngfPodName, - startTime.Unix(), - ), - ) + eventsCount, err := framework.GetEventsCountWithStartTime(promInstance, ngfPodName, startTime) + Expect(err).ToNot(HaveOccurred()) - eventsBuckets := getBuckets( - fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - ) + eventsAvgTime, err := framework.GetEventsAvgTimeWithStartTime(promInstance, ngfPodName, startTime) + Expect(err).ToNot(HaveOccurred()) + + eventsBuckets, err := framework.GetEventsBucketsWithStartTime(promInstance, ngfPodName, startTime) + Expect(err).ToNot(HaveOccurred()) // Check container logs for errors @@ -573,7 +429,7 @@ The logs are attached only if there are errors. startCheck := time.Now() Eventually( - createResponseChecker(url, address), + framework.CreateResponseChecker(url, address, timeoutConfig.RequestTimeout), ).WithTimeout(30 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) ttr := time.Since(startCheck) @@ -607,7 +463,7 @@ The logs are attached only if there are errors. } Eventually( - createResponseChecker(url, address), + framework.CreateResponseChecker(url, address, timeoutConfig.RequestTimeout), ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) Expect( @@ -620,7 +476,7 @@ The logs are attached only if there are errors. 
Expect(resourceManager.WaitForPodsToBeReady(ctx, namespace)).To(Succeed()) Eventually( - createResponseChecker(url, address), + framework.CreateResponseChecker(url, address, timeoutConfig.RequestTimeout), ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) } From dc8125fbfdc82bdab32ae55fc476c6a8124a679f Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 7 Aug 2024 15:16:24 -0700 Subject: [PATCH 28/42] Refactor blind sleep to instead wait for pods to be ready --- tests/suite/reconfig_test.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 250c2fb9a9..afd622cb07 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -132,7 +132,14 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) - time.Sleep(60 * time.Second) + for i := 1; i <= resourceCount; i++ { + ns := core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "namespace" + strconv.Itoa(i), + }, + } + Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) + } Expect(resourceManager.ApplyFromFiles([]string{"reconfig/gateway.yaml"}, reconfigNamespace.Name)).To(Succeed()) @@ -154,7 +161,14 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe.yaml")).To(Succeed()) - time.Sleep(60 * time.Second) + for i := 1; i <= resourceCount; i++ { + ns := core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "namespace" + strconv.Itoa(i), + }, + } + Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) + } Expect(resourceManager.Apply([]client.Object{&reconfigNamespace})).To(Succeed()) Expect(resourceManager.ApplyFromFiles( From 410d68d40206f2b0ac3dcc4745acf563fba32735 Mon Sep 17 00:00:00 2001 From: 
Benjamin Jee Date: Thu, 8 Aug 2024 12:08:50 -0700 Subject: [PATCH 29/42] Add and use ApplyFromBuffer to create unique resources --- tests/framework/resourcemanager.go | 63 +++++++++++++++++++----------- tests/suite/reconfig_test.go | 31 ++++++--------- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index e7a585cf79..4ce3fec712 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -115,6 +115,20 @@ func (rm *ResourceManager) Apply(resources []client.Object) error { // ApplyFromFiles creates or updates Kubernetes resources defined within the provided YAML files. func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) error { + for _, file := range files { + data, err := rm.GetFileContents(file) + if err != nil { + return err + } + + if err = rm.ApplyFromBuffer(data, namespace); err != nil { + return err + } + } + return nil +} + +func (rm *ResourceManager) ApplyFromBuffer(buffer *bytes.Buffer, namespace string) error { ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.CreateTimeout) defer cancel() @@ -150,7 +164,7 @@ func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) erro return nil } - return rm.readAndHandleObjects(handlerFunc, files) + return rm.readAndHandleObject(handlerFunc, buffer) } // Delete deletes Kubernetes resources defined as Go objects. 
@@ -213,36 +227,41 @@ func (rm *ResourceManager) DeleteFromFiles(files []string, namespace string) err return nil } - return rm.readAndHandleObjects(handlerFunc, files) -} - -func (rm *ResourceManager) readAndHandleObjects( - handle func(unstructured.Unstructured) error, - files []string, -) error { for _, file := range files { data, err := rm.GetFileContents(file) if err != nil { return err } - decoder := yaml.NewYAMLOrJSONDecoder(data, 4096) - for { - obj := unstructured.Unstructured{} - if err := decoder.Decode(&obj); err != nil { - if errors.Is(err, io.EOF) { - break - } - return fmt.Errorf("error decoding resource: %w", err) - } + if err = rm.readAndHandleObject(handlerFunc, data); err != nil { + return err + } + } - if len(obj.Object) == 0 { - continue - } + return nil +} - if err := handle(obj); err != nil { - return err +func (rm *ResourceManager) readAndHandleObject( + handle func(unstructured.Unstructured) error, + data *bytes.Buffer, +) error { + decoder := yaml.NewYAMLOrJSONDecoder(data, 4096) + + for { + obj := unstructured.Unstructured{} + if err := decoder.Decode(&obj); err != nil { + if errors.Is(err, io.EOF) { + break } + return fmt.Errorf("error decoding resource: %w", err) + } + + if len(obj.Object) == 0 { + continue + } + + if err := handle(obj); err != nil { + return err } } diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index afd622cb07..ea5859eca6 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "os" - "os/exec" "path/filepath" "strconv" "strings" @@ -80,30 +79,24 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig createUniqueResources := func(resourceCount int, fileName string) error { for i := 1; i <= resourceCount; i++ { - nsName := "namespace" + strconv.Itoa(i) - // Command to run sed and capture its output - //nolint:gosec - sedCmd := exec.Command("sed", - "-e", - "s/coffee/coffee"+nsName+"/g", - "-e", - 
"s/tea/tea"+nsName+"/g", - fileName, - ) - // Command to apply using kubectl - kubectlCmd := exec.Command("kubectl", "apply", "-n", nsName, "-f", "-") + namespace := "namespace" + strconv.Itoa(i) - sedOutput, err := sedCmd.Output() + b, err := resourceManager.GetFileContents(fileName) if err != nil { - return err + return fmt.Errorf("error getting manifest file: %w", err) } - kubectlCmd.Stdin = bytes.NewReader(sedOutput) - _, err = kubectlCmd.CombinedOutput() - if err != nil { - return err + fileString := b.String() + fileString = strings.ReplaceAll(fileString, "coffee", "coffee"+namespace) + fileString = strings.ReplaceAll(fileString, "tea", "tea"+namespace) + + data := bytes.NewBufferString(fileString) + + if err := resourceManager.ApplyFromBuffer(data, namespace); err != nil { + return fmt.Errorf("error processing manifest file: %w", err) } } + return nil } From 0d3c60495e074788cf5336100cdfb90a249b6ac0 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 8 Aug 2024 12:20:49 -0700 Subject: [PATCH 30/42] Delete old reconfiguration files --- tests/reconfig/results/1.0.0/1.0.0.md | 79 ------------ tests/reconfig/results/1.1.0/1.1.0.md | 92 -------------- tests/reconfig/results/1.2.0/1.2.0.md | 106 ---------------- tests/reconfig/results/1.3.0/1.3.0.md | 110 ---------------- tests/reconfig/scripts/cafe-routes.yaml | 57 --------- tests/reconfig/scripts/cafe.yaml | 65 ---------- .../certificate-ns-and-cafe-secret.yaml | 14 -- .../scripts/create-resources-gw-last.sh | 31 ----- .../scripts/create-resources-routes-last.sh | 29 ----- tests/reconfig/scripts/delete-multiple.sh | 16 --- tests/reconfig/scripts/gateway.yaml | 25 ---- tests/reconfig/scripts/reference-grant.yaml | 14 -- tests/reconfig/setup.md | 120 ------------------ 13 files changed, 758 deletions(-) delete mode 100644 tests/reconfig/results/1.0.0/1.0.0.md delete mode 100644 tests/reconfig/results/1.1.0/1.1.0.md delete mode 100644 tests/reconfig/results/1.2.0/1.2.0.md delete mode 100644 
tests/reconfig/results/1.3.0/1.3.0.md delete mode 100644 tests/reconfig/scripts/cafe-routes.yaml delete mode 100644 tests/reconfig/scripts/cafe.yaml delete mode 100644 tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml delete mode 100755 tests/reconfig/scripts/create-resources-gw-last.sh delete mode 100755 tests/reconfig/scripts/create-resources-routes-last.sh delete mode 100755 tests/reconfig/scripts/delete-multiple.sh delete mode 100644 tests/reconfig/scripts/gateway.yaml delete mode 100644 tests/reconfig/scripts/reference-grant.yaml delete mode 100644 tests/reconfig/setup.md diff --git a/tests/reconfig/results/1.0.0/1.0.0.md b/tests/reconfig/results/1.0.0/1.0.0.md deleted file mode 100644 index 101bde04be..0000000000 --- a/tests/reconfig/results/1.0.0/1.0.0.md +++ /dev/null @@ -1,79 +0,0 @@ -# Reconfiguration testing Results - - -- [Reconfiguration testing Results](#reconfiguration-testing-results) - - [Test environment](#test-environment) - - [Results Tables](#results-tables) - - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) - - [Event Batch Processing](#event-batch-processing) - - [NumResources -> Total Resources](#numresources---total-resources) - - [Observations](#observations) - - -## Test environment - -GKE cluster: - -- Node count: 3 -- Instance Type: e2-medium -- k8s version: 1.27.3-gke.100 -- Zone: us-central1-c -- Total vCPUs: 6 -- Total RAM: 12GB -- Max pods per node: 110 - -NGF deployment: - -- NGF version: edge - git commit 29b45e38bacd7c4f22834938105e3cda4f29f6d1 -- NGINX Version: 1.25.2 - -## Results Tables - -### NGINX Reloads and Time to Ready - -| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | -|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| -| 1 | 30 | 1 | 1 | 2 | 191 | 100% | 100% | -| 1 | 150 | 2 | 2 | 2 | 440 | 
50% | 100% | -| 2 | 30 | 50 | <1 | 93 | 162 | 100% | 100% | -| 2 | 150 | 208 | <1 | 396 | 281 | 96.46% | 100% | -| 3 | 30 | 1 | 1 | 93 | 129 | 100% | 100% | -| 3 | 150 | 1 | 1 | 453 | 130 | 100% | 100% | - - -### Event Batch Processing - -| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | -|-------------|--------------|-------------------|--------------------------------------|----------|-----------| -| 1 | 30 | 69 | 6.232 | 100% | 100% | -| 1 | 150 | 309 | 3.638 | 99.68% | 100% | -| 2 | 30 | 465 | 38.759 | 100% | 100% | -| 2 | 150 | 1941 | 68.539 | 98.51% | 100% | -| 3 | 30 | 374 | 36.834 | 99.73% | 99.73% | -| 3 | 150 | 1812 | 40.411 | 99.94% | 99.94% | - - -## NumResources -> Total Resources - -| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Total Resources | -|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|-----------------| -| x | 1 | 1 | 1 | x+1 | 2x | 2x | 3x | | -| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 244 | -| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 1204 | - -## Observations - -1. We are reloading after reconciling a ReferenceGrant even when there is no Gateway. This is because we treat every - upsert/delete of a ReferenceGrant as a change. This means we will regenerate NGINX config every time a ReferenceGrant - is created, updated (generation must change), or deleted, even if it does not apply to the accepted Gateway. - - Issue filed: https://github.com/nginxinc/nginx-gateway-fabric/issues/1124 - -2. We are reloading after reconciling a HTTPRoute even when there is no accepted Gateway and no config being generated. - - Issue filed: https://github.com/nginxinc/nginx-gateway-fabric/issues/1123 - -3. Majority of NGINX reloads were in the <= 500ms bucket, with all of them being in the <= 1000ms bucket. 
An increase - in the reload time based on number of configured resources resulting in NGINX configuration changes was observed. - -4. No errors (NGF or NGINX) were observed in any test run. diff --git a/tests/reconfig/results/1.1.0/1.1.0.md b/tests/reconfig/results/1.1.0/1.1.0.md deleted file mode 100644 index 3dcc8ed2e9..0000000000 --- a/tests/reconfig/results/1.1.0/1.1.0.md +++ /dev/null @@ -1,92 +0,0 @@ -# Reconfiguration testing Results - - -- [Reconfiguration testing Results](#reconfiguration-testing-results) - - [Summary](#summary) - - [Test environment](#test-environment) - - [Results Tables](#results-tables) - - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) - - [Event Batch Processing](#event-batch-processing) - - [NumResources to Total Resources](#numresources-to-total-resources) - - [Observations](#observations) - - [Future Improvements](#future-improvements) - - -## Summary - -- Better reload times across all tests -- Similar TimeToReadyTotal and TimeToReadyAveSingle times -- Similar event batch totals -- Slightly better event batch processing average times -- No new errors or issues - -## Test environment - -GKE cluster: - -- Node count: 4 -- Instance Type: n2d-standard-2 -- k8s version: 1.27.3-gke.100 -- Zone: us-west2-a -- Total vCPUs: 8 -- Total RAM: 32GB -- Max pods per node: 110 - -NGF deployment: - -- NGF version: edge - git commit 3cab370a46bccd55c115c16e23a475df2497a3d2 -- NGINX Version: 1.25.3 - -## Results Tables - -### NGINX Reloads and Time to Ready - -| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | -|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| -| 1 | 30 | 1.5 | <1 | 2 | 158.5 | 100% | 100% | -| 1 | 150 | 3.5 | 1 | 2 | 272.5 | 100% | 100% | -| 2 | 30 | 34 | <1 | 93 | 136 | 100% | 100% | -| 2 | 150 | 176.5 | <1 | 451 | 
203.98 | 100% | 100% | -| 3 | 30 | <1 | 1 | 93 | 125.7 | 100% | 100% | -| 3 | 150 | 1 | 1 | 453 | 126.71 | 100% | 100% | - - -### Event Batch Processing - -| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | -|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| -| 1 | 30 | 70 | 5.12 | 100% | 100% | 100% | 100% | 100% | -| 1 | 150 | 309 | 2.14 | 100% | 100% | 100% | 100% | 100% | -| 2 | 30 | 442 | 35.4 | 100% | 100% | 100% | 100% | 100% | -| 2 | 150 | 2009 | 54.76 | 100% | 100% | 100% | 100% | 100% | -| 3 | 30 | 373 | 35.72 | 99.73% | 99.73% | 100% | 100% | 100% | -| 3 | 150 | 1813 | 39.46 | 99.94% | 99.94% | 99.94% | 99.94% | 100% | - -> Note: The outlier for test #3 is the event batch that contains the Gateway. It took ~13s to process. - -## NumResources to Total Resources - -| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Attached HTTPRoutes | Total Resources | -|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|---------------------|-----------------| -| x | 1 | 1 | 1 | x+1 | 2x | 2x | 3x | 2x | | -| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 60 | 244 | -| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 300 | 1204 | - -> Note: Only 2x HTTPRoutes attach to the Gateway because the parentRef name in the `cafe-tls-redirect` HTTPRoute is incorrect. This will be fixed in the next release. - -## Observations - -1. The following issues still exist: - - - https://github.com/nginxinc/nginx-gateway-fabric/issues/1124 - - https://github.com/nginxinc/nginx-gateway-fabric/issues/1123 - -2. All NGINX reloads were in the <= 500ms bucket. 
An increase in the reload time based on number of configured resources resulting in NGINX configuration changes was observed. - -3. No errors (NGF or NGINX) were observed in any test run. - -4. The majority of the event batches were processed in 500ms or less except the 3rd test. In the 3rd test, we create the Gateway resource after all the apps and routes. The batch that contains the Gateway is the only one that takes longer than 500ms. It takes ~13s. - -## Future Improvements - -1. Fix the parentRef name in the `cafe-tls-redirect` [HTTPRoute](/tests/reconfig/scripts/cafe-routes.yaml), so it matches the deployed Gateway. diff --git a/tests/reconfig/results/1.2.0/1.2.0.md b/tests/reconfig/results/1.2.0/1.2.0.md deleted file mode 100644 index 543c40e6ab..0000000000 --- a/tests/reconfig/results/1.2.0/1.2.0.md +++ /dev/null @@ -1,106 +0,0 @@ -# Reconfiguration testing Results - - -- [Reconfiguration testing Results](#reconfiguration-testing-results) - - [Summary](#summary) - - [Test environment](#test-environment) - - [Results Tables](#results-tables) - - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) - - [Event Batch Processing](#event-batch-processing) - - [NumResources to Total Resources](#numresources-to-total-resources) - - [Observations](#observations) - - [Future Improvements](#future-improvements) - - -## Summary - -- Time to ready stayed consistent, if not slightly faster. -- Reload time has slightly increased in some instances. -- Number of batch events has reduced, subsequently increasing the average time of each batch. 
- -## Test environment - -GKE cluster: - -- Node count: 3 -- Instance Type: e2-medium -- k8s version: 1.27.8-gke.1067004 -- Zone: us-west2-a -- Total vCPUs: 6 -- Total RAM: 12GB -- Max pods per node: 110 - -NGF deployment: - -- NGF version: edge - git commit 96a44240d317875406a8aef8fd1e424f2fb906eb -- NGINX OSS Version: 1.25.4 -- NGINX Plus Version: R31 - -## Results Tables - -> Note: After fixing the `cafe-tls-redirect` to point to the proper Gateway, tests that created 450 HTTPRoutes failed due to https://github.com/nginxinc/nginx-gateway-fabric/issues/1107. Therefore, those tests were re-run after reverting the `cafe-tls-redirect` issue to maintain consistency with the previous release tests. Going forward, results should look different once the above bug is fixed. Added N+ tests, but without testing 150 since it has the bug mentioned above. - -### NGINX Reloads and Time to Ready - -#### OSS - -| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | -|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| -| 1 | 30 | 2 | <1 | 2 | 189.5 | 100% | 100% | -| 1 | 150 | 2 | <1 | 2 | 389 | 100% | 100% | -| 2 | 30 | 30 | <1 | 94 | 161 | 100% | 100% | -| 2 | 150 | 154 | <1 | 387 | 267.48 | 100% | 100% | -| 3 | 30 | <1 | <1 | 94 | 127.91 | 100% | 100% | -| 3 | 150 | <1 | <1 | 454 | 128 | 100% | 100% | - -#### Plus - -| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | -|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| -| 1 | 30 | 1 | <1 | 2 | 151.5 | 100% | 100% | -| 2 | 30 | 30 | <1 | 94 | 157 | 100% | 100% | -| 3 | 30 | <1 | <1 | 94 | 128 | 100% | 100% | - -### Event Batch Processing - -#### OSS 
- -| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | -|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| -| 1 | 30 | 5 | 733.6 | 80% | 80% | 100% | 100% | 100% | -| 1 | 150 | 5 | 2967 | 40% | 40% | 40% | 40% | 40% | -| 2 | 30 | 371 | 57.32 | 100% | 100% | 100% | 100% | 100% | -| 2 | 150 | 1743 | 75.87 | 98.45% | 100% | 100% | 100% | 100% | -| 3 | 30 | 370 | 37.48 | 99.73% | 99.73% | 100% | 100% | 100% | -| 3 | 150 | 1808 | 40.18 | 99.94% | 99.94% | 99.94% | 99.94% | 100% | - -#### Plus - -| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | -|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| -| 1 | 30 | 3 | 1170 | 66% | 66% | 100% | 100% | 100% | -| 2 | 30 | 370 | 58.79 | 100% | 100% | 100% | 100% | 100% | -| 3 | 30 | 370 | 41.32 | 99.73% | 99.73% | 100% | 100% | 100% | - -## NumResources to Total Resources - -| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Attached HTTPRoutes | Total Resources | -|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|---------------------|-----------------| -| x | 1 | 1 | 1 | x+1 | 2x | 2x | 3x | 2x | | -| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 60 | 244 | -| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 300 | 1204 | - -> Note: Only 2x HTTPRoutes attach to the Gateway because the parentRef name in the `cafe-tls-redirect` HTTPRoute is incorrect. This has been fixed, but until https://github.com/nginxinc/nginx-gateway-fabric/issues/1107 is fixed we can't actually run the test successfully. 
- -## Observations - -1. Reload time seems to have a increased slightly in a few instances, though time to ready is consistent if not faster. - -2. Processing fewer batches overall due to improvements in resource event tracking. Overall processing time didn't change much, so the average increased due to fewer batches. - -3. No errors in the logs. - - -## Future Improvements - -Fix https://github.com/nginxinc/nginx-gateway-fabric/issues/1107 to allow for 150 resource tests to properly run. diff --git a/tests/reconfig/results/1.3.0/1.3.0.md b/tests/reconfig/results/1.3.0/1.3.0.md deleted file mode 100644 index 4ccb9f38f2..0000000000 --- a/tests/reconfig/results/1.3.0/1.3.0.md +++ /dev/null @@ -1,110 +0,0 @@ -# Reconfiguration testing Results - - -- [Reconfiguration testing Results](#reconfiguration-testing-results) - - [Summary](#summary) - - [Test environment](#test-environment) - - [Results Tables](#results-tables) - - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) - - [Event Batch Processing](#event-batch-processing) - - [NumResources to Total Resources](#numresources-to-total-resources) - - [Observations](#observations) - - [Future Improvements](#future-improvements) - - -## Summary - -- Due to fix https://github.com/nginxinc/nginx-gateway-fabric/issues/1107, time to ready, reload time, and event batch processing - time increased for all 150 resource tests. -- For all 30 resource tests, results were mostly consistent to prior results. 
- -## Test environment - -GKE cluster: - -- Node count: 3 -- Instance Type: e2-medium -- k8s version: 1.28.9-gke.1000000 -- Zone: us-central1-c -- Total vCPUs: 6 -- Total RAM: 12GB -- Max pods per node: 110 - -NGF deployment: - -- NGF version: edge - git commit 7c9bf23ed89861c9ce7b725f2c1686f4c24ef2f9 -- NGINX OSS Version: 1.27.0 -- NGINX Plus Version: R32 - -## Results Tables - -### NGINX Reloads and Time to Ready - -#### OSS - -| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | -|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| -| 1 | 30 | 2 | <1 | 2 | 190 | 100% | 100% | -| 1 | 150 | 2 | <1 | 2 | 542 | 50% | 100% | -| 2 | 30 | 37 | <1 | 94 | 169 | 100% | 100% | -| 2 | 150 | 204 | <1 | 387 | 326 | 88% | 100% | -| 3 | 30 | <1 | <1 | 94 | 129 | 100% | 100% | -| 3 | 150 | <1 | <1 | 454 | 130 | 100% | 100% | - -#### Plus - -| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | -|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| -| 1 | 30 | 1 | <1 | 2 | 220.5 | 100% | 100% | -| 1 | 150 | 1.5 | <1 | 2 | 528.5 | 50% | 100% | -| 2 | 30 | 41 | <1 | 94 | 176.8 | 100% | 100% | -| 2 | 150 | 199 | <1 | 391 | 320.56 | 94.1% | 100% | -| 3 | 30 | <1 | <1 | 94 | 128.5 | 100% | 100% | -| 3 | 150 | <1 | <1 | 454 | 129.2 | 100% | 100% | - -### Event Batch Processing - -#### OSS - -| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | -|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| -| 1 | 30 | 5 | 726.6 
| 80% | 80% | 100% | 100% | 100% | -| 1 | 150 | 5 | 4457 | 40% | 80% | 80% | 80% | 100% | -| 2 | 30 | 371 | 59.5 | 99.7% | 100% | 100% | 100% | 100% | -| 2 | 150 | 1742 | 93.5 | 92.9% | 99.99% | 100% | 100% | 100% | -| 3 | 30 | 370 | 43.9 | 99.85% | 99.85% | 100% | 100% | 100% | -| 3 | 150 | 1810 | 44.8 | 99.99% | 99.99% | 99.99% | 100% | 100% | - -#### Plus - -| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | -|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|--------------| -| 1 | 30 | 6 | 84 | 100% | 100% | 100% | 100% | 100% | -| 1 | 150 | 5 | 4544.3 | 40% | 80% | 80% | 80% | 100% | -| 2 | 30 | 370 | 59.1 | 100% | 100% | 100% | 100% | 100% | -| 2 | 150 | 1747 | 93.2 | 94.1% | 99.99% | 100% | 100% | 100% | -| 3 | 30 | 370 | 41.33 | 99.99% | 99.99% | 100% | 100% | 100% | -| 3 | 150 | 1809 | 44.88 | 99.99% | 99.99% | 99.99% | 99.99% | 100% | - -## NumResources to Total Resources - -| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Total Resources | -|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|-----------------| -| x | 1 | 1 | 1 | x+1 | 2x | 2x | 3x | | -| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 244 | -| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 1204 | - -## Observations - -1. Reload time and time to ready have increased in 150 resource tests. This is probably due, in part, to the fix of https://github.com/nginxinc/nginx-gateway-fabric/issues/1107 causing the prior - test to only attach 2x of the HTTPRoutes while this test attaches all of them. In the 30 resource tests, results were mostly consistent to prior results. - -2. 
Event batch processing time increased notably in the 150 resource tests, probably for the same reason mentioned in observation #1. - In the 30 resource tests, results were mostly consistent to prior results. - -3. No errors in the logs. - - -## Future Improvements - -None. diff --git a/tests/reconfig/scripts/cafe-routes.yaml b/tests/reconfig/scripts/cafe-routes.yaml deleted file mode 100644 index 006a8eba92..0000000000 --- a/tests/reconfig/scripts/cafe-routes.yaml +++ /dev/null @@ -1,57 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: cafe-tls-redirect -spec: - parentRefs: - - name: gateway - namespace: default - sectionName: http - hostnames: - - "cafe.example.com" - rules: - - filters: - - type: RequestRedirect - requestRedirect: - scheme: https - port: 443 ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: coffee -spec: - parentRefs: - - name: gateway - namespace: default - sectionName: https - hostnames: - - "cafe.example.com" - rules: - - matches: - - path: - type: PathPrefix - value: /coffee - backendRefs: - - name: coffee - port: 80 ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: tea -spec: - parentRefs: - - name: gateway - sectionName: https - namespace: default - hostnames: - - "cafe.example.com" - rules: - - matches: - - path: - type: PathPrefix - value: /tea - backendRefs: - - name: tea - port: 80 diff --git a/tests/reconfig/scripts/cafe.yaml b/tests/reconfig/scripts/cafe.yaml deleted file mode 100644 index 2d03ae59ff..0000000000 --- a/tests/reconfig/scripts/cafe.yaml +++ /dev/null @@ -1,65 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: coffee -spec: - replicas: 1 - selector: - matchLabels: - app: coffee - template: - metadata: - labels: - app: coffee - spec: - containers: - - name: coffee - image: nginxdemos/nginx-hello:plain-text - ports: - - containerPort: 8080 ---- -apiVersion: v1 -kind: Service -metadata: - name: coffee -spec: - 
ports: - - port: 80 - targetPort: 8080 - protocol: TCP - name: http - selector: - app: coffee ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tea -spec: - replicas: 1 - selector: - matchLabels: - app: tea - template: - metadata: - labels: - app: tea - spec: - containers: - - name: tea - image: nginxdemos/nginx-hello:plain-text - ports: - - containerPort: 8080 ---- -apiVersion: v1 -kind: Service -metadata: - name: tea -spec: - ports: - - port: 80 - targetPort: 8080 - protocol: TCP - name: http - selector: - app: tea diff --git a/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml b/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml deleted file mode 100644 index d4037e2d67..0000000000 --- a/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: certificate ---- -apiVersion: v1 -kind: Secret -metadata: - name: cafe-secret - namespace: certificate -type: kubernetes.io/tls -data: - tls.crt: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzakNDQVpvQ0NRQzdCdVdXdWRtRkNEQU5CZ2txaGtpRzl3MEJBUXNGQURBYk1Sa3dGd1lEVlFRRERCQmoKWVdabExtVjRZVzF3YkdVdVkyOXRNQjRYRFRJeU1EY3hOREl4TlRJek9Wb1hEVEl6TURjeE5ESXhOVEl6T1ZvdwpHekVaTUJjR0ExVUVBd3dRWTJGbVpTNWxlR0Z0Y0d4bExtTnZiVENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFECmdnRVBBRENDQVFvQ2dnRUJBTHFZMnRHNFc5aStFYzJhdnV4Q2prb2tnUUx1ek10U1Rnc1RNaEhuK3ZRUmxIam8KVzFLRnMvQVdlS25UUStyTWVKVWNseis4M3QwRGtyRThwUisxR2NKSE50WlNMb0NEYUlRN0Nhck5nY1daS0o4Qgo1WDNnVS9YeVJHZjI2c1REd2xzU3NkSEQ1U2U3K2Vab3NPcTdHTVF3K25HR2NVZ0VtL1Q1UEMvY05PWE0zZWxGClRPL051MStoMzROVG9BbDNQdTF2QlpMcDNQVERtQ0thaEROV0NWbUJQUWpNNFI4VERsbFhhMHQ5Z1o1MTRSRzUKWHlZWTNtdzZpUzIrR1dYVXllMjFuWVV4UEhZbDV4RHY0c0FXaGRXbElweHlZQlNCRURjczN6QlI2bFF1OWkxZAp0R1k4dGJ3blVmcUVUR3NZdWxzc05qcU95V1VEcFdJelhibHhJZVVDQXdFQUFUQU5CZ2txaGtpRzl3MEJBUXNGCkFBT0NBUUVBcjkrZWJ0U1dzSnhLTGtLZlRkek1ISFhOd2Y5ZXFVbHNtTXZmMGdBdWVKTUpUR215dG1iWjlpbXQKL2RnWlpYVE9hTElHUG9oZ3BpS0l5eVVRZVdGQ2F0NHRxWkNPVWRhbUloOGk0Q1h6QVJYVHNvcUNOenNNLzZMRQphM25XbFZyS2lmZHYrWkxyRi8vblc0VVNvOEoxaCtQeDljY0tpRDZZU0RVUERDRGh1RUtFWXcvbHpoUDJVOXNmCnl6cEJKVGQ4enFyM3paTjNGWWlITmgzYlRhQS82di9jU2lyamNTK1EwQXg4RWpzQzYxRjRVMTc4QzdWNWRCKzQKcmtPTy9QNlA0UFlWNTRZZHMvRjE2WkZJTHFBNENCYnExRExuYWRxamxyN3NPbzl2ZzNnWFNMYXBVVkdtZ2todAp6VlZPWG1mU0Z4OS90MDBHUi95bUdPbERJbWlXMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - tls.key: 
LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVit2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFv
R0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K diff --git a/tests/reconfig/scripts/create-resources-gw-last.sh b/tests/reconfig/scripts/create-resources-gw-last.sh deleted file mode 100755 index a1be0fc9e6..0000000000 --- a/tests/reconfig/scripts/create-resources-gw-last.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -num_namespaces=$1 - -# Create namespaces -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - kubectl create namespace "$namespace_name" -done - -# Create single instance resources -kubectl create -f certificate-ns-and-cafe-secret.yaml -kubectl create -f reference-grant.yaml - -# Create backend service and apps -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe.yaml | kubectl apply -n "$namespace_name" -f - -done - -# Create routes -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe-routes.yaml | kubectl apply -n "$namespace_name" -f - -done - -# Wait for apps to be ready -sleep 60 - -# Create Gateway -kubectl create -f gateway.yaml diff --git a/tests/reconfig/scripts/create-resources-routes-last.sh b/tests/reconfig/scripts/create-resources-routes-last.sh deleted file mode 100755 index be41d9a706..0000000000 --- a/tests/reconfig/scripts/create-resources-routes-last.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -num_namespaces=$1 - -# Create namespaces -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - kubectl create namespace "$namespace_name" -done - -# Create backend service and apps -for ((i = 1; i <= num_namespaces; i++)); do - 
namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe.yaml | kubectl apply -n "$namespace_name" -f - -done - -# Wait for apps to be ready -sleep 60 - -# Create single instance resources -kubectl create -f certificate-ns-and-cafe-secret.yaml -kubectl create -f reference-grant.yaml -kubectl create -f gateway.yaml - -# Create routes -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe-routes.yaml | kubectl apply -n "$namespace_name" -f - -done diff --git a/tests/reconfig/scripts/delete-multiple.sh b/tests/reconfig/scripts/delete-multiple.sh deleted file mode 100755 index 2f9752e8c9..0000000000 --- a/tests/reconfig/scripts/delete-multiple.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -num_namespaces=$1 - -# Delete namespaces -namespaces="" -for ((i = 1; i <= num_namespaces; i++)); do - namespaces+="namespace${i} " -done - -kubectl delete namespace "${namespaces}" - -# Delete single instance resources -kubectl delete -f gateway.yaml -kubectl delete -f reference-grant.yaml -kubectl delete -f certificate-ns-and-cafe-secret.yaml diff --git a/tests/reconfig/scripts/gateway.yaml b/tests/reconfig/scripts/gateway.yaml deleted file mode 100644 index fd9d52675b..0000000000 --- a/tests/reconfig/scripts/gateway.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: Gateway -metadata: - name: gateway -spec: - gatewayClassName: nginx - listeners: - - name: http - port: 80 - protocol: HTTP - allowedRoutes: - namespaces: - from: "All" - - name: https - port: 443 - protocol: HTTPS - allowedRoutes: - namespaces: - from: "All" - tls: - mode: Terminate - certificateRefs: - - kind: Secret - name: cafe-secret - namespace: certificate diff --git a/tests/reconfig/scripts/reference-grant.yaml b/tests/reconfig/scripts/reference-grant.yaml deleted file mode 100644 index 
053bbbdcc2..0000000000 --- a/tests/reconfig/scripts/reference-grant.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1beta1 -kind: ReferenceGrant -metadata: - name: access-to-cafe-secret - namespace: certificate -spec: - to: - - group: "" - kind: Secret - name: cafe-secret # if you omit this name, then Gateways in default ns can access all Secrets in the certificate ns - from: - - group: gateway.networking.k8s.io - kind: Gateway - namespace: default diff --git a/tests/reconfig/setup.md b/tests/reconfig/setup.md deleted file mode 100644 index 8729115544..0000000000 --- a/tests/reconfig/setup.md +++ /dev/null @@ -1,120 +0,0 @@ -# Reconfig tests - - -- [Reconfig tests](#reconfig-tests) - - [Goals](#goals) - - [Test Environment](#test-environment) - - [Setup](#setup) - - [Tests](#tests) - - [Test 1: Resources exist before start-up](#test-1-resources-exist-before-start-up) - - [Test 2: Start NGF, deploy Gateway, create many resources attached to GW](#test-2-start-ngf-deploy-gateway-create-many-resources-attached-to-gw) - - [Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway](#test-3-start-ngf-create-many-resources-attached-to-a-gateway-deploy-the-gateway) - - -## Goals - -- Measure how long it takes NGF to reconfigure NGINX and update statuses when a number of Gateway API and - referenced core Kubernetes resources are created at once. -- Two runs of each test should be ran with differing numbers of resources. Each run will deploy: - - a single Gateway, Secret, and ReferenceGrant resources - - `x+1` number of namespaces - - `2x` number of backend apps and services - - `3x` number of HTTPRoutes. -- Where x=30 OR x=150. - -## Test Environment - -The following cluster will be sufficient: - -- A Kubernetes cluster with 4 nodes on GKE - - Node: e2-medium (2 vCPU, 4GB memory) - -## Setup - -1. Create cloud cluster -2. 
Install Gateway API Resources: - - ```bash - kubectl kustomize config/crd/gateway-api/standard | kubectl apply -f - - ``` - -3. Deploy NGF from edge using Helm install and wait for LoadBalancer Service to be ready - (NOTE: For Test 1, deploy AFTER resources): - - ```console - helm install my-release oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --version 0.0.0-edge \ - --create-namespace --wait -n nginx-gateway --set nginxGateway.productTelemetry.enable=false - ``` - -4. Run tests: - 1. There are 3 versions of the reconfiguration tests that need to be ran, with a low and high number of resources. - Therefore, a full test suite includes 6 test runs. - 2. There are scripts to generate the required resources and config changes. - 3. Run each test using the provided script (`scripts/create-resources-gw-last.sh` or - `scripts/create-resources-routes-last.sh` depending on the test). - 4. The scripts accept a number parameter to indicate how many resources should be created. Currently, we are running - with 30 or 150. The scripts will create a single Gateway, Secret and ReferenceGrant resources, `x+1` number of - namespaces, `2x` number of backend apps and services, and `3x` number of HTTPRoutes. - - Note: Clean up after each test run for isolated results. There's a script provided for removing all the test - fixtures `scripts/delete-multiple.sh` which takes a number (needs to be the same number as what was used in the - create script.) -5. After each individual test: - - - Describe the Gateway resource and make sure the status is correct. - - Check the logs of both NGF containers for errors. - - Parse the logs for TimeToReady numbers (see steps 6-7 below). - - Grab metrics. - Note: You can expose metrics by running the below snippet and then navigating to `127.0.0.1:9113/metrics`: - - ```console - GW_POD=$(kubectl get pods -n nginx-gateway | sed -n '2s/^\([^[:space:]]*\).*$/\1/p') - kubectl port-forward $GW_POD -n nginx-gateway 9113:9113 & - ``` - -6. 
Measure NGINX Reloads and Time to Ready Results - 1. TimeToReadyTotal as described in each test - NGF logs. - 2. TimeToReadyAvgSingle which is the average time between updating any resource and the - NGINX configuration being reloaded - NGF logs. - 3. NGINX Reload count - metrics. - 4. Average NGINX reload duration - metrics. - 1. The average reload duration can be computed by taking the `nginx_gateway_fabric_nginx_reloads_milliseconds_sum` - metric value and dividing it by the `nginx_gateway_fabric_nginx_reloads_milliseconds_count` metric value. -7. Measure Event Batch Processing Results - 1. Event Batch Total - `nginx_gateway_fabric_event_batch_processing_milliseconds_count` metric. - 2. Average Event Batch Processing duration - metrics. - 1. The average event batch processing duration can be computed by taking the `nginx_gateway_fabric_event_batch_processing_milliseconds_sum` - metric value and dividing it by the `nginx_gateway_fabric_event_batch_processing_milliseconds_count` metric value. -8. For accuracy, repeat the test suite once or twice, take the averages, and look for any anomalies or outliers. - -## Tests - -### Test 1: Resources exist before start-up - -1. Deploy Gateway resources before start-up: - 1. Use either of the provided scripts with the required number of resources, - e.g. `cd scripts && bash create-resources-gw-last.sh 30`. The script will deploy backend apps and services, wait - 60 seconds for them to be ready, and deploy 1 Gateway, 1 RefGrant, 1 Secret, and HTTPRoutes. - 2. Deploy NGF - 3. Measure TimeToReadyTotal as the time it takes from start-up -> final config written and - NGINX reloaded. Measure the other results as described in steps 6-7 of the [Setup](#setup) section. - -### Test 2: Start NGF, deploy Gateway, create many resources attached to GW - -1. Deploy all Gateway resources, NGF running: - 1. Deploy NGF - 2. Run the provided script with the required number of resources, - e.g. 
`cd scripts && bash create-resources-routes-last.sh 30`. The script will deploy backend apps and services, - wait 60 seconds for them to be ready, and deploy 1 Gateway, 1 Secret, 1 RefGrant, and HTTPRoutes at the same time. - 3. Measure TimeToReadyTotal as the time it takes from NGF receiving the first HTTPRoute resource update (logs will say "reconciling") -> final - config written and NGINX reloaded. Measure the other results as described in steps 6-7 of the [Setup](#setup) section. - -### Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway - -1. Deploy HTTPRoute resources, NGF running, Gateway last: - 1. Deploy NGF - 2. Run the provided script with the required number of resources, - e.g. `cd scripts && bash create-resources-gw-last.sh 30`. - The script will deploy the namespaces, backend apps and services, 1 Secret, 1 ReferenceGrant, and the HTTPRoutes; - wait 60 seconds for the backend apps to be ready, and then deploy 1 Gateway for all HTTPRoutes. - 3. Measure TimeToReadyTotal as the time it takes from NGF receiving gateway resource -> config written and NGINX reloaded. - Measure the other results as described in steps 6-7 of the [Setup](#setup) section. From 85b24ef5094e6ee870c2bff25cedb819818c75ca Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 8 Aug 2024 12:22:34 -0700 Subject: [PATCH 31/42] Revert "Delete old reconfiguration files" This reverts commit 0ee56f5045a1747de64f1c60436975d8f98181a6. 
--- tests/reconfig/results/1.0.0/1.0.0.md | 79 ++++++++++++ tests/reconfig/results/1.1.0/1.1.0.md | 92 ++++++++++++++ tests/reconfig/results/1.2.0/1.2.0.md | 106 ++++++++++++++++ tests/reconfig/results/1.3.0/1.3.0.md | 110 ++++++++++++++++ tests/reconfig/scripts/cafe-routes.yaml | 57 +++++++++ tests/reconfig/scripts/cafe.yaml | 65 ++++++++++ .../certificate-ns-and-cafe-secret.yaml | 14 ++ .../scripts/create-resources-gw-last.sh | 31 +++++ .../scripts/create-resources-routes-last.sh | 29 +++++ tests/reconfig/scripts/delete-multiple.sh | 16 +++ tests/reconfig/scripts/gateway.yaml | 25 ++++ tests/reconfig/scripts/reference-grant.yaml | 14 ++ tests/reconfig/setup.md | 120 ++++++++++++++++++ 13 files changed, 758 insertions(+) create mode 100644 tests/reconfig/results/1.0.0/1.0.0.md create mode 100644 tests/reconfig/results/1.1.0/1.1.0.md create mode 100644 tests/reconfig/results/1.2.0/1.2.0.md create mode 100644 tests/reconfig/results/1.3.0/1.3.0.md create mode 100644 tests/reconfig/scripts/cafe-routes.yaml create mode 100644 tests/reconfig/scripts/cafe.yaml create mode 100644 tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml create mode 100755 tests/reconfig/scripts/create-resources-gw-last.sh create mode 100755 tests/reconfig/scripts/create-resources-routes-last.sh create mode 100755 tests/reconfig/scripts/delete-multiple.sh create mode 100644 tests/reconfig/scripts/gateway.yaml create mode 100644 tests/reconfig/scripts/reference-grant.yaml create mode 100644 tests/reconfig/setup.md diff --git a/tests/reconfig/results/1.0.0/1.0.0.md b/tests/reconfig/results/1.0.0/1.0.0.md new file mode 100644 index 0000000000..101bde04be --- /dev/null +++ b/tests/reconfig/results/1.0.0/1.0.0.md @@ -0,0 +1,79 @@ +# Reconfiguration testing Results + + +- [Reconfiguration testing Results](#reconfiguration-testing-results) + - [Test environment](#test-environment) + - [Results Tables](#results-tables) + - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) + - 
[Event Batch Processing](#event-batch-processing) + - [NumResources -> Total Resources](#numresources---total-resources) + - [Observations](#observations) + + +## Test environment + +GKE cluster: + +- Node count: 3 +- Instance Type: e2-medium +- k8s version: 1.27.3-gke.100 +- Zone: us-central1-c +- Total vCPUs: 6 +- Total RAM: 12GB +- Max pods per node: 110 + +NGF deployment: + +- NGF version: edge - git commit 29b45e38bacd7c4f22834938105e3cda4f29f6d1 +- NGINX Version: 1.25.2 + +## Results Tables + +### NGINX Reloads and Time to Ready + +| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | +|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| +| 1 | 30 | 1 | 1 | 2 | 191 | 100% | 100% | +| 1 | 150 | 2 | 2 | 2 | 440 | 50% | 100% | +| 2 | 30 | 50 | <1 | 93 | 162 | 100% | 100% | +| 2 | 150 | 208 | <1 | 396 | 281 | 96.46% | 100% | +| 3 | 30 | 1 | 1 | 93 | 129 | 100% | 100% | +| 3 | 150 | 1 | 1 | 453 | 130 | 100% | 100% | + + +### Event Batch Processing + +| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | +|-------------|--------------|-------------------|--------------------------------------|----------|-----------| +| 1 | 30 | 69 | 6.232 | 100% | 100% | +| 1 | 150 | 309 | 3.638 | 99.68% | 100% | +| 2 | 30 | 465 | 38.759 | 100% | 100% | +| 2 | 150 | 1941 | 68.539 | 98.51% | 100% | +| 3 | 30 | 374 | 36.834 | 99.73% | 99.73% | +| 3 | 150 | 1812 | 40.411 | 99.94% | 99.94% | + + +## NumResources -> Total Resources + +| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Total Resources | +|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|-----------------| +| x | 1 | 1 | 1 | x+1 | 2x 
| 2x | 3x | | +| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 244 | +| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 1204 | + +## Observations + +1. We are reloading after reconciling a ReferenceGrant even when there is no Gateway. This is because we treat every + upsert/delete of a ReferenceGrant as a change. This means we will regenerate NGINX config every time a ReferenceGrant + is created, updated (generation must change), or deleted, even if it does not apply to the accepted Gateway. + + Issue filed: https://github.com/nginxinc/nginx-gateway-fabric/issues/1124 + +2. We are reloading after reconciling a HTTPRoute even when there is no accepted Gateway and no config being generated. + + Issue filed: https://github.com/nginxinc/nginx-gateway-fabric/issues/1123 + +3. Majority of NGINX reloads were in the <= 500ms bucket, with all of them being in the <= 1000ms bucket. An increase + in the reload time based on number of configured resources resulting in NGINX configuration changes was observed. + +4. No errors (NGF or NGINX) were observed in any test run. 
diff --git a/tests/reconfig/results/1.1.0/1.1.0.md b/tests/reconfig/results/1.1.0/1.1.0.md new file mode 100644 index 0000000000..3dcc8ed2e9 --- /dev/null +++ b/tests/reconfig/results/1.1.0/1.1.0.md @@ -0,0 +1,92 @@ +# Reconfiguration testing Results + + +- [Reconfiguration testing Results](#reconfiguration-testing-results) + - [Summary](#summary) + - [Test environment](#test-environment) + - [Results Tables](#results-tables) + - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) + - [Event Batch Processing](#event-batch-processing) + - [NumResources to Total Resources](#numresources-to-total-resources) + - [Observations](#observations) + - [Future Improvements](#future-improvements) + + +## Summary + +- Better reload times across all tests +- Similar TimeToReadyTotal and TimeToReadyAveSingle times +- Similar event batch totals +- Slightly better event batch processing average times +- No new errors or issues + +## Test environment + +GKE cluster: + +- Node count: 4 +- Instance Type: n2d-standard-2 +- k8s version: 1.27.3-gke.100 +- Zone: us-west2-a +- Total vCPUs: 8 +- Total RAM: 32GB +- Max pods per node: 110 + +NGF deployment: + +- NGF version: edge - git commit 3cab370a46bccd55c115c16e23a475df2497a3d2 +- NGINX Version: 1.25.3 + +## Results Tables + +### NGINX Reloads and Time to Ready + +| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | +|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| +| 1 | 30 | 1.5 | <1 | 2 | 158.5 | 100% | 100% | +| 1 | 150 | 3.5 | 1 | 2 | 272.5 | 100% | 100% | +| 2 | 30 | 34 | <1 | 93 | 136 | 100% | 100% | +| 2 | 150 | 176.5 | <1 | 451 | 203.98 | 100% | 100% | +| 3 | 30 | <1 | 1 | 93 | 125.7 | 100% | 100% | +| 3 | 150 | 1 | 1 | 453 | 126.71 | 100% | 100% | + + +### Event Batch Processing + +| Test number | NumResources | Event 
Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | +|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| +| 1 | 30 | 70 | 5.12 | 100% | 100% | 100% | 100% | 100% | +| 1 | 150 | 309 | 2.14 | 100% | 100% | 100% | 100% | 100% | +| 2 | 30 | 442 | 35.4 | 100% | 100% | 100% | 100% | 100% | +| 2 | 150 | 2009 | 54.76 | 100% | 100% | 100% | 100% | 100% | +| 3 | 30 | 373 | 35.72 | 99.73% | 99.73% | 100% | 100% | 100% | +| 3 | 150 | 1813 | 39.46 | 99.94% | 99.94% | 99.94% | 99.94% | 100% | + +> Note: The outlier for test #3 is the event batch that contains the Gateway. It took ~13s to process. + +## NumResources to Total Resources + +| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Attached HTTPRoutes | Total Resources | +|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|---------------------|-----------------| +| x | 1 | 1 | 1 | x+1 | 2x | 2x | 3x | 2x | | +| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 60 | 244 | +| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 300 | 1204 | + +> Note: Only 2x HTTPRoutes attach to the Gateway because the parentRef name in the `cafe-tls-redirect` HTTPRoute is incorrect. This will be fixed in the next release. + +## Observations + +1. The following issues still exist: + + - https://github.com/nginxinc/nginx-gateway-fabric/issues/1124 + - https://github.com/nginxinc/nginx-gateway-fabric/issues/1123 + +2. All NGINX reloads were in the <= 500ms bucket. An increase in the reload time based on number of configured resources resulting in NGINX configuration changes was observed. + +3. No errors (NGF or NGINX) were observed in any test run. + +4. The majority of the event batches were processed in 500ms or less except the 3rd test. 
In the 3rd test, we create the Gateway resource after all the apps and routes. The batch that contains the Gateway is the only one that takes longer than 500ms. It takes ~13s. + +## Future Improvements + +1. Fix the parentRef name in the `cafe-tls-redirect` [HTTPRoute](/tests/reconfig/scripts/cafe-routes.yaml), so it matches the deployed Gateway. diff --git a/tests/reconfig/results/1.2.0/1.2.0.md b/tests/reconfig/results/1.2.0/1.2.0.md new file mode 100644 index 0000000000..543c40e6ab --- /dev/null +++ b/tests/reconfig/results/1.2.0/1.2.0.md @@ -0,0 +1,106 @@ +# Reconfiguration testing Results + + +- [Reconfiguration testing Results](#reconfiguration-testing-results) + - [Summary](#summary) + - [Test environment](#test-environment) + - [Results Tables](#results-tables) + - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) + - [Event Batch Processing](#event-batch-processing) + - [NumResources to Total Resources](#numresources-to-total-resources) + - [Observations](#observations) + - [Future Improvements](#future-improvements) + + +## Summary + +- Time to ready stayed consistent, if not slightly faster. +- Reload time has slightly increased in some instances. +- Number of batch events has reduced, subsequently increasing the average time of each batch. + +## Test environment + +GKE cluster: + +- Node count: 3 +- Instance Type: e2-medium +- k8s version: 1.27.8-gke.1067004 +- Zone: us-west2-a +- Total vCPUs: 6 +- Total RAM: 12GB +- Max pods per node: 110 + +NGF deployment: + +- NGF version: edge - git commit 96a44240d317875406a8aef8fd1e424f2fb906eb +- NGINX OSS Version: 1.25.4 +- NGINX Plus Version: R31 + +## Results Tables + +> Note: After fixing the `cafe-tls-redirect` to point to the proper Gateway, tests that created 450 HTTPRoutes failed due to https://github.com/nginxinc/nginx-gateway-fabric/issues/1107. Therefore, those tests were re-run after reverting the `cafe-tls-redirect` issue to maintain consistency with the previous release tests. 
Going forward, results should look different once the above bug is fixed. Added N+ tests, but without testing 150 since it has the bug mentioned above. + +### NGINX Reloads and Time to Ready + +#### OSS + +| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | +|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| +| 1 | 30 | 2 | <1 | 2 | 189.5 | 100% | 100% | +| 1 | 150 | 2 | <1 | 2 | 389 | 100% | 100% | +| 2 | 30 | 30 | <1 | 94 | 161 | 100% | 100% | +| 2 | 150 | 154 | <1 | 387 | 267.48 | 100% | 100% | +| 3 | 30 | <1 | <1 | 94 | 127.91 | 100% | 100% | +| 3 | 150 | <1 | <1 | 454 | 128 | 100% | 100% | + +#### Plus + +| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | +|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| +| 1 | 30 | 1 | <1 | 2 | 151.5 | 100% | 100% | +| 2 | 30 | 30 | <1 | 94 | 157 | 100% | 100% | +| 3 | 30 | <1 | <1 | 94 | 128 | 100% | 100% | + +### Event Batch Processing + +#### OSS + +| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | +|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| +| 1 | 30 | 5 | 733.6 | 80% | 80% | 100% | 100% | 100% | +| 1 | 150 | 5 | 2967 | 40% | 40% | 40% | 40% | 40% | +| 2 | 30 | 371 | 57.32 | 100% | 100% | 100% | 100% | 100% | +| 2 | 150 | 1743 | 75.87 | 98.45% | 100% | 100% | 100% | 100% | +| 3 | 30 | 370 | 37.48 | 99.73% | 99.73% | 100% | 100% | 100% | +| 3 | 150 | 1808 | 40.18 | 99.94% | 99.94% | 99.94% | 99.94% | 100% | + +#### Plus + +| Test 
number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | +|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| +| 1 | 30 | 3 | 1170 | 66% | 66% | 100% | 100% | 100% | +| 2 | 30 | 370 | 58.79 | 100% | 100% | 100% | 100% | 100% | +| 3 | 30 | 370 | 41.32 | 99.73% | 99.73% | 100% | 100% | 100% | + +## NumResources to Total Resources + +| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Attached HTTPRoutes | Total Resources | +|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|---------------------|-----------------| +| x | 1 | 1 | 1 | x+1 | 2x | 2x | 3x | 2x | | +| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 60 | 244 | +| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 300 | 1204 | + +> Note: Only 2x HTTPRoutes attach to the Gateway because the parentRef name in the `cafe-tls-redirect` HTTPRoute is incorrect. This has been fixed, but until https://github.com/nginxinc/nginx-gateway-fabric/issues/1107 is fixed we can't actually run the test successfully. + +## Observations + +1. Reload time seems to have a increased slightly in a few instances, though time to ready is consistent if not faster. + +2. Processing fewer batches overall due to improvements in resource event tracking. Overall processing time didn't change much, so the average increased due to fewer batches. + +3. No errors in the logs. + + +## Future Improvements + +Fix https://github.com/nginxinc/nginx-gateway-fabric/issues/1107 to allow for 150 resource tests to properly run. 
diff --git a/tests/reconfig/results/1.3.0/1.3.0.md b/tests/reconfig/results/1.3.0/1.3.0.md new file mode 100644 index 0000000000..4ccb9f38f2 --- /dev/null +++ b/tests/reconfig/results/1.3.0/1.3.0.md @@ -0,0 +1,110 @@ +# Reconfiguration testing Results + + +- [Reconfiguration testing Results](#reconfiguration-testing-results) + - [Summary](#summary) + - [Test environment](#test-environment) + - [Results Tables](#results-tables) + - [NGINX Reloads and Time to Ready](#nginx-reloads-and-time-to-ready) + - [Event Batch Processing](#event-batch-processing) + - [NumResources to Total Resources](#numresources-to-total-resources) + - [Observations](#observations) + - [Future Improvements](#future-improvements) + + +## Summary + +- Due to fix https://github.com/nginxinc/nginx-gateway-fabric/issues/1107, time to ready, reload time, and event batch processing + time increased for all 150 resource tests. +- For all 30 resource tests, results were mostly consistent to prior results. + +## Test environment + +GKE cluster: + +- Node count: 3 +- Instance Type: e2-medium +- k8s version: 1.28.9-gke.1000000 +- Zone: us-central1-c +- Total vCPUs: 6 +- Total RAM: 12GB +- Max pods per node: 110 + +NGF deployment: + +- NGF version: edge - git commit 7c9bf23ed89861c9ce7b725f2c1686f4c24ef2f9 +- NGINX OSS Version: 1.27.0 +- NGINX Plus Version: R32 + +## Results Tables + +### NGINX Reloads and Time to Ready + +#### OSS + +| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | +|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| +| 1 | 30 | 2 | <1 | 2 | 190 | 100% | 100% | +| 1 | 150 | 2 | <1 | 2 | 542 | 50% | 100% | +| 2 | 30 | 37 | <1 | 94 | 169 | 100% | 100% | +| 2 | 150 | 204 | <1 | 387 | 326 | 88% | 100% | +| 3 | 30 | <1 | <1 | 94 | 129 | 100% | 100% | +| 3 | 150 | <1 | <1 | 454 | 130 | 100% | 100% | 
+ +#### Plus + +| Test number | NumResources | TimeToReadyTotal (s) | TimeToReadyAvgSingle (s) | NGINX reloads | NGINX reload avg time (ms) | <= 500ms | <= 1000ms | +|-------------|--------------|----------------------|--------------------------|---------------|----------------------------|----------|-----------| +| 1 | 30 | 1 | <1 | 2 | 220.5 | 100% | 100% | +| 1 | 150 | 1.5 | <1 | 2 | 528.5 | 50% | 100% | +| 2 | 30 | 41 | <1 | 94 | 176.8 | 100% | 100% | +| 2 | 150 | 199 | <1 | 391 | 320.56 | 94.1% | 100% | +| 3 | 30 | <1 | <1 | 94 | 128.5 | 100% | 100% | +| 3 | 150 | <1 | <1 | 454 | 129.2 | 100% | 100% | + +### Event Batch Processing + +#### OSS + +| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | +|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|------------| +| 1 | 30 | 5 | 726.6 | 80% | 80% | 100% | 100% | 100% | +| 1 | 150 | 5 | 4457 | 40% | 80% | 80% | 80% | 100% | +| 2 | 30 | 371 | 59.5 | 99.7% | 100% | 100% | 100% | 100% | +| 2 | 150 | 1742 | 93.5 | 92.9% | 99.99% | 100% | 100% | 100% | +| 3 | 30 | 370 | 43.9 | 99.85% | 99.85% | 100% | 100% | 100% | +| 3 | 150 | 1810 | 44.8 | 99.99% | 99.99% | 99.99% | 100% | 100% | + +#### Plus + +| Test number | NumResources | Event Batch Total | Event Batch Processing avg time (ms) | <= 500ms | <= 1000ms | <= 5000ms | <= 10000ms | <= 30000ms | +|-------------|--------------|-------------------|--------------------------------------|----------|-----------|-----------|------------|--------------| +| 1 | 30 | 6 | 84 | 100% | 100% | 100% | 100% | 100% | +| 1 | 150 | 5 | 4544.3 | 40% | 80% | 80% | 80% | 100% | +| 2 | 30 | 370 | 59.1 | 100% | 100% | 100% | 100% | 100% | +| 2 | 150 | 1747 | 93.2 | 94.1% | 99.99% | 100% | 100% | 100% | +| 3 | 30 | 370 | 41.33 | 99.99% | 99.99% | 100% | 100% | 100% | +| 3 | 150 | 1809 | 44.88 | 99.99% | 99.99% 
| 99.99% | 99.99% | 100% | + +## NumResources to Total Resources + +| NumResources | Gateways | Secrets | ReferenceGrants | Namespaces | application Pods | application Services | HTTPRoutes | Total Resources | +|--------------|----------|---------|-----------------|------------|------------------|----------------------|------------|-----------------| +| x | 1 | 1 | 1 | x+1 | 2x | 2x | 3x | | +| 30 | 1 | 1 | 1 | 31 | 60 | 60 | 90 | 244 | +| 150 | 1 | 1 | 1 | 151 | 300 | 300 | 450 | 1204 | + +## Observations + +1. Reload time and time to ready have increased in 150 resource tests. This is probably due, in part, to the fix for https://github.com/nginxinc/nginx-gateway-fabric/issues/1107 causing the prior + test to only attach 2x of the HTTPRoutes while this test attaches all of them. In the 30 resource tests, results were mostly consistent with prior results. + +2. Event batch processing time increased notably in the 150 resource tests, probably for the same reason mentioned in observation #1. + In the 30 resource tests, results were mostly consistent with prior results. + +3. No errors in the logs. + + +## Future Improvements + +None. 
diff --git a/tests/reconfig/scripts/cafe-routes.yaml b/tests/reconfig/scripts/cafe-routes.yaml new file mode 100644 index 0000000000..006a8eba92 --- /dev/null +++ b/tests/reconfig/scripts/cafe-routes.yaml @@ -0,0 +1,57 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: cafe-tls-redirect +spec: + parentRefs: + - name: gateway + namespace: default + sectionName: http + hostnames: + - "cafe.example.com" + rules: + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + port: 443 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: coffee +spec: + parentRefs: + - name: gateway + namespace: default + sectionName: https + hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /coffee + backendRefs: + - name: coffee + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: tea +spec: + parentRefs: + - name: gateway + sectionName: https + namespace: default + hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /tea + backendRefs: + - name: tea + port: 80 diff --git a/tests/reconfig/scripts/cafe.yaml b/tests/reconfig/scripts/cafe.yaml new file mode 100644 index 0000000000..2d03ae59ff --- /dev/null +++ b/tests/reconfig/scripts/cafe.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: coffee +spec: + replicas: 1 + selector: + matchLabels: + app: coffee + template: + metadata: + labels: + app: coffee + spec: + containers: + - name: coffee + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: coffee +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: coffee +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tea +spec: + replicas: 1 + selector: + matchLabels: + app: tea + template: + metadata: + labels: + app: tea + spec: + 
containers: + - name: tea + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: tea +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: tea diff --git a/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml b/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml new file mode 100644 index 0000000000..d4037e2d67 --- /dev/null +++ b/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: certificate +--- +apiVersion: v1 +kind: Secret +metadata: + name: cafe-secret + namespace: certificate +type: kubernetes.io/tls +data: + tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzakNDQVpvQ0NRQzdCdVdXdWRtRkNEQU5CZ2txaGtpRzl3MEJBUXNGQURBYk1Sa3dGd1lEVlFRRERCQmoKWVdabExtVjRZVzF3YkdVdVkyOXRNQjRYRFRJeU1EY3hOREl4TlRJek9Wb1hEVEl6TURjeE5ESXhOVEl6T1ZvdwpHekVaTUJjR0ExVUVBd3dRWTJGbVpTNWxlR0Z0Y0d4bExtTnZiVENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFECmdnRVBBRENDQVFvQ2dnRUJBTHFZMnRHNFc5aStFYzJhdnV4Q2prb2tnUUx1ek10U1Rnc1RNaEhuK3ZRUmxIam8KVzFLRnMvQVdlS25UUStyTWVKVWNseis4M3QwRGtyRThwUisxR2NKSE50WlNMb0NEYUlRN0Nhck5nY1daS0o4Qgo1WDNnVS9YeVJHZjI2c1REd2xzU3NkSEQ1U2U3K2Vab3NPcTdHTVF3K25HR2NVZ0VtL1Q1UEMvY05PWE0zZWxGClRPL051MStoMzROVG9BbDNQdTF2QlpMcDNQVERtQ0thaEROV0NWbUJQUWpNNFI4VERsbFhhMHQ5Z1o1MTRSRzUKWHlZWTNtdzZpUzIrR1dYVXllMjFuWVV4UEhZbDV4RHY0c0FXaGRXbElweHlZQlNCRURjczN6QlI2bFF1OWkxZAp0R1k4dGJ3blVmcUVUR3NZdWxzc05qcU95V1VEcFdJelhibHhJZVVDQXdFQUFUQU5CZ2txaGtpRzl3MEJBUXNGCkFBT0NBUUVBcjkrZWJ0U1dzSnhLTGtLZlRkek1ISFhOd2Y5ZXFVbHNtTXZmMGdBdWVKTUpUR215dG1iWjlpbXQKL2RnWlpYVE9hTElHUG9oZ3BpS0l5eVVRZVdGQ2F0NHRxWkNPVWRhbUloOGk0Q1h6QVJYVHNvcUNOenNNLzZMRQphM25XbFZyS2lmZHYrWkxyRi8vblc0VVNvOEoxaCtQeDljY0tpRDZZU0RVUERDRGh1RUtFWXcvbHpoUDJVOXNmCnl6cEJKVGQ4enFyM3paTjNGWWlITmgzYlRhQS82di9jU2lyamNTK1EwQXg4RWpzQzYxRjRVMTc4QzdWNWRCKzQKcmtPTy9QNlA0UFlWNTRZZHMvRjE2WkZJTHFBNENCYnExRExuYWRxamxyN3NPbzl2ZzNnWFNMYXBVVkdtZ2todAp6VlZPWG1mU0
Z4OS90MDBHUi95bUdPbERJbWlXMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + tls.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVi
t2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFvR0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K diff --git a/tests/reconfig/scripts/create-resources-gw-last.sh b/tests/reconfig/scripts/create-resources-gw-last.sh new file mode 100755 index 0000000000..a1be0fc9e6 --- /dev/null +++ b/tests/reconfig/scripts/create-resources-gw-last.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +num_namespaces=$1 + +# Create namespaces +for ((i = 1; i <= num_namespaces; i++)); do + namespace_name="namespace$i" + kubectl create namespace "$namespace_name" +done + +# Create single instance resources +kubectl create -f certificate-ns-and-cafe-secret.yaml +kubectl create -f reference-grant.yaml + +# Create backend service and apps +for ((i = 1; i <= num_namespaces; i++)); do + namespace_name="namespace$i" + sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe.yaml | kubectl apply -n "$namespace_name" -f - +done + +# Create routes +for ((i = 1; i <= num_namespaces; i++)); do + namespace_name="namespace$i" + sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe-routes.yaml | kubectl apply -n "$namespace_name" -f - +done + +# Wait for apps to be ready +sleep 60 + +# Create Gateway +kubectl create -f gateway.yaml diff --git a/tests/reconfig/scripts/create-resources-routes-last.sh b/tests/reconfig/scripts/create-resources-routes-last.sh new file mode 100755 index 0000000000..be41d9a706 --- /dev/null +++ b/tests/reconfig/scripts/create-resources-routes-last.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +num_namespaces=$1 + +# Create namespaces +for ((i = 1; i <= num_namespaces; i++)); do + namespace_name="namespace$i" + kubectl create namespace "$namespace_name" +done + +# Create backend 
service and apps +for ((i = 1; i <= num_namespaces; i++)); do + namespace_name="namespace$i" + sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe.yaml | kubectl apply -n "$namespace_name" -f - +done + +# Wait for apps to be ready +sleep 60 + +# Create single instance resources +kubectl create -f certificate-ns-and-cafe-secret.yaml +kubectl create -f reference-grant.yaml +kubectl create -f gateway.yaml + +# Create routes +for ((i = 1; i <= num_namespaces; i++)); do + namespace_name="namespace$i" + sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe-routes.yaml | kubectl apply -n "$namespace_name" -f - +done diff --git a/tests/reconfig/scripts/delete-multiple.sh b/tests/reconfig/scripts/delete-multiple.sh new file mode 100755 index 0000000000..2f9752e8c9 --- /dev/null +++ b/tests/reconfig/scripts/delete-multiple.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +num_namespaces=$1 + +# Delete namespaces +namespaces="" +for ((i = 1; i <= num_namespaces; i++)); do + namespaces+="namespace${i} " +done + +kubectl delete namespace "${namespaces}" + +# Delete single instance resources +kubectl delete -f gateway.yaml +kubectl delete -f reference-grant.yaml +kubectl delete -f certificate-ns-and-cafe-secret.yaml diff --git a/tests/reconfig/scripts/gateway.yaml b/tests/reconfig/scripts/gateway.yaml new file mode 100644 index 0000000000..fd9d52675b --- /dev/null +++ b/tests/reconfig/scripts/gateway.yaml @@ -0,0 +1,25 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: gateway +spec: + gatewayClassName: nginx + listeners: + - name: http + port: 80 + protocol: HTTP + allowedRoutes: + namespaces: + from: "All" + - name: https + port: 443 + protocol: HTTPS + allowedRoutes: + namespaces: + from: "All" + tls: + mode: Terminate + certificateRefs: + - kind: Secret + name: cafe-secret + namespace: certificate diff --git a/tests/reconfig/scripts/reference-grant.yaml 
b/tests/reconfig/scripts/reference-grant.yaml new file mode 100644 index 0000000000..053bbbdcc2 --- /dev/null +++ b/tests/reconfig/scripts/reference-grant.yaml @@ -0,0 +1,14 @@ +apiVersion: gateway.networking.k8s.io/v1beta1 +kind: ReferenceGrant +metadata: + name: access-to-cafe-secret + namespace: certificate +spec: + to: + - group: "" + kind: Secret + name: cafe-secret # if you omit this name, then Gateways in default ns can access all Secrets in the certificate ns + from: + - group: gateway.networking.k8s.io + kind: Gateway + namespace: default diff --git a/tests/reconfig/setup.md b/tests/reconfig/setup.md new file mode 100644 index 0000000000..8729115544 --- /dev/null +++ b/tests/reconfig/setup.md @@ -0,0 +1,120 @@ +# Reconfig tests + + +- [Reconfig tests](#reconfig-tests) + - [Goals](#goals) + - [Test Environment](#test-environment) + - [Setup](#setup) + - [Tests](#tests) + - [Test 1: Resources exist before start-up](#test-1-resources-exist-before-start-up) + - [Test 2: Start NGF, deploy Gateway, create many resources attached to GW](#test-2-start-ngf-deploy-gateway-create-many-resources-attached-to-gw) + - [Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway](#test-3-start-ngf-create-many-resources-attached-to-a-gateway-deploy-the-gateway) + + +## Goals + +- Measure how long it takes NGF to reconfigure NGINX and update statuses when a number of Gateway API and + referenced core Kubernetes resources are created at once. +- Two runs of each test should be ran with differing numbers of resources. Each run will deploy: + - a single Gateway, Secret, and ReferenceGrant resources + - `x+1` number of namespaces + - `2x` number of backend apps and services + - `3x` number of HTTPRoutes. +- Where x=30 OR x=150. + +## Test Environment + +The following cluster will be sufficient: + +- A Kubernetes cluster with 4 nodes on GKE + - Node: e2-medium (2 vCPU, 4GB memory) + +## Setup + +1. Create cloud cluster +2. 
Install Gateway API Resources: + + ```bash + kubectl kustomize config/crd/gateway-api/standard | kubectl apply -f - + ``` + +3. Deploy NGF from edge using Helm install and wait for LoadBalancer Service to be ready + (NOTE: For Test 1, deploy AFTER resources): + + ```console + helm install my-release oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --version 0.0.0-edge \ + --create-namespace --wait -n nginx-gateway --set nginxGateway.productTelemetry.enable=false + ``` + +4. Run tests: + 1. There are 3 versions of the reconfiguration tests that need to be ran, with a low and high number of resources. + Therefore, a full test suite includes 6 test runs. + 2. There are scripts to generate the required resources and config changes. + 3. Run each test using the provided script (`scripts/create-resources-gw-last.sh` or + `scripts/create-resources-routes-last.sh` depending on the test). + 4. The scripts accept a number parameter to indicate how many resources should be created. Currently, we are running + with 30 or 150. The scripts will create a single Gateway, Secret and ReferenceGrant resources, `x+1` number of + namespaces, `2x` number of backend apps and services, and `3x` number of HTTPRoutes. + - Note: Clean up after each test run for isolated results. There's a script provided for removing all the test + fixtures `scripts/delete-multiple.sh` which takes a number (needs to be the same number as what was used in the + create script.) +5. After each individual test: + + - Describe the Gateway resource and make sure the status is correct. + - Check the logs of both NGF containers for errors. + - Parse the logs for TimeToReady numbers (see steps 6-7 below). + - Grab metrics. + Note: You can expose metrics by running the below snippet and then navigating to `127.0.0.1:9113/metrics`: + + ```console + GW_POD=$(kubectl get pods -n nginx-gateway | sed -n '2s/^\([^[:space:]]*\).*$/\1/p') + kubectl port-forward $GW_POD -n nginx-gateway 9113:9113 & + ``` + +6. 
Measure NGINX Reloads and Time to Ready Results + 1. TimeToReadyTotal as described in each test - NGF logs. + 2. TimeToReadyAvgSingle which is the average time between updating any resource and the + NGINX configuration being reloaded - NGF logs. + 3. NGINX Reload count - metrics. + 4. Average NGINX reload duration - metrics. + 1. The average reload duration can be computed by taking the `nginx_gateway_fabric_nginx_reloads_milliseconds_sum` + metric value and dividing it by the `nginx_gateway_fabric_nginx_reloads_milliseconds_count` metric value. +7. Measure Event Batch Processing Results + 1. Event Batch Total - `nginx_gateway_fabric_event_batch_processing_milliseconds_count` metric. + 2. Average Event Batch Processing duration - metrics. + 1. The average event batch processing duration can be computed by taking the `nginx_gateway_fabric_event_batch_processing_milliseconds_sum` + metric value and dividing it by the `nginx_gateway_fabric_event_batch_processing_milliseconds_count` metric value. +8. For accuracy, repeat the test suite once or twice, take the averages, and look for any anomalies or outliers. + +## Tests + +### Test 1: Resources exist before start-up + +1. Deploy Gateway resources before start-up: + 1. Use either of the provided scripts with the required number of resources, + e.g. `cd scripts && bash create-resources-gw-last.sh 30`. The script will deploy backend apps and services, wait + 60 seconds for them to be ready, and deploy 1 Gateway, 1 RefGrant, 1 Secret, and HTTPRoutes. + 2. Deploy NGF + 3. Measure TimeToReadyTotal as the time it takes from start-up -> final config written and + NGINX reloaded. Measure the other results as described in steps 6-7 of the [Setup](#setup) section. + +### Test 2: Start NGF, deploy Gateway, create many resources attached to GW + +1. Deploy all Gateway resources, NGF running: + 1. Deploy NGF + 2. Run the provided script with the required number of resources, + e.g. 
`cd scripts && bash create-resources-routes-last.sh 30`. The script will deploy backend apps and services, + wait 60 seconds for them to be ready, and deploy 1 Gateway, 1 Secret, 1 RefGrant, and HTTPRoutes at the same time. + 3. Measure TimeToReadyTotal as the time it takes from NGF receiving the first HTTPRoute resource update (logs will say "reconciling") -> final + config written and NGINX reloaded. Measure the other results as described in steps 6-7 of the [Setup](#setup) section. + +### Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway + +1. Deploy HTTPRoute resources, NGF running, Gateway last: + 1. Deploy NGF + 2. Run the provided script with the required number of resources, + e.g. `cd scripts && bash create-resources-gw-last.sh 30`. + The script will deploy the namespaces, backend apps and services, 1 Secret, 1 ReferenceGrant, and the HTTPRoutes; + wait 60 seconds for the backend apps to be ready, and then deploy 1 Gateway for all HTTPRoutes. + 3. Measure TimeToReadyTotal as the time it takes from NGF receiving gateway resource -> config written and NGINX reloaded. + Measure the other results as described in steps 6-7 of the [Setup](#setup) section. 
From 03ed0bc63cf6dcba224f3b9a71b6c16554fe2401 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 8 Aug 2024 12:23:58 -0700 Subject: [PATCH 32/42] Revert full delete of reconfiguration files and keep results files --- tests/reconfig/scripts/cafe-routes.yaml | 57 --------- tests/reconfig/scripts/cafe.yaml | 65 ---------- .../certificate-ns-and-cafe-secret.yaml | 14 -- .../scripts/create-resources-gw-last.sh | 31 ----- .../scripts/create-resources-routes-last.sh | 29 ----- tests/reconfig/scripts/delete-multiple.sh | 16 --- tests/reconfig/scripts/gateway.yaml | 25 ---- tests/reconfig/scripts/reference-grant.yaml | 14 -- tests/reconfig/setup.md | 120 ------------------ .../reconfig}/1.0.0/1.0.0.md | 0 .../reconfig}/1.1.0/1.1.0.md | 0 .../reconfig}/1.2.0/1.2.0.md | 0 .../reconfig}/1.3.0/1.3.0.md | 0 13 files changed, 371 deletions(-) delete mode 100644 tests/reconfig/scripts/cafe-routes.yaml delete mode 100644 tests/reconfig/scripts/cafe.yaml delete mode 100644 tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml delete mode 100755 tests/reconfig/scripts/create-resources-gw-last.sh delete mode 100755 tests/reconfig/scripts/create-resources-routes-last.sh delete mode 100755 tests/reconfig/scripts/delete-multiple.sh delete mode 100644 tests/reconfig/scripts/gateway.yaml delete mode 100644 tests/reconfig/scripts/reference-grant.yaml delete mode 100644 tests/reconfig/setup.md rename tests/{reconfig/results => results/reconfig}/1.0.0/1.0.0.md (100%) rename tests/{reconfig/results => results/reconfig}/1.1.0/1.1.0.md (100%) rename tests/{reconfig/results => results/reconfig}/1.2.0/1.2.0.md (100%) rename tests/{reconfig/results => results/reconfig}/1.3.0/1.3.0.md (100%) diff --git a/tests/reconfig/scripts/cafe-routes.yaml b/tests/reconfig/scripts/cafe-routes.yaml deleted file mode 100644 index 006a8eba92..0000000000 --- a/tests/reconfig/scripts/cafe-routes.yaml +++ /dev/null @@ -1,57 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: 
cafe-tls-redirect -spec: - parentRefs: - - name: gateway - namespace: default - sectionName: http - hostnames: - - "cafe.example.com" - rules: - - filters: - - type: RequestRedirect - requestRedirect: - scheme: https - port: 443 ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: coffee -spec: - parentRefs: - - name: gateway - namespace: default - sectionName: https - hostnames: - - "cafe.example.com" - rules: - - matches: - - path: - type: PathPrefix - value: /coffee - backendRefs: - - name: coffee - port: 80 ---- -apiVersion: gateway.networking.k8s.io/v1 -kind: HTTPRoute -metadata: - name: tea -spec: - parentRefs: - - name: gateway - sectionName: https - namespace: default - hostnames: - - "cafe.example.com" - rules: - - matches: - - path: - type: PathPrefix - value: /tea - backendRefs: - - name: tea - port: 80 diff --git a/tests/reconfig/scripts/cafe.yaml b/tests/reconfig/scripts/cafe.yaml deleted file mode 100644 index 2d03ae59ff..0000000000 --- a/tests/reconfig/scripts/cafe.yaml +++ /dev/null @@ -1,65 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: coffee -spec: - replicas: 1 - selector: - matchLabels: - app: coffee - template: - metadata: - labels: - app: coffee - spec: - containers: - - name: coffee - image: nginxdemos/nginx-hello:plain-text - ports: - - containerPort: 8080 ---- -apiVersion: v1 -kind: Service -metadata: - name: coffee -spec: - ports: - - port: 80 - targetPort: 8080 - protocol: TCP - name: http - selector: - app: coffee ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tea -spec: - replicas: 1 - selector: - matchLabels: - app: tea - template: - metadata: - labels: - app: tea - spec: - containers: - - name: tea - image: nginxdemos/nginx-hello:plain-text - ports: - - containerPort: 8080 ---- -apiVersion: v1 -kind: Service -metadata: - name: tea -spec: - ports: - - port: 80 - targetPort: 8080 - protocol: TCP - name: http - selector: - app: tea diff --git 
a/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml b/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml deleted file mode 100644 index d4037e2d67..0000000000 --- a/tests/reconfig/scripts/certificate-ns-and-cafe-secret.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: certificate ---- -apiVersion: v1 -kind: Secret -metadata: - name: cafe-secret - namespace: certificate -type: kubernetes.io/tls -data: - tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzakNDQVpvQ0NRQzdCdVdXdWRtRkNEQU5CZ2txaGtpRzl3MEJBUXNGQURBYk1Sa3dGd1lEVlFRRERCQmoKWVdabExtVjRZVzF3YkdVdVkyOXRNQjRYRFRJeU1EY3hOREl4TlRJek9Wb1hEVEl6TURjeE5ESXhOVEl6T1ZvdwpHekVaTUJjR0ExVUVBd3dRWTJGbVpTNWxlR0Z0Y0d4bExtTnZiVENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFECmdnRVBBRENDQVFvQ2dnRUJBTHFZMnRHNFc5aStFYzJhdnV4Q2prb2tnUUx1ek10U1Rnc1RNaEhuK3ZRUmxIam8KVzFLRnMvQVdlS25UUStyTWVKVWNseis4M3QwRGtyRThwUisxR2NKSE50WlNMb0NEYUlRN0Nhck5nY1daS0o4Qgo1WDNnVS9YeVJHZjI2c1REd2xzU3NkSEQ1U2U3K2Vab3NPcTdHTVF3K25HR2NVZ0VtL1Q1UEMvY05PWE0zZWxGClRPL051MStoMzROVG9BbDNQdTF2QlpMcDNQVERtQ0thaEROV0NWbUJQUWpNNFI4VERsbFhhMHQ5Z1o1MTRSRzUKWHlZWTNtdzZpUzIrR1dYVXllMjFuWVV4UEhZbDV4RHY0c0FXaGRXbElweHlZQlNCRURjczN6QlI2bFF1OWkxZAp0R1k4dGJ3blVmcUVUR3NZdWxzc05qcU95V1VEcFdJelhibHhJZVVDQXdFQUFUQU5CZ2txaGtpRzl3MEJBUXNGCkFBT0NBUUVBcjkrZWJ0U1dzSnhLTGtLZlRkek1ISFhOd2Y5ZXFVbHNtTXZmMGdBdWVKTUpUR215dG1iWjlpbXQKL2RnWlpYVE9hTElHUG9oZ3BpS0l5eVVRZVdGQ2F0NHRxWkNPVWRhbUloOGk0Q1h6QVJYVHNvcUNOenNNLzZMRQphM25XbFZyS2lmZHYrWkxyRi8vblc0VVNvOEoxaCtQeDljY0tpRDZZU0RVUERDRGh1RUtFWXcvbHpoUDJVOXNmCnl6cEJKVGQ4enFyM3paTjNGWWlITmgzYlRhQS82di9jU2lyamNTK1EwQXg4RWpzQzYxRjRVMTc4QzdWNWRCKzQKcmtPTy9QNlA0UFlWNTRZZHMvRjE2WkZJTHFBNENCYnExRExuYWRxamxyN3NPbzl2ZzNnWFNMYXBVVkdtZ2todAp6VlZPWG1mU0Z4OS90MDBHUi95bUdPbERJbWlXMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== - tls.key: 
LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVit2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFv
R0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K diff --git a/tests/reconfig/scripts/create-resources-gw-last.sh b/tests/reconfig/scripts/create-resources-gw-last.sh deleted file mode 100755 index a1be0fc9e6..0000000000 --- a/tests/reconfig/scripts/create-resources-gw-last.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -num_namespaces=$1 - -# Create namespaces -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - kubectl create namespace "$namespace_name" -done - -# Create single instance resources -kubectl create -f certificate-ns-and-cafe-secret.yaml -kubectl create -f reference-grant.yaml - -# Create backend service and apps -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe.yaml | kubectl apply -n "$namespace_name" -f - -done - -# Create routes -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe-routes.yaml | kubectl apply -n "$namespace_name" -f - -done - -# Wait for apps to be ready -sleep 60 - -# Create Gateway -kubectl create -f gateway.yaml diff --git a/tests/reconfig/scripts/create-resources-routes-last.sh b/tests/reconfig/scripts/create-resources-routes-last.sh deleted file mode 100755 index be41d9a706..0000000000 --- a/tests/reconfig/scripts/create-resources-routes-last.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -num_namespaces=$1 - -# Create namespaces -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - kubectl create namespace "$namespace_name" -done - -# Create backend service and apps -for ((i = 1; i <= num_namespaces; i++)); do - 
namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe.yaml | kubectl apply -n "$namespace_name" -f - -done - -# Wait for apps to be ready -sleep 60 - -# Create single instance resources -kubectl create -f certificate-ns-and-cafe-secret.yaml -kubectl create -f reference-grant.yaml -kubectl create -f gateway.yaml - -# Create routes -for ((i = 1; i <= num_namespaces; i++)); do - namespace_name="namespace$i" - sed -e "s/coffee/coffee${namespace_name}/g" -e "s/tea/tea${namespace_name}/g" cafe-routes.yaml | kubectl apply -n "$namespace_name" -f - -done diff --git a/tests/reconfig/scripts/delete-multiple.sh b/tests/reconfig/scripts/delete-multiple.sh deleted file mode 100755 index 2f9752e8c9..0000000000 --- a/tests/reconfig/scripts/delete-multiple.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -num_namespaces=$1 - -# Delete namespaces -namespaces="" -for ((i = 1; i <= num_namespaces; i++)); do - namespaces+="namespace${i} " -done - -kubectl delete namespace "${namespaces}" - -# Delete single instance resources -kubectl delete -f gateway.yaml -kubectl delete -f reference-grant.yaml -kubectl delete -f certificate-ns-and-cafe-secret.yaml diff --git a/tests/reconfig/scripts/gateway.yaml b/tests/reconfig/scripts/gateway.yaml deleted file mode 100644 index fd9d52675b..0000000000 --- a/tests/reconfig/scripts/gateway.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1 -kind: Gateway -metadata: - name: gateway -spec: - gatewayClassName: nginx - listeners: - - name: http - port: 80 - protocol: HTTP - allowedRoutes: - namespaces: - from: "All" - - name: https - port: 443 - protocol: HTTPS - allowedRoutes: - namespaces: - from: "All" - tls: - mode: Terminate - certificateRefs: - - kind: Secret - name: cafe-secret - namespace: certificate diff --git a/tests/reconfig/scripts/reference-grant.yaml b/tests/reconfig/scripts/reference-grant.yaml deleted file mode 100644 index 
053bbbdcc2..0000000000 --- a/tests/reconfig/scripts/reference-grant.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: gateway.networking.k8s.io/v1beta1 -kind: ReferenceGrant -metadata: - name: access-to-cafe-secret - namespace: certificate -spec: - to: - - group: "" - kind: Secret - name: cafe-secret # if you omit this name, then Gateways in default ns can access all Secrets in the certificate ns - from: - - group: gateway.networking.k8s.io - kind: Gateway - namespace: default diff --git a/tests/reconfig/setup.md b/tests/reconfig/setup.md deleted file mode 100644 index 8729115544..0000000000 --- a/tests/reconfig/setup.md +++ /dev/null @@ -1,120 +0,0 @@ -# Reconfig tests - - -- [Reconfig tests](#reconfig-tests) - - [Goals](#goals) - - [Test Environment](#test-environment) - - [Setup](#setup) - - [Tests](#tests) - - [Test 1: Resources exist before start-up](#test-1-resources-exist-before-start-up) - - [Test 2: Start NGF, deploy Gateway, create many resources attached to GW](#test-2-start-ngf-deploy-gateway-create-many-resources-attached-to-gw) - - [Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway](#test-3-start-ngf-create-many-resources-attached-to-a-gateway-deploy-the-gateway) - - -## Goals - -- Measure how long it takes NGF to reconfigure NGINX and update statuses when a number of Gateway API and - referenced core Kubernetes resources are created at once. -- Two runs of each test should be ran with differing numbers of resources. Each run will deploy: - - a single Gateway, Secret, and ReferenceGrant resources - - `x+1` number of namespaces - - `2x` number of backend apps and services - - `3x` number of HTTPRoutes. -- Where x=30 OR x=150. - -## Test Environment - -The following cluster will be sufficient: - -- A Kubernetes cluster with 4 nodes on GKE - - Node: e2-medium (2 vCPU, 4GB memory) - -## Setup - -1. Create cloud cluster -2. 
Install Gateway API Resources: - - ```bash - kubectl kustomize config/crd/gateway-api/standard | kubectl apply -f - - ``` - -3. Deploy NGF from edge using Helm install and wait for LoadBalancer Service to be ready - (NOTE: For Test 1, deploy AFTER resources): - - ```console - helm install my-release oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --version 0.0.0-edge \ - --create-namespace --wait -n nginx-gateway --set nginxGateway.productTelemetry.enable=false - ``` - -4. Run tests: - 1. There are 3 versions of the reconfiguration tests that need to be ran, with a low and high number of resources. - Therefore, a full test suite includes 6 test runs. - 2. There are scripts to generate the required resources and config changes. - 3. Run each test using the provided script (`scripts/create-resources-gw-last.sh` or - `scripts/create-resources-routes-last.sh` depending on the test). - 4. The scripts accept a number parameter to indicate how many resources should be created. Currently, we are running - with 30 or 150. The scripts will create a single Gateway, Secret and ReferenceGrant resources, `x+1` number of - namespaces, `2x` number of backend apps and services, and `3x` number of HTTPRoutes. - - Note: Clean up after each test run for isolated results. There's a script provided for removing all the test - fixtures `scripts/delete-multiple.sh` which takes a number (needs to be the same number as what was used in the - create script.) -5. After each individual test: - - - Describe the Gateway resource and make sure the status is correct. - - Check the logs of both NGF containers for errors. - - Parse the logs for TimeToReady numbers (see steps 6-7 below). - - Grab metrics. - Note: You can expose metrics by running the below snippet and then navigating to `127.0.0.1:9113/metrics`: - - ```console - GW_POD=$(kubectl get pods -n nginx-gateway | sed -n '2s/^\([^[:space:]]*\).*$/\1/p') - kubectl port-forward $GW_POD -n nginx-gateway 9113:9113 & - ``` - -6. 
Measure NGINX Reloads and Time to Ready Results - 1. TimeToReadyTotal as described in each test - NGF logs. - 2. TimeToReadyAvgSingle which is the average time between updating any resource and the - NGINX configuration being reloaded - NGF logs. - 3. NGINX Reload count - metrics. - 4. Average NGINX reload duration - metrics. - 1. The average reload duration can be computed by taking the `nginx_gateway_fabric_nginx_reloads_milliseconds_sum` - metric value and dividing it by the `nginx_gateway_fabric_nginx_reloads_milliseconds_count` metric value. -7. Measure Event Batch Processing Results - 1. Event Batch Total - `nginx_gateway_fabric_event_batch_processing_milliseconds_count` metric. - 2. Average Event Batch Processing duration - metrics. - 1. The average event batch processing duration can be computed by taking the `nginx_gateway_fabric_event_batch_processing_milliseconds_sum` - metric value and dividing it by the `nginx_gateway_fabric_event_batch_processing_milliseconds_count` metric value. -8. For accuracy, repeat the test suite once or twice, take the averages, and look for any anomalies or outliers. - -## Tests - -### Test 1: Resources exist before start-up - -1. Deploy Gateway resources before start-up: - 1. Use either of the provided scripts with the required number of resources, - e.g. `cd scripts && bash create-resources-gw-last.sh 30`. The script will deploy backend apps and services, wait - 60 seconds for them to be ready, and deploy 1 Gateway, 1 RefGrant, 1 Secret, and HTTPRoutes. - 2. Deploy NGF - 3. Measure TimeToReadyTotal as the time it takes from start-up -> final config written and - NGINX reloaded. Measure the other results as described in steps 6-7 of the [Setup](#setup) section. - -### Test 2: Start NGF, deploy Gateway, create many resources attached to GW - -1. Deploy all Gateway resources, NGF running: - 1. Deploy NGF - 2. Run the provided script with the required number of resources, - e.g. 
`cd scripts && bash create-resources-routes-last.sh 30`. The script will deploy backend apps and services, - wait 60 seconds for them to be ready, and deploy 1 Gateway, 1 Secret, 1 RefGrant, and HTTPRoutes at the same time. - 3. Measure TimeToReadyTotal as the time it takes from NGF receiving the first HTTPRoute resource update (logs will say "reconciling") -> final - config written and NGINX reloaded. Measure the other results as described in steps 6-7 of the [Setup](#setup) section. - -### Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway - -1. Deploy HTTPRoute resources, NGF running, Gateway last: - 1. Deploy NGF - 2. Run the provided script with the required number of resources, - e.g. `cd scripts && bash create-resources-gw-last.sh 30`. - The script will deploy the namespaces, backend apps and services, 1 Secret, 1 ReferenceGrant, and the HTTPRoutes; - wait 60 seconds for the backend apps to be ready, and then deploy 1 Gateway for all HTTPRoutes. - 3. Measure TimeToReadyTotal as the time it takes from NGF receiving gateway resource -> config written and NGINX reloaded. - Measure the other results as described in steps 6-7 of the [Setup](#setup) section. 
diff --git a/tests/reconfig/results/1.0.0/1.0.0.md b/tests/results/reconfig/1.0.0/1.0.0.md similarity index 100% rename from tests/reconfig/results/1.0.0/1.0.0.md rename to tests/results/reconfig/1.0.0/1.0.0.md diff --git a/tests/reconfig/results/1.1.0/1.1.0.md b/tests/results/reconfig/1.1.0/1.1.0.md similarity index 100% rename from tests/reconfig/results/1.1.0/1.1.0.md rename to tests/results/reconfig/1.1.0/1.1.0.md diff --git a/tests/reconfig/results/1.2.0/1.2.0.md b/tests/results/reconfig/1.2.0/1.2.0.md similarity index 100% rename from tests/reconfig/results/1.2.0/1.2.0.md rename to tests/results/reconfig/1.2.0/1.2.0.md diff --git a/tests/reconfig/results/1.3.0/1.3.0.md b/tests/results/reconfig/1.3.0/1.3.0.md similarity index 100% rename from tests/reconfig/results/1.3.0/1.3.0.md rename to tests/results/reconfig/1.3.0/1.3.0.md From 3bf27e2cf6e42571641abb00edcadddd5e18c661 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 8 Aug 2024 13:52:35 -0700 Subject: [PATCH 33/42] Elaborate on test description --- tests/suite/reconfig_test.go | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index ea5859eca6..4e470c9a87 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -261,7 +261,6 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig // timestamp to the next NGINX configuration update. When it reaches the NGINX configuration update line, // it will reset the reconciling log line and set it to the next reconciling log line. 
for _, line := range strings.Split(ngfLogs, "\n") { - // can't just do this line, need to do gateway specific resources if reconcilingLine == "" && strings.Contains(line, "Reconciling the resource\",\"controller\"") && strings.Contains(line, "\"controllerGroup\":\"gateway.networking.k8s.io\"") { @@ -359,7 +358,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } collectMetrics := func( - testName string, + testDescription string, resourceCount int, timeToReadyStartingLogSubstring string, ngfPodName string, @@ -431,7 +430,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(err).ToNot(HaveOccurred()) results := reconfigTestResults{ - Name: testName, + TestDescription: testDescription, EventsBuckets: eventsBuckets, ReloadBuckets: reloadBuckets, NumResources: resourceCount, @@ -448,6 +447,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } When("resources exist before startup", func() { + testDescription := "Test 1: Resources exist before startup" + It("gathers metrics after creating 30 resources", func() { resourceCount := 30 timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" @@ -458,7 +459,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - collectMetrics("1", + collectMetrics( + testDescription, resourceCount, timeToReadyStartingLogSubstring, ngfPodName, @@ -476,7 +478,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - collectMetrics("1", + collectMetrics( + testDescription, resourceCount, timeToReadyStartingLogSubstring, ngfPodName, @@ -486,6 +489,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }) When("NGF and Gateway resource are deployed first", func() { + testDescription := "Test 2: Start NGF, 
deploy Gateway, create many resources attached to GW" + It("gathers metrics after creating 30 resources", func() { resourceCount := 30 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" @@ -496,7 +501,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(test).To(Succeed()) Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("2", + collectMetrics( + testDescription, resourceCount, timeToReadyStartingLogSubstring, ngfPodName, @@ -514,7 +520,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(test).To(Succeed()) Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("2", + collectMetrics( + testDescription, resourceCount, timeToReadyStartingLogSubstring, ngfPodName, @@ -524,6 +531,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }) When("NGF and resources are deployed first", func() { + testDescription := "Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway" + It("gathers metrics after creating 30 resources", func() { resourceCount := 30 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" @@ -534,7 +543,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(test).To(Succeed()) Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("3", + collectMetrics( + testDescription, resourceCount, timeToReadyStartingLogSubstring, ngfPodName, @@ -552,7 +562,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig Expect(test).To(Succeed()) Expect(checkResourceCreation(resourceCount)).To(Succeed()) - collectMetrics("3", + collectMetrics( + testDescription, resourceCount, timeToReadyStartingLogSubstring, ngfPodName, @@ -579,7 +590,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }) type 
reconfigTestResults struct { - Name string + TestDescription string TimeToReadyTotal string TimeToReadyAvgSingle string EventsBuckets []framework.Bucket @@ -592,7 +603,7 @@ type reconfigTestResults struct { } const reconfigResultTemplate = ` -## Test {{ .Name }} NumResources {{ .NumResources }} +## {{ .TestDescription }} - NumResources {{ .NumResources }} ### Reloads and Time to Ready From 35159e9946d21b5caf7ff32fcaa2d8fc9dedec18 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 8 Aug 2024 14:55:37 -0700 Subject: [PATCH 34/42] Add note on node size --- tests/suite/reconfig_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 4e470c9a87..e3f0a76c38 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -23,6 +23,7 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) +// Cluster node size must be greater than or equal to 4 for test to perform correctly. var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration", "nfr"), func() { // used for cleaning up resources const maxResourceCount = 150 From fe1f414431cf1587f6689c141a707731f2d4b597 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 8 Aug 2024 15:01:22 -0700 Subject: [PATCH 35/42] Change package to main --- tests/suite/reconfig_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index e3f0a76c38..16a99ed0a5 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -1,4 +1,4 @@ -package suite +package main import ( "bytes" From 9842b1eb24ec657a15412681c6388f14148cd258 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 9 Aug 2024 09:34:51 -0700 Subject: [PATCH 36/42] Refactor queries.go function layout to be more readable --- tests/framework/queries.go | 82 +++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git 
a/tests/framework/queries.go b/tests/framework/queries.go index e8f9b55cfc..9ce3c1464a 100644 --- a/tests/framework/queries.go +++ b/tests/framework/queries.go @@ -9,44 +9,9 @@ import ( "github.com/prometheus/common/model" ) -func getFirstValueOfVector(query string, promInstance PrometheusInstance) (float64, error) { - result, err := promInstance.Query(query) - if err != nil { - return 0, err - } - - val, err := GetFirstValueOfPrometheusVector(result) - if err != nil { - return 0, err - } - - return val, nil -} - -func getBuckets(query string, promInstance PrometheusInstance) ([]Bucket, error) { - result, err := promInstance.Query(query) - if err != nil { - return nil, err - } - - res, ok := result.(model.Vector) - if !ok { - return nil, errors.New("could not convert result to vector") - } - - buckets := make([]Bucket, 0, len(res)) - - for _, sample := range res { - le := sample.Metric["le"] - val := float64(sample.Value) - bucket := Bucket{ - Le: string(le), - Val: int(val), - } - buckets = append(buckets, bucket) - } - - return buckets, nil +type Bucket struct { + Le string + Val int } func GetReloadCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { @@ -304,7 +269,42 @@ func CreateResponseChecker(url, address string, requestTimeout time.Duration) fu } } -type Bucket struct { - Le string - Val int +func getFirstValueOfVector(query string, promInstance PrometheusInstance) (float64, error) { + result, err := promInstance.Query(query) + if err != nil { + return 0, err + } + + val, err := GetFirstValueOfPrometheusVector(result) + if err != nil { + return 0, err + } + + return val, nil +} + +func getBuckets(query string, promInstance PrometheusInstance) ([]Bucket, error) { + result, err := promInstance.Query(query) + if err != nil { + return nil, err + } + + res, ok := result.(model.Vector) + if !ok { + return nil, errors.New("could not convert result to vector") + } + + buckets := make([]Bucket, 0, len(res)) + + for _, sample := range res { + 
le := sample.Metric["le"] + val := float64(sample.Value) + bucket := Bucket{ + Le: string(le), + Val: int(val), + } + buckets = append(buckets, bucket) + } + + return buckets, nil } From 2d753a4036598c13e2bf27875911727aebfdacb4 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 9 Aug 2024 09:41:49 -0700 Subject: [PATCH 37/42] Remove unnecessary return and correct placement of creation of resources functions --- tests/suite/reconfig_test.go | 37 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 16a99ed0a5..fa860d2727 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -25,10 +25,13 @@ import ( // Cluster node size must be greater than or equal to 4 for test to perform correctly. var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfiguration", "nfr"), func() { - // used for cleaning up resources - const maxResourceCount = 150 - const metricExistTimeout = 2 * time.Minute - const metricExistPolling = 1 * time.Second + const ( + // used for cleaning up resources + maxResourceCount = 150 + + metricExistTimeout = 2 * time.Minute + metricExistPolling = 1 * time.Second + ) var ( scrapeInterval = 15 * time.Second @@ -101,7 +104,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig return nil } - createResourcesGWLast := func(resourceCount int) error { + createResourcesGWLast := func(resourceCount int) { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.CreateTimeout) defer cancel() @@ -136,11 +139,9 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } Expect(resourceManager.ApplyFromFiles([]string{"reconfig/gateway.yaml"}, reconfigNamespace.Name)).To(Succeed()) - - return nil } - createResourcesRoutesLast := func(resourceCount int) error { + createResourcesRoutesLast := func(resourceCount int) { ctx, cancel := 
context.WithTimeout(context.Background(), timeoutConfig.CreateTimeout) defer cancel() @@ -174,8 +175,6 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig reconfigNamespace.Name)).To(Succeed()) Expect(createUniqueResources(resourceCount, "manifests/reconfig/cafe-routes.yaml")).To(Succeed()) - - return nil } checkResourceCreation := func(resourceCount int) error { @@ -453,9 +452,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig It("gathers metrics after creating 30 resources", func() { resourceCount := 30 timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" - test := createResourcesGWLast(resourceCount) - Expect(test).To(Succeed()) + createResourcesGWLast(resourceCount) Expect(checkResourceCreation(resourceCount)).To(Succeed()) ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() @@ -472,9 +470,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig It("gathers metrics after creating 150 resources", func() { resourceCount := 150 timeToReadyStartingLogSubstring := "Starting NGINX Gateway Fabric" - test := createResourcesGWLast(resourceCount) - Expect(test).To(Succeed()) + createResourcesGWLast(resourceCount) Expect(checkResourceCreation(resourceCount)).To(Succeed()) ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() @@ -495,11 +492,10 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig It("gathers metrics after creating 30 resources", func() { resourceCount := 30 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" - test := createResourcesRoutesLast(resourceCount) ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - Expect(test).To(Succeed()) + createResourcesRoutesLast(resourceCount) Expect(checkResourceCreation(resourceCount)).To(Succeed()) collectMetrics( @@ -514,11 +510,10 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, 
Label("reconfig It("gathers metrics after creating 150 resources", func() { resourceCount := 150 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"httproute\"" - test := createResourcesRoutesLast(resourceCount) ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - Expect(test).To(Succeed()) + createResourcesRoutesLast(resourceCount) Expect(checkResourceCreation(resourceCount)).To(Succeed()) collectMetrics( @@ -537,11 +532,10 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig It("gathers metrics after creating 30 resources", func() { resourceCount := 30 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" - test := createResourcesGWLast(resourceCount) ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - Expect(test).To(Succeed()) + createResourcesGWLast(resourceCount) Expect(checkResourceCreation(resourceCount)).To(Succeed()) collectMetrics( @@ -556,11 +550,10 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig It("gathers metrics after creating 150 resources", func() { resourceCount := 150 timeToReadyStartingLogSubstring := "Reconciling the resource\",\"controller\":\"gateway\"" - test := createResourcesGWLast(resourceCount) ngfPodName, startTime := deployNGFReturnsNGFPodNameAndStartTime() - Expect(test).To(Succeed()) + createResourcesGWLast(resourceCount) Expect(checkResourceCreation(resourceCount)).To(Succeed()) collectMetrics( From cdf4387713628b75cfc4b7534bb5ab0cfbae92e4 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 9 Aug 2024 11:59:01 -0700 Subject: [PATCH 38/42] Use time constant layout when parsing logs --- tests/suite/reconfig_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index fa860d2727..8dbdcfe656 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -231,12 +231,11 @@ 
var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } calculateTimeDifferenceBetweenLogLines := func(firstLine, secondLine string) (int, error) { + layout := time.RFC3339 + firstTS := getTimeStampFromLogLine(firstLine) secondTS := getTimeStampFromLogLine(secondLine) - // i might be able to just use the local constant timestamp layout - layout := "2006-01-02T15:04:05Z" - parsedTS1, err := time.Parse(layout, firstTS) if err != nil { return 0, err From c67265cbb299925049b924104b7ed159488202b1 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 9 Aug 2024 13:56:49 -0700 Subject: [PATCH 39/42] Move queries to different file and add descriptions to functions --- tests/framework/prometheus.go | 325 ++++++++++++++++++++++++++++++++++ tests/framework/queries.go | 310 -------------------------------- 2 files changed, 325 insertions(+), 310 deletions(-) delete mode 100644 tests/framework/queries.go diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go index 358ec5d09a..d8794562df 100644 --- a/tests/framework/prometheus.go +++ b/tests/framework/prometheus.go @@ -293,3 +293,328 @@ func WritePrometheusMatrixToCSVFile(fileName string, value model.Value) error { return nil } + +// Bucket represents a data point of a Histogram Bucket. +type Bucket struct { + // Le is the interval Less than or Equal which represents the Bucket's bin. i.e. "500ms". + Le string + // Val is the value for how many instances fall in the Bucket. + Val int +} + +// GetReloadCount gets the total number of nginx reloads. +func GetReloadCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`, + ngfPodName, + ), + promInstance, + ) +} + +// GetReloadCountWithStartTime gets the total number of nginx reloads from a start time to the current time. 
+func GetReloadCountWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +// GetReloadErrsCountWithStartTime gets the total number of nginx reload errors from a start time to the current time. +func GetReloadErrsCountWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +// GetReloadAvgTime gets the average time in milliseconds for nginx to reload. +func GetReloadAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ + ` / `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`, + ngfPodName, + ), + promInstance, + ) +} + +// GetReloadAvgTimeWithStartTime gets the average time in milliseconds for nginx to reload using a start time +// to the current time to calculate. 
+func GetReloadAvgTimeWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +// GetReloadBuckets gets the Buckets in millisecond intervals for nginx reloads. +func GetReloadBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`, + ngfPodName, + ), + promInstance, + ) +} + +// GetReloadBucketsWithStartTime gets the Buckets in millisecond intervals for nginx reloads from a start time +// to the current time. +func GetReloadBucketsWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) ([]Bucket, error) { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +// GetEventsCount gets the NGF event batch processing count. +func GetEventsCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`, + ngfPodName, + ), + promInstance, + ) +} + +// GetEventsCountWithStartTime gets the NGF event batch processing count from a start time to the current time. 
+func GetEventsCountWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +// GetEventsAvgTime gets the average time in milliseconds it takes for NGF to process a single event batch. +func GetEventsAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ + ` / `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`, + ngfPodName, + ), + promInstance, + ) +} + +// GetEventsAvgTimeWithStartTime gets the average time in milliseconds it takes for NGF to process a single event +// batch using a start time to the current time to calculate. +func GetEventsAvgTimeWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) (float64, error) { + return getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +// GetEventsBuckets gets the Buckets in millisecond intervals for NGF event batch processing. 
+func GetEventsBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`, + ngfPodName, + ), + promInstance, + ) +} + +// GetEventsBucketsWithStartTime gets the Buckets in millisecond intervals for NGF event batch processing from a start +// time to the current time. +func GetEventsBucketsWithStartTime( + promInstance PrometheusInstance, + ngfPodName string, + startTime time.Time, +) ([]Bucket, error) { + return getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + promInstance, + ) +} + +// CreateMetricExistChecker returns a function that will query Prometheus at a specific timestamp +// and adjust that timestamp if there is no result found. +func CreateMetricExistChecker( + promInstance PrometheusInstance, + query string, + getTime func() time.Time, + modifyTime func(), +) func() error { + return func() error { + queryWithTimestamp := fmt.Sprintf("%s @ %d", query, getTime().Unix()) + + result, err := promInstance.Query(queryWithTimestamp) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + modifyTime() + return errors.New("empty result") + } + + return nil + } +} + +// CreateEndTimeFinder returns a function that will range query Prometheus given a specific startTime and endTime +// and adjust the endTime if there is no result found. 
+func CreateEndTimeFinder( + promInstance PrometheusInstance, + query string, + startTime time.Time, + endTime *time.Time, + queryRangeStep time.Duration, +) func() error { + return func() error { + result, err := promInstance.QueryRange(query, v1.Range{ + Start: startTime, + End: *endTime, + Step: queryRangeStep, + }) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + *endTime = time.Now() + return errors.New("empty result") + } + + return nil + } +} + +// CreateResponseChecker returns a function that checks if there is a successful response from a url. +func CreateResponseChecker(url, address string, requestTimeout time.Duration) func() error { + return func() error { + status, _, err := Get(url, address, requestTimeout) + if err != nil { + return fmt.Errorf("bad response: %w", err) + } + + if status != 200 { + return fmt.Errorf("unexpected status code: %d", status) + } + + return nil + } +} + +func getFirstValueOfVector(query string, promInstance PrometheusInstance) (float64, error) { + result, err := promInstance.Query(query) + if err != nil { + return 0, err + } + + val, err := GetFirstValueOfPrometheusVector(result) + if err != nil { + return 0, err + } + + return val, nil +} + +func getBuckets(query string, promInstance PrometheusInstance) ([]Bucket, error) { + result, err := promInstance.Query(query) + if err != nil { + return nil, err + } + + res, ok := result.(model.Vector) + if !ok { + return nil, errors.New("could not convert result to vector") + } + + buckets := make([]Bucket, 0, len(res)) + + for _, sample := range res { + le := sample.Metric["le"] + val := float64(sample.Value) + bucket := Bucket{ + Le: string(le), + Val: int(val), + } + buckets = append(buckets, bucket) + } + + return buckets, nil +} diff --git a/tests/framework/queries.go b/tests/framework/queries.go deleted file mode 100644 index 9ce3c1464a..0000000000 --- a/tests/framework/queries.go +++ /dev/null @@ -1,310 +0,0 @@ 
-package framework - -import ( - "errors" - "fmt" - "time" - - v1 "github.com/prometheus/client_golang/api/prometheus/v1" - "github.com/prometheus/common/model" -) - -type Bucket struct { - Le string - Val int -} - -func GetReloadCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`, - ngfPodName, - ), - promInstance, - ) -} - -func GetReloadCountWithStartTime( - promInstance PrometheusInstance, - ngfPodName string, - startTime time.Time, -) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - promInstance, - ) -} - -func GetReloadErrsCountWithStartTime( - promInstance PrometheusInstance, - ngfPodName string, - startTime time.Time, -) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - promInstance, - ) -} - -func GetReloadAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ - ` / `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`, - ngfPodName, - ), - promInstance, - ) -} - -func GetReloadAvgTimeWithStartTime( - promInstance PrometheusInstance, - ngfPodName string, - startTime time.Time, -) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ - ` / `+ - `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ - ` - `+ - 
`nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, - ngfPodName, - startTime.Unix(), - ), - promInstance, - ) -} - -func GetReloadBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { - return getBuckets( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`, - ngfPodName, - ), - promInstance, - ) -} - -func GetReloadBucketsWithStartTime( - promInstance PrometheusInstance, - ngfPodName string, - startTime time.Time, -) ([]Bucket, error) { - return getBuckets( - fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - promInstance, - ) -} - -func GetEventsCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`, - ngfPodName, - ), - promInstance, - ) -} - -func GetEventsCountWithStartTime( - promInstance PrometheusInstance, - ngfPodName string, - startTime time.Time, -) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - promInstance, - ) -} - -func GetEventsAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) { - return getFirstValueOfVector( - fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ - ` / `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`, - ngfPodName, - ), - promInstance, - ) -} - -func GetEventsAvgTimeWithStartTime( - promInstance PrometheusInstance, - ngfPodName string, - startTime time.Time, -) (float64, error) { - return getFirstValueOfVector( - 
fmt.Sprintf( - `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ - ` / `+ - `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, - ngfPodName, - startTime.Unix(), - ), - promInstance, - ) -} - -func GetEventsBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) { - return getBuckets( - fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`, - ngfPodName, - ), - promInstance, - ) -} - -func GetEventsBucketsWithStartTime( - promInstance PrometheusInstance, - ngfPodName string, - startTime time.Time, -) ([]Bucket, error) { - return getBuckets( - fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ - ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, - ngfPodName, - startTime.Unix(), - ), - promInstance, - ) -} - -func CreateMetricExistChecker( - promInstance PrometheusInstance, - query string, - getTime func() time.Time, - modifyTime func(), -) func() error { - return func() error { - queryWithTimestamp := fmt.Sprintf("%s @ %d", query, getTime().Unix()) - - result, err := promInstance.Query(queryWithTimestamp) - if err != nil { - return fmt.Errorf("failed to query Prometheus: %w", err) - } - - if result.String() == "" { - modifyTime() - return errors.New("empty result") - } - - return nil - } -} - -func CreateEndTimeFinder( - promInstance PrometheusInstance, - query string, - startTime time.Time, - t *time.Time, - queryRangeStep time.Duration, -) func() error { - return func() error { - result, err := promInstance.QueryRange(query, v1.Range{ - Start: startTime, - End: *t, - Step: queryRangeStep, - }) - if err != nil { - return fmt.Errorf("failed to query Prometheus: %w", err) 
- } - - if result.String() == "" { - *t = time.Now() - return errors.New("empty result") - } - - return nil - } -} - -func CreateResponseChecker(url, address string, requestTimeout time.Duration) func() error { - return func() error { - status, _, err := Get(url, address, requestTimeout) - if err != nil { - return fmt.Errorf("bad response: %w", err) - } - - if status != 200 { - return fmt.Errorf("unexpected status code: %d", status) - } - - return nil - } -} - -func getFirstValueOfVector(query string, promInstance PrometheusInstance) (float64, error) { - result, err := promInstance.Query(query) - if err != nil { - return 0, err - } - - val, err := GetFirstValueOfPrometheusVector(result) - if err != nil { - return 0, err - } - - return val, nil -} - -func getBuckets(query string, promInstance PrometheusInstance) ([]Bucket, error) { - result, err := promInstance.Query(query) - if err != nil { - return nil, err - } - - res, ok := result.(model.Vector) - if !ok { - return nil, errors.New("could not convert result to vector") - } - - buckets := make([]Bucket, 0, len(res)) - - for _, sample := range res { - le := sample.Metric["le"] - val := float64(sample.Value) - bucket := Bucket{ - Le: string(le), - Val: int(val), - } - buckets = append(buckets, bucket) - } - - return buckets, nil -} From e1720b68dfed4894832b4cbc375331379b61674d Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 12 Aug 2024 14:17:06 -0700 Subject: [PATCH 40/42] Add FIXME on issues --- tests/suite/reconfig_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/suite/reconfig_test.go b/tests/suite/reconfig_test.go index 8dbdcfe656..386b69000e 100644 --- a/tests/suite/reconfig_test.go +++ b/tests/suite/reconfig_test.go @@ -105,7 +105,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } createResourcesGWLast := func(resourceCount int) { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.CreateTimeout) + ctx, 
cancel := context.WithTimeout(context.Background(), timeoutConfig.CreateTimeout*5) defer cancel() for i := 1; i <= resourceCount; i++ { @@ -142,7 +142,7 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig } createResourcesRoutesLast := func(resourceCount int) { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.CreateTimeout) + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.CreateTimeout*5) defer cancel() for i := 1; i <= resourceCount; i++ { @@ -205,6 +205,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig cleanupResources := func() error { var err error + // FIXME (bjee19): https://github.com/nginxinc/nginx-gateway-fabric/issues/2376 + // Find a way to bulk delete these namespaces. for i := 1; i <= maxResourceCount; i++ { nsName := "namespace" + strconv.Itoa(i) resultError := resourceManager.DeleteNamespace(nsName) @@ -422,6 +424,8 @@ var _ = Describe("Reconfiguration Performance Testing", Ordered, Label("reconfig }) Expect(err).ToNot(HaveOccurred()) + // FIXME (bjee19): https://github.com/nginxinc/nginx-gateway-fabric/issues/2374 + // Find a way to calculate time to ready metrics without having to rely on specific log lines. 
timeToReadyTotal, err := calculateTimeToReadyTotal(logs, timeToReadyStartingLogSubstring) Expect(err).ToNot(HaveOccurred()) From 685295dc6dc6bd4821afb014ff1514ff0f81bfc9 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 12 Aug 2024 15:13:24 -0700 Subject: [PATCH 41/42] Add latest results --- tests/results/reconfig/b.jee/b.jee-oss.md | 194 +++++++++++++++++++++ tests/results/reconfig/v1.3.0/1.3.0-oss.md | 188 -------------------- 2 files changed, 194 insertions(+), 188 deletions(-) create mode 100644 tests/results/reconfig/b.jee/b.jee-oss.md delete mode 100644 tests/results/reconfig/v1.3.0/1.3.0-oss.md diff --git a/tests/results/reconfig/b.jee/b.jee-oss.md b/tests/results/reconfig/b.jee/b.jee-oss.md new file mode 100644 index 0000000000..66c8206188 --- /dev/null +++ b/tests/results/reconfig/b.jee/b.jee-oss.md @@ -0,0 +1,194 @@ +# Results + +## Test environment + +NGINX Plus: false + +NGINX Gateway Fabric: + +- Commit: 19f98ab76481e5de0ff8b9ca4ab618c7995cb90d +- Date: 2024-08-09T20:56:49Z +- Dirty: true + +GKE Cluster: + +- Node count: 4 +- k8s version: v1.29.6-gke.1326000 +- vCPUs per node: 2 +- RAM per node: 4019160Ki +- Max pods per node: 110 +- Zone: us-central1-c +- Instance Type: e2-medium + +## Test 1: Resources exist before startup - NumResources 30 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 2s +- TimeToReadyAvgSingle: < 1s +- NGINX Reloads: 2 +- NGINX Reload Average Time: 140ms +- Reload distribution: + - 500ms: 2 + - 1000ms: 2 + - 5000ms: 2 + - 10000ms: 2 + - 30000ms: 2 + - +Infms: 2 + +### Event Batch Processing + +- Event Batch Total: 6 +- Event Batch Processing Average Time: 76ms +- Event Batch Processing distribution: + - 500ms: 6 + - 1000ms: 6 + - 5000ms: 6 + - 10000ms: 6 + - 30000ms: 6 + - +Infms: 6 + + +## Test 1: Resources exist before startup - NumResources 150 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 4s +- TimeToReadyAvgSingle: < 1s +- NGINX Reloads: 2 +- NGINX Reload Average Time: 127ms +- Reload distribution: + - 
500ms: 2 + - 1000ms: 2 + - 5000ms: 2 + - 10000ms: 2 + - 30000ms: 2 + - +Infms: 2 + +### Event Batch Processing + +- Event Batch Total: 5 +- Event Batch Processing Average Time: 74ms +- Event Batch Processing distribution: + - 500ms: 5 + - 1000ms: 5 + - 5000ms: 5 + - 10000ms: 5 + - 30000ms: 5 + - +Infms: 5 + + +## Test 2: Start NGF, deploy Gateway, create many resources attached to GW - NumResources 30 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 7s +- TimeToReadyAvgSingle: < 1s +- NGINX Reloads: 59 +- NGINX Reload Average Time: 131ms +- Reload distribution: + - 500ms: 59 + - 1000ms: 59 + - 5000ms: 59 + - 10000ms: 59 + - 30000ms: 59 + - +Infms: 59 + +### Event Batch Processing + +- Event Batch Total: 335 +- Event Batch Processing Average Time: 23ms +- Event Batch Processing distribution: + - 500ms: 335 + - 1000ms: 335 + - 5000ms: 335 + - 10000ms: 335 + - 30000ms: 335 + - +Infms: 335 + + +## Test 2: Start NGF, deploy Gateway, create many resources attached to GW - NumResources 150 + +### Reloads and Time to Ready + +- TimeToReadyTotal: 43s +- TimeToReadyAvgSingle: < 1s +- NGINX Reloads: 310 +- NGINX Reload Average Time: 135ms +- Reload distribution: + - 500ms: 310 + - 1000ms: 310 + - 5000ms: 310 + - 10000ms: 310 + - 30000ms: 310 + - +Infms: 310 + +### Event Batch Processing + +- Event Batch Total: 1645 +- Event Batch Processing Average Time: 26ms +- Event Batch Processing distribution: + - 500ms: 1645 + - 1000ms: 1645 + - 5000ms: 1645 + - 10000ms: 1645 + - 30000ms: 1645 + - +Infms: 1645 + + +## Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway - NumResources 30 + +### Reloads and Time to Ready + +- TimeToReadyTotal: < 1s +- TimeToReadyAvgSingle: < 1s +- NGINX Reloads: 63 +- NGINX Reload Average Time: 126ms +- Reload distribution: + - 500ms: 63 + - 1000ms: 63 + - 5000ms: 63 + - 10000ms: 63 + - 30000ms: 63 + - +Infms: 63 + +### Event Batch Processing + +- Event Batch Total: 309 +- Event Batch Processing Average Time: 26ms +- 
Event Batch Processing distribution: + - 500ms: 309 + - 1000ms: 309 + - 5000ms: 309 + - 10000ms: 309 + - 30000ms: 309 + - +Infms: 309 + + +## Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway - NumResources 150 + +### Reloads and Time to Ready + +- TimeToReadyTotal: < 1s +- TimeToReadyAvgSingle: < 1s +- NGINX Reloads: 326 +- NGINX Reload Average Time: 131ms +- Reload distribution: + - 500ms: 326 + - 1000ms: 326 + - 5000ms: 326 + - 10000ms: 326 + - 30000ms: 326 + - +Infms: 326 + +### Event Batch Processing + +- Event Batch Total: 1632 +- Event Batch Processing Average Time: 26ms +- Event Batch Processing distribution: + - 500ms: 1632 + - 1000ms: 1632 + - 5000ms: 1632 + - 10000ms: 1632 + - 30000ms: 1632 + - +Infms: 1632 diff --git a/tests/results/reconfig/v1.3.0/1.3.0-oss.md b/tests/results/reconfig/v1.3.0/1.3.0-oss.md deleted file mode 100644 index cd08f3efd0..0000000000 --- a/tests/results/reconfig/v1.3.0/1.3.0-oss.md +++ /dev/null @@ -1,188 +0,0 @@ -# Results - -## Test environment - -NGINX Plus: false - -GKE Cluster: - -- Node count: 4 -- k8s version: v1.29.6-gke.1254000 -- vCPUs per node: 2 -- RAM per node: 4019160Ki -- Max pods per node: 110 -- Zone: us-central1-c -- Instance Type: e2-medium - -## Test 1 NumResources 30 - -### Reloads and Time to Ready - -- TimeToReadyTotal: 2 -- TimeToReadyAvgSingle: < 1 -- NGINX Reloads: 2 -- NGINX Reload Average Time: 76 -- Reload distribution: - - 500ms: 2 - - 1000ms: 2 - - 5000ms: 2 - - 10000ms: 2 - - 30000ms: 2 - - +Infms: 2 - -### Event Batch Processing - -- Event Batch Total: 6 -- Event Batch Processing Average Time: 108ms -- Event Batch Processing distribution: - - 500ms: 5 - - 1000ms: 6 - - 5000ms: 6 - - 10000ms: 6 - - 30000ms: 6 - - +Infms: 6 - - -## Test 1 NumResources 150 - -### Reloads and Time to Ready - -- TimeToReadyTotal: 4 -- TimeToReadyAvgSingle: < 1 -- NGINX Reloads: 3 -- NGINX Reload Average Time: 120 -- Reload distribution: - - 500ms: 3 - - 1000ms: 3 - - 5000ms: 3 - - 
10000ms: 3 - - 30000ms: 3 - - +Infms: 3 - -### Event Batch Processing - -- Event Batch Total: 6 -- Event Batch Processing Average Time: 111ms -- Event Batch Processing distribution: - - 500ms: 6 - - 1000ms: 6 - - 5000ms: 6 - - 10000ms: 6 - - 30000ms: 6 - - +Infms: 6 - - -## Test 2 NumResources 30 - -### Reloads and Time to Ready - -- TimeToReadyTotal: 10 -- TimeToReadyAvgSingle: < 1 -- NGINX Reloads: 61 -- NGINX Reload Average Time: 127 -- Reload distribution: - - 500ms: 61 - - 1000ms: 61 - - 5000ms: 61 - - 10000ms: 61 - - 30000ms: 61 - - +Infms: 61 - -### Event Batch Processing - -- Event Batch Total: 336 -- Event Batch Processing Average Time: 30ms -- Event Batch Processing distribution: - - 500ms: 335 - - 1000ms: 336 - - 5000ms: 336 - - 10000ms: 336 - - 30000ms: 336 - - +Infms: 336 - - -## Test 2 NumResources 150 - -### Reloads and Time to Ready - -- TimeToReadyTotal: 52 -- TimeToReadyAvgSingle: < 1 -- NGINX Reloads: 301 -- NGINX Reload Average Time: 131 -- Reload distribution: - - 500ms: 301 - - 1000ms: 301 - - 5000ms: 301 - - 10000ms: 301 - - 30000ms: 301 - - +Infms: 301 - -### Event Batch Processing - -- Event Batch Total: 1655 -- Event Batch Processing Average Time: 30ms -- Event Batch Processing distribution: - - 500ms: 1654 - - 1000ms: 1655 - - 5000ms: 1655 - - 10000ms: 1655 - - 30000ms: 1655 - - +Infms: 1655 - - -## Test 3 NumResources 30 - -### Reloads and Time to Ready - -- TimeToReadyTotal: < 1 -- TimeToReadyAvgSingle: < 1 -- NGINX Reloads: 63 -- NGINX Reload Average Time: 132 -- Reload distribution: - - 500ms: 63 - - 1000ms: 63 - - 5000ms: 63 - - 10000ms: 63 - - 30000ms: 63 - - +Infms: 63 - -### Event Batch Processing - -- Event Batch Total: 336 -- Event Batch Processing Average Time: 25ms -- Event Batch Processing distribution: - - 500ms: 336 - - 1000ms: 336 - - 5000ms: 336 - - 10000ms: 336 - - 30000ms: 336 - - +Infms: 336 - - -## Test 3 NumResources 150 - -### Reloads and Time to Ready - -- TimeToReadyTotal: < 1 -- TimeToReadyAvgSingle: < 1 -- NGINX 
Reloads: 318 -- NGINX Reload Average Time: 131 -- Reload distribution: - - 500ms: 318 - - 1000ms: 318 - - 5000ms: 318 - - 10000ms: 318 - - 30000ms: 318 - - +Infms: 318 - -### Event Batch Processing - -- Event Batch Total: 1669 -- Event Batch Processing Average Time: 25ms -- Event Batch Processing distribution: - - 500ms: 1669 - - 1000ms: 1669 - - 5000ms: 1669 - - 10000ms: 1669 - - 30000ms: 1669 - - +Infms: 1669 From 683cd36ec839b54adb1551a66bbf3c4df44bc540 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 12 Aug 2024 15:18:07 -0700 Subject: [PATCH 42/42] Remove latest results --- tests/results/reconfig/b.jee/b.jee-oss.md | 194 ---------------------- 1 file changed, 194 deletions(-) delete mode 100644 tests/results/reconfig/b.jee/b.jee-oss.md diff --git a/tests/results/reconfig/b.jee/b.jee-oss.md b/tests/results/reconfig/b.jee/b.jee-oss.md deleted file mode 100644 index 66c8206188..0000000000 --- a/tests/results/reconfig/b.jee/b.jee-oss.md +++ /dev/null @@ -1,194 +0,0 @@ -# Results - -## Test environment - -NGINX Plus: false - -NGINX Gateway Fabric: - -- Commit: 19f98ab76481e5de0ff8b9ca4ab618c7995cb90d -- Date: 2024-08-09T20:56:49Z -- Dirty: true - -GKE Cluster: - -- Node count: 4 -- k8s version: v1.29.6-gke.1326000 -- vCPUs per node: 2 -- RAM per node: 4019160Ki -- Max pods per node: 110 -- Zone: us-central1-c -- Instance Type: e2-medium - -## Test 1: Resources exist before startup - NumResources 30 - -### Reloads and Time to Ready - -- TimeToReadyTotal: 2s -- TimeToReadyAvgSingle: < 1s -- NGINX Reloads: 2 -- NGINX Reload Average Time: 140ms -- Reload distribution: - - 500ms: 2 - - 1000ms: 2 - - 5000ms: 2 - - 10000ms: 2 - - 30000ms: 2 - - +Infms: 2 - -### Event Batch Processing - -- Event Batch Total: 6 -- Event Batch Processing Average Time: 76ms -- Event Batch Processing distribution: - - 500ms: 6 - - 1000ms: 6 - - 5000ms: 6 - - 10000ms: 6 - - 30000ms: 6 - - +Infms: 6 - - -## Test 1: Resources exist before startup - NumResources 150 - -### Reloads and Time 
to Ready - -- TimeToReadyTotal: 4s -- TimeToReadyAvgSingle: < 1s -- NGINX Reloads: 2 -- NGINX Reload Average Time: 127ms -- Reload distribution: - - 500ms: 2 - - 1000ms: 2 - - 5000ms: 2 - - 10000ms: 2 - - 30000ms: 2 - - +Infms: 2 - -### Event Batch Processing - -- Event Batch Total: 5 -- Event Batch Processing Average Time: 74ms -- Event Batch Processing distribution: - - 500ms: 5 - - 1000ms: 5 - - 5000ms: 5 - - 10000ms: 5 - - 30000ms: 5 - - +Infms: 5 - - -## Test 2: Start NGF, deploy Gateway, create many resources attached to GW - NumResources 30 - -### Reloads and Time to Ready - -- TimeToReadyTotal: 7s -- TimeToReadyAvgSingle: < 1s -- NGINX Reloads: 59 -- NGINX Reload Average Time: 131ms -- Reload distribution: - - 500ms: 59 - - 1000ms: 59 - - 5000ms: 59 - - 10000ms: 59 - - 30000ms: 59 - - +Infms: 59 - -### Event Batch Processing - -- Event Batch Total: 335 -- Event Batch Processing Average Time: 23ms -- Event Batch Processing distribution: - - 500ms: 335 - - 1000ms: 335 - - 5000ms: 335 - - 10000ms: 335 - - 30000ms: 335 - - +Infms: 335 - - -## Test 2: Start NGF, deploy Gateway, create many resources attached to GW - NumResources 150 - -### Reloads and Time to Ready - -- TimeToReadyTotal: 43s -- TimeToReadyAvgSingle: < 1s -- NGINX Reloads: 310 -- NGINX Reload Average Time: 135ms -- Reload distribution: - - 500ms: 310 - - 1000ms: 310 - - 5000ms: 310 - - 10000ms: 310 - - 30000ms: 310 - - +Infms: 310 - -### Event Batch Processing - -- Event Batch Total: 1645 -- Event Batch Processing Average Time: 26ms -- Event Batch Processing distribution: - - 500ms: 1645 - - 1000ms: 1645 - - 5000ms: 1645 - - 10000ms: 1645 - - 30000ms: 1645 - - +Infms: 1645 - - -## Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway - NumResources 30 - -### Reloads and Time to Ready - -- TimeToReadyTotal: < 1s -- TimeToReadyAvgSingle: < 1s -- NGINX Reloads: 63 -- NGINX Reload Average Time: 126ms -- Reload distribution: - - 500ms: 63 - - 1000ms: 63 - - 5000ms: 63 - - 
10000ms: 63 - - 30000ms: 63 - - +Infms: 63 - -### Event Batch Processing - -- Event Batch Total: 309 -- Event Batch Processing Average Time: 26ms -- Event Batch Processing distribution: - - 500ms: 309 - - 1000ms: 309 - - 5000ms: 309 - - 10000ms: 309 - - 30000ms: 309 - - +Infms: 309 - - -## Test 3: Start NGF, create many resources attached to a Gateway, deploy the Gateway - NumResources 150 - -### Reloads and Time to Ready - -- TimeToReadyTotal: < 1s -- TimeToReadyAvgSingle: < 1s -- NGINX Reloads: 326 -- NGINX Reload Average Time: 131ms -- Reload distribution: - - 500ms: 326 - - 1000ms: 326 - - 5000ms: 326 - - 10000ms: 326 - - 30000ms: 326 - - +Infms: 326 - -### Event Batch Processing - -- Event Batch Total: 1632 -- Event Batch Processing Average Time: 26ms -- Event Batch Processing distribution: - - 500ms: 1632 - - 1000ms: 1632 - - 5000ms: 1632 - - 10000ms: 1632 - - 30000ms: 1632 - - +Infms: 1632