Skip to content

Commit 2765acd

Browse files
mgencurclaude
andcommitted
feat: Enable ROSA cluster support in HCP backup/restore tests
- Add external-rosa mode for HC_BACKUP_RESTORE_MODE to support existing ROSA clusters - Introduce HC_NAMESPACE parameter for configurable cluster namespace management - Add service cluster kubeconfig support via SC_KUBECONFIG parameter for ROSA ManifestWork operations - Implement ManifestWork backup/deletion functionality for ROSA cluster lifecycle management - Add open-cluster-management.io/api dependency to support ManifestWork operations - Create separate OADP deployment operations for default vs ROSA scenarios - Skip DPA HCP plugin modification for ROSA where DPA is managed via ManifestWork - Add VSL_AWS_PROFILE parameter for volume snapshot location AWS profile configuration - Refactor backup/restore suite to use pluggable deployment strategies - Update test configuration to handle both regular HCP and ROSA cluster workflows 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent 94f5f0b commit 2765acd

14 files changed

+518
-124
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,13 @@ TTL_DURATION ?= 1h
6464
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
6565
ENVTEST_K8S_VERSION = 1.32 # Kubernetes version from OpenShift 4.19.x https://openshift-release.apps.ci.l2s4.p1.openshiftapps.com/#4-stable
6666

67-
# HC_NAME is the name of the HostedCluster to use for HCP tests when
68-
# hc_backup_restore_mode is set to external. Otherwise, HC_NAME is ignored.
67+
# HC_BACKUP_RESTORE_MODE is the mode of the HostedCluster to use for HCP tests.
68+
HC_BACKUP_RESTORE_MODE ?= external # create, external, external-rosa
69+
# HC_NAME is the name of the HostedCluster to use for HCP tests when HC_BACKUP_RESTORE_MODE is
70+
# set to external or external-rosa. Otherwise, HC_NAME is ignored.
6971
HC_NAME ?= ""
72+
# HC_NAMESPACE is the namespace for HostedClusters to use for HCP tests.
73+
HC_NAMESPACE ?= clusters
7074

7175
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
7276
ifeq (,$(shell go env GOBIN))
@@ -671,6 +675,7 @@ CI_CRED_FILE ?= ${CLUSTER_PROFILE_DIR}/.awscred
671675
BSL_REGION ?= us-east-1
672676
VSL_REGION ?= ${LEASED_RESOURCE}
673677
BSL_AWS_PROFILE ?= default
678+
VSL_AWS_PROFILE ?= default
674679
# BSL_AWS_PROFILE ?= migration-engineering
675680

676681
# bucket file
@@ -724,6 +729,7 @@ test-e2e-setup: login-required build-must-gather
724729
OADP_CRED_FILE="$(OADP_CRED_FILE)" \
725730
BUCKET="$(OADP_BUCKET)" \
726731
TARGET_CI_CRED_FILE="$(CI_CRED_FILE)" \
732+
VSL_AWS_PROFILE="$(VSL_AWS_PROFILE)" \
727733
VSL_REGION="$(VSL_REGION)" \
728734
BSL_REGION="$(BSL_REGION)" \
729735
BSL_AWS_PROFILE="$(BSL_AWS_PROFILE)" \
@@ -760,7 +766,7 @@ else
760766
endif
761767
ifeq ($(TEST_HCP_EXTERNAL),true)
762768
TEST_FILTER += && (hcp_external)
763-
HCP_EXTERNAL_ARGS = -hc_backup_restore_mode=external -hc_name=$(HC_NAME)
769+
HCP_EXTERNAL_ARGS = -hc_backup_restore_mode=$(HC_BACKUP_RESTORE_MODE) -hc_name=$(HC_NAME) -hc_namespace=$(HC_NAMESPACE) -sc_kubeconfig=$(SC_KUBECONFIG)
764770
else
765771
TEST_FILTER += && (! hcp_external)
766772
endif

docs/developer/testing/TESTING.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,23 @@ HC_NAME=hc1 \
110110
make test-e2e
111111
```
112112

113+
### Run selected test for HCP against external HostedControlPlane on ROSA
114+
115+
* KUBECONFIG must point to the management cluster
116+
* SC_KUBECONFIG must point to the Service Cluster with ManifestWork resources
117+
* In order to break the guest cluster, the tests delete ManifestWork resources on the Service Cluster.
118+
119+
120+
```bash
121+
TEST_HCP_EXTERNAL=true \
122+
HC_BACKUP_RESTORE_MODE=external-rosa \
123+
HC_NAME=hc1 \
124+
HC_NAMESPACE=xyz \
125+
SC_KUBECONFIG=/path/to/service/cluster/kubeconfig \
126+
make test-e2e
127+
```
128+
129+
113130
### Run tests with custom images
114131

115132
You can run tests with custom images by setting the following environment variables:

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ require (
2222
k8s.io/apimachinery v0.31.3
2323
k8s.io/client-go v0.31.3
2424
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
25+
open-cluster-management.io/api v0.15.0
2526
sigs.k8s.io/controller-runtime v0.19.3
2627
)
2728

@@ -43,6 +44,7 @@ require (
4344
github.com/vmware-tanzu/velero v1.14.0
4445
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
4546
google.golang.org/api v0.218.0
47+
gopkg.in/yaml.v2 v2.4.0
4648
k8s.io/klog/v2 v2.130.1
4749
)
4850

@@ -170,7 +172,6 @@ require (
170172
google.golang.org/protobuf v1.36.3 // indirect
171173
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
172174
gopkg.in/inf.v0 v0.9.1 // indirect
173-
gopkg.in/yaml.v2 v2.4.0 // indirect
174175
gopkg.in/yaml.v3 v3.0.1 // indirect
175176
k8s.io/cli-runtime v0.31.3 // indirect
176177
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect

go.sum

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -679,8 +679,9 @@ github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+
679679
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
680680
github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
681681
github.com/onsi/ginkgo v1.16.2/go.mod h1:CObGmKUOKaSC0RjmoAK7tKyn4Azo5P2IWuoMnvwxz1E=
682-
github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc=
683682
github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
683+
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
684+
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
684685
github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
685686
github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
686687
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
@@ -1486,6 +1487,8 @@ k8s.io/utils v0.0.0-20210707171843-4b05e18ac7d9/go.mod h1:jPW/WVKK9YHAvNhRxK0md/
14861487
k8s.io/utils v0.0.0-20210802155522-efc7438f0176/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA=
14871488
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
14881489
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
1490+
open-cluster-management.io/api v0.15.0 h1:lRee1KOlGHZb2scTA7ff9E9Fxt2hJc7jpkHnaCbvkOU=
1491+
open-cluster-management.io/api v0.15.0/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
14891492
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
14901493
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
14911494
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=

tests/e2e/backup_restore_cli_suite_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ var _ = ginkgo.Describe("Backup and restore tests via OADP CLI", ginkgo.Label("c
208208

209209
var _ = ginkgo.AfterAll(func() {
210210
// Same cleanup as original
211-
waitOADPReadiness(lib.KOPIA)
211+
NewOADPDeploymentOperationDefault().Deploy(lib.KOPIA)
212212

213213
log.Printf("Creating real DataProtectionTest before must-gather")
214214
bsls, err := dpaCR.ListBSLs()

tests/e2e/backup_restore_suite_test.go

Lines changed: 105 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,108 @@ type ApplicationBackupRestoreCase struct {
3333
PvcSuffixName string
3434
}
3535

36+
// OADPDeploymentOperation is a helper to deploy OADP resources for a given backup restore type.
37+
type OADPDeploymentOperation struct {
38+
CreateDPA bool
39+
CreateVolumeSnapshotClass bool
40+
CreateBSL bool
41+
CreateVSL bool
42+
}
43+
44+
func NewOADPDeploymentOperationDefault() *OADPDeploymentOperation {
45+
return &OADPDeploymentOperation{
46+
CreateDPA: true,
47+
CreateVolumeSnapshotClass: true,
48+
CreateBSL: false,
49+
CreateVSL: false,
50+
}
51+
}
52+
53+
func NewOADPDeploymentOperationROSA() *OADPDeploymentOperation {
54+
return &OADPDeploymentOperation{
55+
CreateDPA: false,
56+
CreateVolumeSnapshotClass: false,
57+
CreateBSL: true,
58+
CreateVSL: true,
59+
}
60+
}
61+
62+
func (o *OADPDeploymentOperation) Deploy(backupRestoreType lib.BackupRestoreType) {
63+
if o.CreateDPA {
64+
err := dpaCR.CreateOrUpdate(dpaCR.Build(backupRestoreType))
65+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
66+
67+
log.Print("Checking if DPA is reconciled")
68+
gomega.Eventually(dpaCR.IsReconciledTrue(), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
69+
70+
if backupRestoreType == lib.RESTIC || backupRestoreType == lib.KOPIA || backupRestoreType == lib.CSIDataMover {
71+
log.Printf("Waiting for Node Agent pods to be running")
72+
gomega.Eventually(lib.AreNodeAgentPodsRunning(kubernetesClientForSuiteRun, namespace), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
73+
}
74+
}
75+
76+
log.Printf("Waiting for Velero Pod to be running")
77+
gomega.Eventually(lib.VeleroPodIsRunning(kubernetesClientForSuiteRun, namespace), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
78+
79+
if o.CreateVolumeSnapshotClass {
80+
if backupRestoreType == lib.CSI || backupRestoreType == lib.CSIDataMover {
81+
if provider == "aws" || provider == "ibmcloud" || provider == "gcp" || provider == "azure" || provider == "openstack" {
82+
log.Printf("Creating VolumeSnapshotClass for CSI backuprestore")
83+
snapshotClassPath := fmt.Sprintf("./sample-applications/snapclass-csi/%s.yaml", provider)
84+
err := lib.InstallApplication(dpaCR.Client, snapshotClassPath)
85+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
86+
}
87+
}
88+
}
89+
90+
if o.CreateBSL {
91+
log.Print("Creating BSL")
92+
err := dpaCR.CreateBackupStorageLocation()
93+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
94+
}
95+
96+
log.Print("Checking if BSL is available")
97+
gomega.Eventually(dpaCR.BSLsAreAvailable(), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
98+
99+
if o.CreateVSL {
100+
log.Print("Creating VSL")
101+
err := dpaCR.CreateVolumeSnapshotLocation()
102+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
103+
// Velero does not change status of VSL objects.
104+
// Users can only confirm if VSLs are correctly configured when running a native snapshot backup/restore
105+
}
106+
}
107+
108+
func (o *OADPDeploymentOperation) Undeploy(backupRestoreType lib.BackupRestoreType) {
109+
if o.CreateVolumeSnapshotClass {
110+
if backupRestoreType == lib.CSI || backupRestoreType == lib.CSIDataMover {
111+
log.Printf("Deleting VolumeSnapshot for CSI backuprestore")
112+
snapshotClassPath := fmt.Sprintf("./sample-applications/snapclass-csi/%s.yaml", provider)
113+
err := lib.UninstallApplication(dpaCR.Client, snapshotClassPath)
114+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
115+
}
116+
}
117+
118+
if o.CreateDPA {
119+
log.Printf("Deleting DPA")
120+
err := dpaCR.Delete()
121+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
122+
gomega.Eventually(dpaCR.IsDeleted(), time.Minute*2, time.Second*5).Should(gomega.BeTrue())
123+
}
124+
125+
if o.CreateBSL {
126+
log.Printf("Deleting BSL")
127+
err := dpaCR.DeleteBackupStorageLocation()
128+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
129+
}
130+
131+
if o.CreateVSL {
132+
log.Printf("Deleting VSL")
133+
err := dpaCR.DeleteVolumeSnapshotLocation()
134+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
135+
}
136+
}
137+
36138
func todoListReady(preBackupState bool, twoVol bool, database string) VerificationFunction {
37139
return VerificationFunction(func(ocClient client.Client, namespace string) error {
38140
log.Printf("checking for the NAMESPACE: %s", namespace)
@@ -49,40 +151,10 @@ func todoListReady(preBackupState bool, twoVol bool, database string) Verificati
49151
})
50152
}
51153

52-
func waitOADPReadiness(backupRestoreType lib.BackupRestoreType) {
53-
err := dpaCR.CreateOrUpdate(dpaCR.Build(backupRestoreType))
54-
gomega.Expect(err).NotTo(gomega.HaveOccurred())
55-
56-
log.Print("Checking if DPA is reconciled")
57-
gomega.Eventually(dpaCR.IsReconciledTrue(), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
58-
59-
log.Printf("Waiting for Velero Pod to be running")
60-
gomega.Eventually(lib.VeleroPodIsRunning(kubernetesClientForSuiteRun, namespace), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
61-
62-
if backupRestoreType == lib.RESTIC || backupRestoreType == lib.KOPIA || backupRestoreType == lib.CSIDataMover {
63-
log.Printf("Waiting for Node Agent pods to be running")
64-
gomega.Eventually(lib.AreNodeAgentPodsRunning(kubernetesClientForSuiteRun, namespace), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
65-
}
66-
67-
// Velero does not change status of VSL objects. Users can only confirm if VSLs are correct configured when running a native snapshot backup/restore
68-
69-
log.Print("Checking if BSL is available")
70-
gomega.Eventually(dpaCR.BSLsAreAvailable(), time.Minute*3, time.Second*5).Should(gomega.BeTrue())
71-
}
72-
73154
func prepareBackupAndRestore(brCase BackupRestoreCase, updateLastInstallTime func()) (string, string) {
74155
updateLastInstallTime()
75156

76-
waitOADPReadiness(brCase.BackupRestoreType)
77-
78-
if brCase.BackupRestoreType == lib.CSI || brCase.BackupRestoreType == lib.CSIDataMover {
79-
if provider == "aws" || provider == "ibmcloud" || provider == "gcp" || provider == "azure" || provider == "openstack" {
80-
log.Printf("Creating VolumeSnapshotClass for CSI backuprestore of %s", brCase.Name)
81-
snapshotClassPath := fmt.Sprintf("./sample-applications/snapclass-csi/%s.yaml", provider)
82-
err := lib.InstallApplication(dpaCR.Client, snapshotClassPath)
83-
gomega.Expect(err).ToNot(gomega.HaveOccurred())
84-
}
85-
}
157+
NewOADPDeploymentOperationDefault().Deploy(brCase.BackupRestoreType)
86158

87159
// TODO: check registry deployments are deleted
88160
// TODO: check S3 for images
@@ -257,22 +329,10 @@ func getFailedTestLogs(oadpNamespace string, appNamespace string, installTime ti
257329
func tearDownBackupAndRestore(brCase BackupRestoreCase, installTime time.Time, report ginkgo.SpecReport) {
258330
log.Println("Post backup and restore state: ", report.State.String())
259331
gatherLogs(brCase, installTime, report)
260-
tearDownDPAResources(brCase)
332+
NewOADPDeploymentOperationDefault().Undeploy(brCase.BackupRestoreType)
261333
deleteNamespace(brCase.Namespace)
262334
}
263335

264-
func tearDownDPAResources(brCase BackupRestoreCase) {
265-
if brCase.BackupRestoreType == lib.CSI || brCase.BackupRestoreType == lib.CSIDataMover {
266-
log.Printf("Deleting VolumeSnapshot for CSI backuprestore of %s", brCase.Name)
267-
snapshotClassPath := fmt.Sprintf("./sample-applications/snapclass-csi/%s.yaml", provider)
268-
err := lib.UninstallApplication(dpaCR.Client, snapshotClassPath)
269-
gomega.Expect(err).ToNot(gomega.HaveOccurred())
270-
}
271-
272-
err := dpaCR.Delete()
273-
gomega.Expect(err).ToNot(gomega.HaveOccurred())
274-
}
275-
276336
func gatherLogs(brCase BackupRestoreCase, installTime time.Time, report ginkgo.SpecReport) {
277337
if report.Failed() {
278338
knownFlake = lib.CheckIfFlakeOccurred(accumulatedTestLogs)
@@ -304,7 +364,7 @@ var _ = ginkgo.Describe("Backup and restore tests", ginkgo.Ordered, func() {
304364
var _ = ginkgo.AfterAll(func() {
305365
// DPA just needs to have BSL so gathering of backups/restores logs/describe work
306366
// using kopia to collect more info (DaemonSet)
307-
waitOADPReadiness(lib.KOPIA)
367+
NewOADPDeploymentOperationDefault().Deploy(lib.KOPIA)
308368

309369
log.Printf("Creating real DataProtectionTest before must-gather")
310370
bsls, err := dpaCR.ListBSLs()

0 commit comments

Comments
 (0)