Skip to content

Commit 84353f5

Browse files
mgencurclaude
andcommitted
Add comprehensive HCP full backup/restore test suite with existing cluster support
This commit introduces a complete HCP (Hosted Control Plane) backup and restore testing framework with support for both newly created and existing HostedCluster environments.

## Key Features Added:

### New Test Infrastructure
- Add `hcp_full_backup_restore_suite_test.go`: Complete test suite for full HCP backup/restore scenarios
- Support for two operational modes:
  - `create`: Creates new HostedCluster for testing (existing behavior)
  - `existing`: Uses pre-existing HostedCluster with data plane

### Enhanced Configuration Options
- Add Makefile variables for HCP test configuration:
  - `HC_BACKUP_RESTORE_MODE`: Controls test execution mode (create/existing)
  - `HC_NAME`: Specifies HostedCluster name for existing mode
  - `HC_KUBECONFIG`: Path to guest cluster kubeconfig for existing mode
- Pass HCP configuration parameters to e2e test execution

### Improved Test Architecture
- Refactor `runHCPBackupAndRestore()` function for unified handling of both modes
- Add guest cluster verification functions (`PreBackupVerifyGuest`, `PostRestoreVerifyGuest`)
- Separate log gathering and DPA resource cleanup into reusable functions
- Enhanced error handling and validation for both control plane and guest cluster

### Guest Cluster Testing
- Add support for kubeconfig-based guest cluster operations
- Implement pre/post backup verification for guest cluster resources
- Add namespace creation/validation tests for guest cluster functionality

### Library Enhancements
- Add `GetHostedCluster()` method to retrieve existing HostedCluster objects
- Add `ClientGuest` field to `HCHandler` for guest cluster operations
- Improve error message formatting in DPA helpers

### Documentation Updates
- Add comprehensive testing documentation for HCP scenarios
- Include examples for running tests against existing HostedControlPlane
- Document environment variable configuration options

### Build System Improvements
- Add conditional must-gather build based on `SKIP_MUST_GATHER` flag
- Enhanced e2e test parameter passing for HCP configurations

## Technical Implementation:

The implementation supports testing both scenarios where OADP needs to:
1. Create a new HostedCluster and test backup/restore (existing functionality)
2. Work with an existing HostedCluster that already has workloads and data plane

This enables comprehensive testing of HCP backup/restore functionality in realistic production-like environments where clusters already exist and contain user workloads.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent c354db6 commit 84353f5

File tree

9 files changed

+271
-32
lines changed

9 files changed

+271
-32
lines changed

Makefile

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ TTL_DURATION ?= 1h
6464
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
6565
ENVTEST_K8S_VERSION = 1.32 # Kubernetes version from OpenShift 4.19.x https://openshift-release.apps.ci.l2s4.p1.openshiftapps.com/#4-stable
6666

67+
# HC_BACKUP_RESTORE_MODE selects how HCP tests obtain a HostedCluster: by creating a new one or by using an existing one.
68+
# Possible values are: create, existing.
69+
HC_BACKUP_RESTORE_MODE ?= create
70+
# HC_NAME is the name of the HostedCluster to use for HCP tests when HC_BACKUP_RESTORE_MODE is set to existing.
71+
# Otherwise, HC_NAME is ignored.
72+
HC_NAME ?= ""
73+
# HC_KUBECONFIG is the path to the kubeconfig file for the HostedCluster to use for HCP tests when HC_BACKUP_RESTORE_MODE is set to existing.
74+
# Otherwise, HC_KUBECONFIG is ignored.
75+
HC_KUBECONFIG ?= ""
76+
6777
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
6878
ifeq (,$(shell go env GOBIN))
6979
GOBIN=$(shell go env GOPATH)/bin
@@ -769,6 +779,9 @@ test-e2e: test-e2e-setup install-ginkgo ## Run E2E tests against OADP operator i
769779
-velero_instance_name=$(VELERO_INSTANCE_NAME) \
770780
-artifact_dir=$(ARTIFACT_DIR) \
771781
-kvm_emulation=$(KVM_EMULATION) \
782+
-hc_backup_restore_mode=$(HC_BACKUP_RESTORE_MODE) \
783+
-hc_name=$(HC_NAME) \
784+
-hc_kubeconfig=$(HC_KUBECONFIG) \
772785
-hco_upstream=$(HCO_UPSTREAM) \
773786
-skipMustGather=$(SKIP_MUST_GATHER) \
774787
--ginkgo.vv \
@@ -792,7 +805,6 @@ test-e2e-cleanup: login-required
792805
for restore_name in $(shell $(OC_CLI) get restore -n $(OADP_TEST_NAMESPACE) -o name);do $(OC_CLI) patch "$$restore_name" -n $(OADP_TEST_NAMESPACE) -p '{"metadata":{"finalizers":null}}' --type=merge;done
793806
rm -rf $(SETTINGS_TMP)
794807

795-
796808
.PHONY: update-non-admin-manifests
797809
update-non-admin-manifests: NON_ADMIN_CONTROLLER_IMG?=quay.io/konveyor/oadp-non-admin:latest
798810
update-non-admin-manifests: yq ## Update Non Admin Controller (NAC) manifests shipped with OADP, from NON_ADMIN_CONTROLLER_PATH
@@ -816,4 +828,8 @@ endif
816828

817829
.PHONY: build-must-gather
818830
build-must-gather: ## Build OADP Must-gather binary must-gather/oadp-must-gather
831+
ifeq ($(SKIP_MUST_GATHER),true)
832+
echo "Skipping must-gather build"
833+
else
819834
cd must-gather && go build -mod=mod -a -o oadp-must-gather cmd/main.go
835+
endif

docs/developer/testing/TESTING.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,17 @@ You can also execute make test-e2e with a $GINKGO_ARGS variable set. Example:
9999
make test-e2e GINKGO_ARGS="--ginkgo.focus='MySQL application DATAMOVER'"
100100
```
101101

102+
### Run selected HCP tests against an existing HostedControlPlane
103+
104+
Set common env variables as mentioned above, then run:
105+
106+
```bash
107+
HC_BACKUP_RESTORE_MODE=existing \
108+
HC_NAME=hc1 \
109+
HC_KUBECONFIG=/path/to/kubeconfig/for/hosted/cluster \
110+
make test-e2e GINKGO_ARGS="--ginkgo.focus='HCP full Backup and Restore tests'"
111+
```
112+
102113
### Run tests with custom images
103114

104115
You can run tests with custom images by setting the following environment variables:

tests/e2e/backup_restore_suite_test.go

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ func runRestore(brCase BackupRestoreCase, backupName, restoreName string, nsRequ
237237

238238
func getFailedTestLogs(oadpNamespace string, appNamespace string, installTime time.Time, report ginkgo.SpecReport) {
239239
baseReportDir := artifact_dir + "/" + report.LeafNodeText
240+
log.Println("Storing failed test logs in: ", baseReportDir)
240241
err := os.MkdirAll(baseReportDir, 0755)
241242
gomega.Expect(err).NotTo(gomega.HaveOccurred())
242243

@@ -255,12 +256,12 @@ func getFailedTestLogs(oadpNamespace string, appNamespace string, installTime ti
255256

256257
func tearDownBackupAndRestore(brCase BackupRestoreCase, installTime time.Time, report ginkgo.SpecReport) {
257258
log.Println("Post backup and restore state: ", report.State.String())
259+
gatherLogs(brCase, installTime, report)
260+
tearDownDPAResources(brCase)
261+
deleteNamespace(brCase.Namespace)
262+
}
258263

259-
if report.Failed() {
260-
knownFlake = lib.CheckIfFlakeOccurred(accumulatedTestLogs)
261-
accumulatedTestLogs = nil
262-
getFailedTestLogs(namespace, brCase.Namespace, installTime, report)
263-
}
264+
func tearDownDPAResources(brCase BackupRestoreCase) {
264265
if brCase.BackupRestoreType == lib.CSI || brCase.BackupRestoreType == lib.CSIDataMover {
265266
log.Printf("Deleting VolumeSnapshot for CSI backuprestore of %s", brCase.Name)
266267
snapshotClassPath := fmt.Sprintf("./sample-applications/snapclass-csi/%s.yaml", provider)
@@ -270,10 +271,20 @@ func tearDownBackupAndRestore(brCase BackupRestoreCase, installTime time.Time, r
270271

271272
err := dpaCR.Delete()
272273
gomega.Expect(err).ToNot(gomega.HaveOccurred())
274+
}
275+
276+
func gatherLogs(brCase BackupRestoreCase, installTime time.Time, report ginkgo.SpecReport) {
277+
if report.Failed() {
278+
knownFlake = lib.CheckIfFlakeOccurred(accumulatedTestLogs)
279+
accumulatedTestLogs = nil
280+
getFailedTestLogs(namespace, brCase.Namespace, installTime, report)
281+
}
282+
}
273283

274-
err = lib.DeleteNamespace(kubernetesClientForSuiteRun, brCase.Namespace)
284+
func deleteNamespace(namespace string) {
285+
err := lib.DeleteNamespace(kubernetesClientForSuiteRun, namespace)
275286
gomega.Expect(err).ToNot(gomega.HaveOccurred())
276-
gomega.Eventually(lib.IsNamespaceDeleted(kubernetesClientForSuiteRun, brCase.Namespace), time.Minute*5, time.Second*5).Should(gomega.BeTrue())
287+
gomega.Eventually(lib.IsNamespaceDeleted(kubernetesClientForSuiteRun, namespace), time.Minute*5, time.Second*5).Should(gomega.BeTrue())
277288
}
278289

279290
var _ = ginkgo.Describe("Backup and restore tests", ginkgo.Ordered, func() {

tests/e2e/e2e_suite_test.go

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"k8s.io/client-go/dynamic"
1515
"k8s.io/client-go/kubernetes"
1616
"k8s.io/client-go/rest"
17+
"k8s.io/client-go/tools/clientcmd"
1718
ctrl "sigs.k8s.io/controller-runtime"
1819
"sigs.k8s.io/controller-runtime/pkg/client"
1920
"sigs.k8s.io/controller-runtime/pkg/client/config"
@@ -24,10 +25,11 @@ import (
2425

2526
var (
2627
// Common vars obtained from flags passed in ginkgo.
27-
bslCredFile, namespace, instanceName, provider, vslCredFile, settings, artifact_dir string
28-
flakeAttempts int64
28+
bslCredFile, namespace, instanceName, provider, vslCredFile, settings, artifact_dir, hcKubeconfig string
29+
flakeAttempts int64
2930

3031
kubernetesClientForSuiteRun *kubernetes.Clientset
32+
crClientForHC client.Client
3133
runTimeClientForSuiteRun client.Client
3234
dynamicClientForSuiteRun dynamic.Interface
3335

@@ -37,12 +39,15 @@ var (
3739
vslSecretName string
3840

3941
kubeConfig *rest.Config
42+
kubeConfigForHC *rest.Config
4043
knownFlake bool
4144
accumulatedTestLogs []string
4245

43-
kvmEmulation bool
44-
useUpstreamHco bool
45-
skipMustGather bool
46+
kvmEmulation bool
47+
useUpstreamHco bool
48+
skipMustGather bool
49+
hcBackupRestoreMode string
50+
hcName string
4651
)
4752

4853
func init() {
@@ -59,6 +64,9 @@ func init() {
5964
flag.BoolVar(&kvmEmulation, "kvm_emulation", true, "Enable or disable KVM emulation for virtualization testing")
6065
flag.BoolVar(&useUpstreamHco, "hco_upstream", false, "Force use of upstream virtualization operator")
6166
flag.BoolVar(&skipMustGather, "skipMustGather", false, "avoid errors with local execution and cluster architecture")
67+
flag.StringVar(&hcBackupRestoreMode, "hc_backup_restore_mode", string(HCModeCreate), "Type of HC test to run")
68+
flag.StringVar(&hcName, "hc_name", "", "Name of the HostedCluster to use for HCP tests")
69+
flag.StringVar(&hcKubeconfig, "hc_kubeconfig", "", "Path to kubeconfig file for HostedCluster")
6270

6371
// helps with launching debug sessions from IDE
6472
if os.Getenv("E2E_USE_ENV_FLAGS") == "true" {
@@ -115,6 +123,17 @@ func init() {
115123
log.Println("Error parsing SKIP_MUST_GATHER, must-gather will be enabled by default: ", err)
116124
}
117125
}
126+
if os.Getenv("HC_BACKUP_RESTORE_MODE") != "" {
127+
hcBackupRestoreMode = os.Getenv("HC_BACKUP_RESTORE_MODE")
128+
} else {
129+
hcBackupRestoreMode = string(HCModeCreate)
130+
}
131+
if os.Getenv("HC_NAME") != "" {
132+
hcName = os.Getenv("HC_NAME")
133+
}
134+
if os.Getenv("HC_KUBECONFIG") != "" {
135+
hcKubeconfig = os.Getenv("HC_KUBECONFIG")
136+
}
118137
}
119138

120139
}
@@ -123,15 +142,27 @@ func TestOADPE2E(t *testing.T) {
123142
flag.Parse()
124143

125144
var err error
145+
126146
kubeConfig = config.GetConfigOrDie()
127147
kubeConfig.QPS = 50
128148
kubeConfig.Burst = 100
129149

130150
gomega.RegisterFailHandler(ginkgo.Fail)
131151

152+
// Set up kubeConfigForHC if the hc_kubeconfig flag is provided
153+
if hcKubeconfig != "" {
154+
kubeConfigForHC, err = clientcmd.BuildConfigFromFlags("", hcKubeconfig)
155+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
156+
kubeConfigForHC.QPS = kubeConfig.QPS
157+
kubeConfigForHC.Burst = kubeConfig.Burst
158+
}
159+
132160
kubernetesClientForSuiteRun, err = kubernetes.NewForConfig(kubeConfig)
133161
gomega.Expect(err).NotTo(gomega.HaveOccurred())
134162

163+
crClientForHC, err = client.New(kubeConfigForHC, client.Options{Scheme: lib.Scheme})
164+
gomega.Expect(err).NotTo(gomega.HaveOccurred())
165+
135166
runTimeClientForSuiteRun, err = client.New(kubeConfig, client.Options{Scheme: lib.Scheme})
136167
gomega.Expect(err).NotTo(gomega.HaveOccurred())
137168

@@ -200,7 +231,6 @@ var _ = ginkgo.AfterSuite(func() {
200231
gomega.Expect(err).ToNot(gomega.HaveOccurred())
201232
err = lib.DeleteSecret(kubernetesClientForSuiteRun, namespace, bslSecretNameWithCarriageReturn)
202233
gomega.Expect(err).ToNot(gomega.HaveOccurred())
203-
204234
log.Printf("Deleting DPA")
205235
err = dpaCR.Delete()
206236
gomega.Expect(err).ToNot(gomega.HaveOccurred())

tests/e2e/hcp_backup_restore_suite_test.go

Lines changed: 79 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,29 @@ import (
88

99
"github.com/onsi/ginkgo/v2"
1010
"github.com/onsi/gomega"
11+
"sigs.k8s.io/controller-runtime/pkg/client"
1112

1213
"github.com/openshift/oadp-operator/tests/e2e/lib"
1314
libhcp "github.com/openshift/oadp-operator/tests/e2e/lib/hcp"
1415
)
1516

16-
type HCPBackupRestoreCase struct {
17-
BackupRestoreCase
18-
Template string
19-
Provider string
20-
}
17+
type HCBackupRestoreMode string
2118

22-
func runHCPBackupAndRestore(brCase HCPBackupRestoreCase, updateLastBRcase func(brCase HCPBackupRestoreCase), h *libhcp.HCHandler) {
19+
const (
20+
HCModeCreate HCBackupRestoreMode = "create" // Create new HostedCluster for test
21+
HCModeExisting HCBackupRestoreMode = "existing" // Get existing HostedCluster
22+
// TODO: Add HCModeExistingROSA for ROSA where DPA and some other resources are already installed
23+
)
24+
25+
// runHCPBackupAndRestore is the unified function that handles both create and existing HC modes
26+
func runHCPBackupAndRestore(
27+
brCase HCPBackupRestoreCase,
28+
updateLastBRcase func(HCPBackupRestoreCase),
29+
updateLastInstallTime func(),
30+
h *libhcp.HCHandler,
31+
) {
2332
updateLastBRcase(brCase)
33+
updateLastInstallTime()
2434

2535
log.Printf("Preparing backup and restore")
2636
backupName, restoreName := prepareBackupAndRestore(brCase.BackupRestoreCase, func() {})
@@ -29,19 +39,41 @@ func runHCPBackupAndRestore(brCase HCPBackupRestoreCase, updateLastBRcase func(b
2939
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to add HCP plugin to DPA: %v", err)
3040
// TODO: move the wait for HC just after the DPA modification to allow reconciliation to go ahead without waiting for the HC to be created
3141

32-
//Wait for HCP plugin to be added
42+
// Wait for HCP plugin to be added
3343
gomega.Eventually(libhcp.IsHCPPluginAdded(h.Client, dpaCR.Namespace, dpaCR.Name), 3*time.Minute, 1*time.Second).Should(gomega.BeTrue())
3444

35-
// Create the HostedCluster for the test
3645
h.HCPNamespace = libhcp.GetHCPNamespace(brCase.BackupRestoreCase.Name, libhcp.ClustersNamespace)
37-
h.HostedCluster, err = h.DeployHCManifest(brCase.Template, brCase.Provider, brCase.BackupRestoreCase.Name)
38-
gomega.Expect(err).ToNot(gomega.HaveOccurred())
3946

47+
// Unified HostedCluster setup
48+
switch brCase.Mode {
49+
case HCModeCreate:
50+
// Create new HostedCluster for test
51+
h.HostedCluster, err = h.DeployHCManifest(brCase.Template, brCase.Provider, brCase.BackupRestoreCase.Name)
52+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
53+
case HCModeExisting:
54+
// Get existing HostedCluster
55+
h.HostedCluster, err = h.GetHostedCluster(brCase.BackupRestoreCase.Name, libhcp.ClustersNamespace)
56+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
57+
default:
58+
ginkgo.Fail(fmt.Sprintf("unknown HCP mode: %s", brCase.Mode))
59+
}
60+
61+
// Pre-backup verification
4062
if brCase.PreBackupVerify != nil {
41-
err := brCase.PreBackupVerify(runTimeClientForSuiteRun, brCase.Namespace)
63+
log.Printf("Validating HC pre-backup")
64+
err := brCase.PreBackupVerify(runTimeClientForSuiteRun, "" /*unused*/)
4265
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run HCP pre-backup verification: %v", err)
4366
}
4467

68+
if brCase.Mode == HCModeExisting {
69+
// Pre-backup verification for guest cluster
70+
if brCase.PreBackupVerifyGuest != nil {
71+
log.Printf("Validating guest cluster pre-backup")
72+
err := brCase.PreBackupVerifyGuest(crClientForHC, "" /*unused*/)
73+
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run pre-backup verification for guest cluster: %v", err)
74+
}
75+
}
76+
4577
// Backup HCP & HC
4678
log.Printf("Backing up HC")
4779
includedResources := libhcp.HCPIncludedResources
@@ -59,10 +91,32 @@ func runHCPBackupAndRestore(brCase HCPBackupRestoreCase, updateLastBRcase func(b
5991
log.Printf("Restoring HC")
6092
runHCPRestore(brCase.BackupRestoreCase, backupName, restoreName, nsRequiresResticDCWorkaround)
6193

62-
// Wait for HCP to be restored
63-
log.Printf("Validating HC")
64-
err = libhcp.ValidateHCP(libhcp.ValidateHCPTimeout, libhcp.Wait10Min, []string{}, h.HCPNamespace)(h.Client, libhcp.ClustersNamespace)
65-
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run HCP post-restore verification: %v", err)
94+
// Unified post-restore verification
95+
if brCase.PostRestoreVerify != nil {
96+
log.Printf("Validating HC post-restore")
97+
err = brCase.PostRestoreVerify(runTimeClientForSuiteRun, "" /*unused*/)
98+
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run HCP post-restore verification: %v", err)
99+
}
100+
101+
if brCase.Mode == HCModeExisting {
102+
// Post-restore verification for guest cluster
103+
if brCase.PostRestoreVerifyGuest != nil {
104+
log.Printf("Validating guest cluster post-restore")
105+
err := brCase.PostRestoreVerifyGuest(crClientForHC, "" /*unused*/)
106+
gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run post-restore verification for guest cluster: %v", err)
107+
}
108+
}
109+
}
110+
111+
type VerificationFunctionGuest func(client.Client, string) error
112+
113+
type HCPBackupRestoreCase struct {
114+
BackupRestoreCase
115+
Mode HCBackupRestoreMode
116+
PreBackupVerifyGuest VerificationFunctionGuest
117+
PostRestoreVerifyGuest VerificationFunctionGuest
118+
Template string // Optional: only used when Mode == HCModeCreate
119+
Provider string // Optional: only used when Mode == HCModeCreate
66120
}
67121

68122
var _ = ginkgo.Describe("HCP Backup and Restore tests", ginkgo.Ordered, func() {
@@ -78,8 +132,15 @@ var _ = ginkgo.Describe("HCP Backup and Restore tests", ginkgo.Ordered, func() {
78132
lastBRCase = brCase
79133
}
80134

135+
updateLastInstallTime := func() {
136+
lastInstallTime = time.Now()
137+
}
138+
81139
// Before All
82140
var _ = ginkgo.BeforeAll(func() {
141+
if hcBackupRestoreMode == string(HCModeExisting) {
142+
ginkgo.Skip("Skipping HCP backup and restore test for existing HCP")
143+
}
83144
reqOperators := []libhcp.RequiredOperator{
84145
{
85146
Name: libhcp.MCEName,
@@ -124,11 +185,12 @@ var _ = ginkgo.Describe("HCP Backup and Restore tests", ginkgo.Ordered, func() {
124185
if ginkgo.CurrentSpecReport().NumAttempts > 1 && !knownFlake {
125186
ginkgo.Fail("No known FLAKE found in a previous run, marking test as failed.")
126187
}
127-
runHCPBackupAndRestore(brCase, updateLastBRcase, h)
188+
runHCPBackupAndRestore(brCase, updateLastBRcase, updateLastInstallTime, h)
128189
},
129190

130191
// Test Cases
131192
ginkgo.Entry("None HostedCluster backup and restore", ginkgo.Label("hcp"), HCPBackupRestoreCase{
193+
Mode: HCModeCreate,
132194
Template: libhcp.HCPNoneManifest,
133195
Provider: "None",
134196
BackupRestoreCase: BackupRestoreCase{
@@ -142,6 +204,7 @@ var _ = ginkgo.Describe("HCP Backup and Restore tests", ginkgo.Ordered, func() {
142204
}, nil),
143205

144206
ginkgo.Entry("Agent HostedCluster backup and restore", ginkgo.Label("hcp"), HCPBackupRestoreCase{
207+
Mode: HCModeCreate,
145208
Template: libhcp.HCPAgentManifest,
146209
Provider: "Agent",
147210
BackupRestoreCase: BackupRestoreCase{

0 commit comments

Comments
 (0)