Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions changelog/fragments/helm-operator-rollback.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# entries is a list of entries to include in
# release notes and/or the migration guide
entries:
- description: >
For Helm-based operators, whenever the operator encounters an
error during reconcilliation, it would attempt to rollback the
changes with the `--force` option. This behavior could have
undesired side effects in some scenario.

This change allows the users to change this behavior by adding the
annotation, `helm.sdk.operatorframework.io/rollback-force: false`
to the custom resource.

# kind is one of:
# - addition
# - change
# - deprecation
# - removal
# - bugfix
kind: "addition"

# Is this a breaking change?
breaking: false

# NOTE: ONLY USE `pull_request_override` WHEN ADDING THIS
# FILE FOR A PREVIOUSLY MERGED PULL_REQUEST!
#
# The generator auto-detects the PR number from the commit
# message in which this file was originally added.
#
# What is the pull request number (without the "#")?
# pull_request_override: 0

27 changes: 27 additions & 0 deletions internal/helm/controller/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"errors"
"fmt"
"strconv"
"strings"
"time"

rpb "helm.sh/helm/v3/pkg/release"
Expand Down Expand Up @@ -63,6 +64,7 @@ const (
uninstallFinalizerLegacy = "uninstall-helm-release"

helmUpgradeForceAnnotation = "helm.sdk.operatorframework.io/upgrade-force"
helmRollbackForceAnnotation = "helm.sdk.operatorframework.io/rollback-force"
helmUninstallWaitAnnotation = "helm.sdk.operatorframework.io/uninstall-wait"
helmReconcilePeriodAnnotation = "helm.sdk.operatorframework.io/reconcile-period"
)
Expand Down Expand Up @@ -311,8 +313,18 @@ func (r HelmOperatorReconciler) Reconcile(ctx context.Context, request reconcile
"Chart value %q overridden to %q by operator's watches.yaml", k, v)
}
force := hasAnnotation(helmUpgradeForceAnnotation, o)

previousRelease, upgradedRelease, err := manager.UpgradeRelease(ctx, release.ForceUpgrade(force))
if err != nil {
if errors.Is(err, release.ErrUpgradeFailed) {
// the forceRollback variable takes the value of the annotation,
// "helm.sdk.operatorframework.io/rollback-force".
// The default value for the annotation is true
forceRollback := readBoolAnnotationWithDefault(o, helmRollbackForceAnnotation, true)
if err := manager.RollBack(ctx, release.ForceRollback(forceRollback)); err != nil {
log.Error(err, "Error rolling back release")
}
}
log.Error(err, "Release failed")
status.SetCondition(types.HelmAppCondition{
Type: types.ConditionReleaseFailed,
Expand Down Expand Up @@ -446,6 +458,21 @@ func hasAnnotation(anno string, o *unstructured.Unstructured) bool {
return value
}

func readBoolAnnotationWithDefault(obj *unstructured.Unstructured, annotation string, fallback bool) bool {
val, ok := obj.GetAnnotations()[annotation]
if !ok {
return fallback
}
r, err := strconv.ParseBool(val)
if err != nil {
log.Error(
fmt.Errorf(strings.ToLower(err.Error())), "error parsing annotation", "annotation", annotation)
return fallback
}

return r
}

func (r HelmOperatorReconciler) updateResource(ctx context.Context, o client.Object) error {
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
return r.Client.Update(ctx, o)
Expand Down
61 changes: 61 additions & 0 deletions internal/helm/controller/reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,64 @@ func annotations(m map[string]interface{}) *unstructured.Unstructured {
},
}
}

func Test_readBoolAnnotationWithDefault(t *testing.T) {
objBuilder := func(anno map[string]string) *unstructured.Unstructured {
object := &unstructured.Unstructured{}
object.SetAnnotations(anno)
return object
}

type args struct {
obj *unstructured.Unstructured
annotation string
fallback bool
}

tests := []struct {
name string
args args
want bool
}{
{
name: "Should return value of annotation read",
args: args{
obj: objBuilder(map[string]string{
"helm.sdk.operatorframework.io/rollback-force": "false",
}),
annotation: "helm.sdk.operatorframework.io/rollback-force",
fallback: true,
},
want: false,
},
{
name: "Should return fallback when annotation is not present",
args: args{
obj: objBuilder(map[string]string{
"helm.sdk.operatorframework.io/upgrade-force": "true",
}),
annotation: "helm.sdk.operatorframework.io/rollback-force",
fallback: false,
},
want: false,
},
{
name: "Should return fallback when errors while parsing bool value",
args: args{
obj: objBuilder(map[string]string{
"helm.sdk.operatorframework.io/rollback-force": "force",
}),
annotation: "helm.sdk.operatorframework.io/rollback-force",
fallback: true,
},
want: true,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
if got := readBoolAnnotationWithDefault(tc.args.obj, tc.args.annotation, tc.args.fallback); got != tc.want {
assert.Equal(t, tc.want, got, "readBoolAnnotationWithDefault() function")
}
})
}
}
39 changes: 32 additions & 7 deletions internal/helm/release/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ type Manager interface {
Sync(context.Context) error
InstallRelease(context.Context, ...InstallOption) (*rpb.Release, error)
UpgradeRelease(context.Context, ...UpgradeOption) (*rpb.Release, *rpb.Release, error)
RollBack(context.Context, ...RollBackOption) error
ReconcileRelease(context.Context) (*rpb.Release, error)
UninstallRelease(context.Context, ...UninstallOption) (*rpb.Release, error)
CleanupRelease(context.Context, string) (bool, error)
Expand All @@ -79,6 +80,7 @@ type manager struct {
type InstallOption func(*action.Install) error
type UpgradeOption func(*action.Upgrade) error
type UninstallOption func(*action.Uninstall) error
type RollBackOption func(*action.Rollback) error

// ReleaseName returns the name of the release.
func (m manager) ReleaseName() string {
Expand Down Expand Up @@ -202,10 +204,13 @@ func ForceUpgrade(force bool) UpgradeOption {
}
}

var ErrUpgradeFailed = errors.New("upgrade failed; rollback required")

// UpgradeRelease performs a Helm release upgrade.
func (m manager) UpgradeRelease(ctx context.Context, opts ...UpgradeOption) (*rpb.Release, *rpb.Release, error) {
upgrade := action.NewUpgrade(m.actionConfig)
upgrade.Namespace = m.namespace

for _, o := range opts {
if err := o(upgrade); err != nil {
return nil, nil, fmt.Errorf("failed to apply upgrade option: %w", err)
Expand All @@ -216,24 +221,44 @@ func (m manager) UpgradeRelease(ctx context.Context, opts ...UpgradeOption) (*rp
if err != nil {
// Workaround for helm/helm#3338
if upgradedRelease != nil {
rollback := action.NewRollback(m.actionConfig)
rollback.Force = true

// As of Helm 2.13, if UpgradeRelease returns a non-nil release, that
// means the release was also recorded in the release store.
// Therefore, we should perform the rollback when we have a non-nil
// release. Any rollback error here would be unexpected, so always
// log both the upgrade and rollback errors.
rollbackErr := rollback.Run(m.releaseName)
if rollbackErr != nil {
return nil, nil, fmt.Errorf("failed upgrade (%s) and failed rollback: %w", err, rollbackErr)
}
fmt.Printf("release upgrade failed; %v", err)

return nil, nil, ErrUpgradeFailed
}
return nil, nil, fmt.Errorf("failed to upgrade release: %w", err)
}
return m.deployedRelease, upgradedRelease, err
}

func ForceRollback(force bool) RollBackOption {
return func(r *action.Rollback) error {
r.Force = force
return nil
}
}

// RollBack attempts to reverse any partially applied releases
func (m manager) RollBack(ctx context.Context, opts ...RollBackOption) error {
rollback := action.NewRollback(m.actionConfig)

for _, fn := range opts {
if err := fn(rollback); err != nil {
return fmt.Errorf("failed to apply rollback option: %w", err)
}
}

if err := rollback.Run(m.releaseName); err != nil {
return fmt.Errorf("rollback failed: %w", err)
}

return nil
}

// ReconcileRelease creates or patches resources as necessary to match the
// deployed release's manifest.
func (m manager) ReconcileRelease(ctx context.Context) (*rpb.Release, error) {
Expand Down
2 changes: 1 addition & 1 deletion testdata/ansible/memcached-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.30.0
OPERATOR_SDK_VERSION ?= v1.31.0

# Image URL to use all building/pushing image targets
IMG ?= controller:latest
Expand Down
2 changes: 1 addition & 1 deletion testdata/go/v3/memcached-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.30.0
OPERATOR_SDK_VERSION ?= v1.31.0

# Image URL to use all building/pushing image targets
IMG ?= controller:latest
Expand Down
2 changes: 1 addition & 1 deletion testdata/go/v3/monitoring/memcached-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.30.0
OPERATOR_SDK_VERSION ?= v1.31.0

# Image URL to use all building/pushing image targets
IMG ?= controller:latest
Expand Down
2 changes: 1 addition & 1 deletion testdata/go/v4-alpha/memcached-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.30.0
OPERATOR_SDK_VERSION ?= v1.31.0

# Image URL to use all building/pushing image targets
IMG ?= controller:latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.30.0
OPERATOR_SDK_VERSION ?= v1.31.0

# Image URL to use all building/pushing image targets
IMG ?= controller:latest
Expand Down
2 changes: 1 addition & 1 deletion testdata/helm/memcached-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ endif

# Set the Operator SDK version to use. By default, what is installed on the system is used.
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.30.0
OPERATOR_SDK_VERSION ?= v1.31.0

# Image URL to use all building/pushing image targets
IMG ?= controller:latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,19 @@ metadata:
The value that is present under this key must be in the h/m/s format. For example, 1h2m4s, 3m0s, 4s are all valid values, but 1x3m9s is invalid.

**NOTE**: This is just one way of specifying the reconcile period for Helm-based operators. There are two other ways: using the `--reconcile-period` command-line flag and under the 'reconcilePeriod' key in the watches.yaml file. If these three methods are used simultaneously to specify reconcile period (which they should not be), the order of precedence is as follows:
Custom Resource Annotations > watches.yaml > command-line flag.
Custom Resource Annotations > watches.yaml > command-line flag.

## `helm.sdk.operatorframework.io/rollback-force`

Whenever a helm-based operator encounters an error during reconcilliation, by default, it would attempt to perform a rollback with the `--force` option. While this works as expected in most scenarios, there are a few edge cases where performing a rollback with `--force` could have undesired side effects.

```sh
...
metadata:
name: nginx-sample
annotations:
helm.sdk.operatorframework.io/rollback-force: false
...
```

Adding annotation to the custom resource, `helm.sdk.operatorframework.io/rollback-force: false` therefore allows a user, to change the default behavior of the helm-based operator whereby, rollbacks will be performed without the `--force` option whenever an error is encountered.