@@ -27,13 +27,15 @@ import (
2727	bootstrapv1 "sigs.k8s.io/cluster-api/api/bootstrap/kubeadm/v1beta2" 
2828	controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" 
2929	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" 
30+ 	"sigs.k8s.io/cluster-api/feature" 
3031	"sigs.k8s.io/cluster-api/util/collections" 
3132)
3233
33- func  (r  * KubeadmControlPlaneReconciler ) upgradeControlPlane (
34+ func  (r  * KubeadmControlPlaneReconciler ) updateControlPlane (
3435	ctx  context.Context ,
3536	controlPlane  * internal.ControlPlane ,
36- 	machinesRequireUpgrade  collections.Machines ,
37+ 	machinesNeedingRollout  collections.Machines ,
38+ 	machinesNeedingRolloutResults  map [string ]internal.NotUpToDateResult ,
3739) (ctrl.Result , error ) {
3840	log  :=  ctrl .LoggerFrom (ctx )
3941
@@ -42,17 +44,17 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane(
4244	workloadCluster , err  :=  controlPlane .GetWorkloadCluster (ctx )
4345	if  err  !=  nil  {
4446		log .Error (err , "failed to get remote client for workload cluster" , "Cluster" , klog .KObj (controlPlane .Cluster ))
45- 		return  ctrl.Result {}, err 
47+ 		return  ctrl.Result {}, errors . Wrapf ( err ,  "failed to update control plane" ) 
4648	}
4749
4850	parsedVersion , err  :=  semver .ParseTolerant (controlPlane .KCP .Spec .Version )
4951	if  err  !=  nil  {
50- 		return  ctrl.Result {}, errors .Wrapf (err , "failed to parse kubernetes  version %q" , controlPlane .KCP .Spec .Version )
52+ 		return  ctrl.Result {}, errors .Wrapf (err , "failed to update control plane: failed to  parse Kubernetes  version %q" , controlPlane .KCP .Spec .Version )
5153	}
5254
5355	// Ensure kubeadm clusterRoleBinding for v1.29+ as per https://github.com/kubernetes/kubernetes/pull/121305 
5456	if  err  :=  workloadCluster .AllowClusterAdminPermissions (ctx , parsedVersion ); err  !=  nil  {
55- 		return  ctrl.Result {}, errors .Wrap (err , "failed to set cluster-admin ClusterRoleBinding for kubeadm" )
57+ 		return  ctrl.Result {}, errors .Wrap (err , "failed to update control plane: failed to  set cluster-admin ClusterRoleBinding for kubeadm" )
5658	}
5759
5860	kubeadmCMMutators  :=  make ([]func (* bootstrapv1.ClusterConfiguration ), 0 )
@@ -81,21 +83,75 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane(
8183
8284	// collectively update Kubeadm config map 
8385	if  err  =  workloadCluster .UpdateClusterConfiguration (ctx , parsedVersion , kubeadmCMMutators ... ); err  !=  nil  {
84- 		return  ctrl.Result {}, err 
86+ 		return  ctrl.Result {}, errors . Wrapf ( err ,  "failed to update control plane" ) 
8587	}
8688
8789	switch  controlPlane .KCP .Spec .Rollout .Strategy .Type  {
8890	case  controlplanev1 .RollingUpdateStrategyType :
8991		// RolloutStrategy is currently defaulted and validated to be RollingUpdate 
90- 		// We can ignore MaxUnavailable because we are enforcing health checks before we get here. 
91- 		maxNodes  :=  * controlPlane .KCP .Spec .Replicas  +  int32 (controlPlane .KCP .Spec .Rollout .Strategy .RollingUpdate .MaxSurge .IntValue ())
92- 		if  int32 (controlPlane .Machines .Len ()) <  maxNodes  {
93- 			// scaleUp ensures that we don't continue scaling up while waiting for Machines to have NodeRefs 
94- 			return  r .scaleUpControlPlane (ctx , controlPlane )
92+ 		res , err  :=  r .rollingUpdate (ctx , controlPlane , machinesNeedingRollout , machinesNeedingRolloutResults )
93+ 		if  err  !=  nil  {
94+ 			return  ctrl.Result {}, errors .Wrapf (err , "failed to update control plane" )
9595		}
96- 		return  r . scaleDownControlPlane ( ctx ,  controlPlane ,  machinesRequireUpgrade ) 
96+ 		return  res ,  nil 
9797	default :
9898		log .Info ("RolloutStrategy type is not set to RollingUpdate, unable to determine the strategy for rolling out machines" )
9999		return  ctrl.Result {}, nil 
100100	}
101101}
102+ 
// rollingUpdate implements the RollingUpdate rollout strategy for the control plane:
// while below the allowed maximum number of Machines (desired replicas + MaxSurge) it
// scales up; once at/above the maximum it makes progress by either in-place updating
// (when the InPlaceUpdates feature gate is enabled and the selected Machine is eligible)
// or deleting one of the Machines that still needs a rollout.
// Note(review): callers are expected to pass a machinesNeedingRolloutResults entry for
// every Machine in machinesNeedingRollout — the missing-entry branch below treats the
// absence as an internal error.
func (r *KubeadmControlPlaneReconciler) rollingUpdate(
	ctx context.Context,
	controlPlane *internal.ControlPlane,
	machinesNeedingRollout collections.Machines,
	machinesNeedingRolloutResults map[string]internal.NotUpToDateResult,
) (ctrl.Result, error) {
	currentReplicas := int32(controlPlane.Machines.Len())
	currentUpToDateReplicas := int32(len(controlPlane.UpToDateMachines()))
	// Note(review): Spec.Replicas is dereferenced without a nil check — presumably
	// defaulted/validated upstream; confirm webhook guarantees before relying on it here.
	desiredReplicas := *controlPlane.KCP.Spec.Replicas
	maxSurge := int32(controlPlane.KCP.Spec.Rollout.Strategy.RollingUpdate.MaxSurge.IntValue())
	// Note: As MaxSurge is validated to be either 0 or 1, maxReplicas will be either desiredReplicas or desiredReplicas+1
	maxReplicas := desiredReplicas + maxSurge

	// If currentReplicas < maxReplicas we have to scale up
	// Note: This is done to ensure we have as many Machines as allowed during rollout to maximize fault tolerance.
	if currentReplicas < maxReplicas {
		// Note: scaleUpControlPlane ensures that we don't continue scale up while waiting for Machines to have NodeRefs.
		return r.scaleUpControlPlane(ctx, controlPlane)
	}

	// If currentReplicas >= maxReplicas we have to scale down
	// Note: If we are already at or above the maximum Machines we have to in-place update or delete a Machine
	// to make progress with the update (as we cannot create additional new Machines above the maximum).

	// Pick the Machine that we should in-place update or scale down.
	machineToInPlaceUpdateOrScaleDown, err := selectMachineForInPlaceUpdateOrScaleDown(ctx, controlPlane, machinesNeedingRollout)
	if err != nil {
		return ctrl.Result{}, errors.Wrap(err, "failed to select next Machine for rollout")
	}
	// Look up why the selected Machine is not up-to-date; this drives in-place eligibility below.
	machinesNeedingRolloutResult, ok := machinesNeedingRolloutResults[machineToInPlaceUpdateOrScaleDown.Name]
	if !ok {
		// Note: This should never happen as we store results for all Machines in machinesNeedingRolloutResults.
		return ctrl.Result{}, errors.Errorf("failed to check if Machine is UpToDate %s", machineToInPlaceUpdateOrScaleDown.Name)
	}

	// If the selected Machine is eligible for in-place update and we don't already have enough up-to-date replicas, try in-place update.
	// Note: To be safe we only try an in-place update when we would otherwise delete a Machine. This ensures we could
	// afford if the in-place update fails and the Machine becomes unavailable (and eventually MHC kicks in and the Machine is recreated).
	if feature.Gates.Enabled(feature.InPlaceUpdates) &&
		machinesNeedingRolloutResult.EligibleForInPlaceUpdate && currentUpToDateReplicas < desiredReplicas {
		fallbackToScaleDown, res, err := r.tryInPlaceUpdate(ctx, controlPlane, machineToInPlaceUpdateOrScaleDown, machinesNeedingRolloutResult)
		if err != nil {
			return ctrl.Result{}, err
		}
		// A non-zero result means tryInPlaceUpdate wants a requeue; surface it unchanged.
		if !res.IsZero() {
			return res, nil
		}
		// tryInPlaceUpdate determined the Machine cannot be updated in place after all.
		if fallbackToScaleDown {
			return r.scaleDownControlPlane(ctx, controlPlane, machineToInPlaceUpdateOrScaleDown)
		}
		// In-place update triggered
		return ctrl.Result{}, nil // Note: Requeue is not needed, changes to Machines trigger another reconcile.
	}
	// Feature gate off, Machine not eligible, or enough up-to-date replicas already: delete the selected Machine.
	return r.scaleDownControlPlane(ctx, controlPlane, machineToInPlaceUpdateOrScaleDown)
}
0 commit comments