From 5adaf893ffb29c681bb58317b60c9a0752393fcc Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 29 Oct 2025 11:24:50 +0530 Subject: [PATCH 01/79] [WIP] Add support for machine preservation through annotations # Conflicts: # pkg/util/provider/machinecontroller/machine.go # pkg/util/provider/machinecontroller/machine_util.go --- ...achine.sapcloud.io_machinedeployments.yaml | 4 + .../crds/machine.sapcloud.io_machines.yaml | 9 ++ .../crds/machine.sapcloud.io_machinesets.yaml | 9 ++ machine-controller-manager | 1 + pkg/apis/machine/types.go | 3 + pkg/apis/machine/v1alpha1/machine_types.go | 6 + pkg/apis/machine/v1alpha1/machineset_types.go | 4 + pkg/apis/machine/v1alpha1/shared_types.go | 4 + pkg/controller/machineset.go | 1 + .../provider/machinecontroller/machine.go | 60 ++++++++++ .../machinecontroller/machine_util.go | 111 +++++++++++++++++- pkg/util/provider/machineutils/utils.go | 23 ++++ pkg/util/provider/options/types.go | 3 + 13 files changed, 236 insertions(+), 2 deletions(-) create mode 160000 machine-controller-manager diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml index d836282a2..848dc96ce 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml @@ -419,6 +419,10 @@ spec: type: boolean type: object type: object + preserveTimeout: + description: MachinePreserveTimeout is the timeout after the + machine preservation is stopped + type: string providerID: description: ProviderID represents the provider's unique ID given to a machine diff --git a/kubernetes/crds/machine.sapcloud.io_machines.yaml b/kubernetes/crds/machine.sapcloud.io_machines.yaml index 9378f0274..1d9150c4d 100644 --- a/kubernetes/crds/machine.sapcloud.io_machines.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machines.yaml @@ -216,6 +216,10 @@ spec: type: boolean type: object type: object + preserveTimeout: + description: 
MachinePreserveTimeout is the timeout after the machine + preservation is stopped + type: string providerID: description: ProviderID represents the provider's unique ID given to a machine @@ -287,6 +291,11 @@ spec: description: MachinePhase is a label for the condition of a machine at the current time. type: string + preserveExpiryTime: + description: PreserveExpiryTime is the time at which MCM will + stop preserving the machine + format: date-time + type: string timeoutActive: type: boolean type: object diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml index c569dffe1..9a6f616fc 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -301,6 +301,10 @@ spec: type: boolean type: object type: object + preserveTimeout: + description: MachinePreserveTimeout is the timeout after the + machine preservation is stopped + type: string providerID: description: ProviderID represents the provider's unique ID given to a machine @@ -312,6 +316,11 @@ spec: description: MachineSetStatus holds the most recently observed status of MachineSet. properties: + autoPreserveFailedMachineCount: + description: AutoPreserveFailedMachineCount is the number of machines + in the machine set that have been auto-preserved upon failure + format: int32 + type: integer availableReplicas: description: The number of available replicas (ready for at least minReadySeconds) for this replica set. 
diff --git a/machine-controller-manager b/machine-controller-manager new file mode 160000 index 000000000..f2cbb0378 --- /dev/null +++ b/machine-controller-manager @@ -0,0 +1 @@ +Subproject commit f2cbb037802eb399e7b655388ef6e182c90cb70f diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index 0facf09ba..4f6c6a8d5 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -97,6 +97,9 @@ type MachineConfiguration struct { // MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed. MachineInPlaceUpdateTimeout *metav1.Duration + // MachinePreserveTimeout is the timeout after the machine preservation is stopped + // +optional + MachinePreserveTimeout *metav1.Duration `json:"preserveTimeout,omitempty"` // DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. // This is intended to be used only for in-place updates. DisableHealthTimeout *bool diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 19e017925..72ac95880 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -204,6 +204,9 @@ const ( // MachineOperationDelete indicates that the operation was a delete MachineOperationDelete MachineOperationType = "Delete" + + // MachineOperationPreserve indicates that the operation was a preserve + MachineOperationPreserve MachineOperationType = "Preserve" ) // The below types are used by kube_client and api_server. 
@@ -252,6 +255,9 @@ type CurrentStatus struct { // Last update time of current status LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` + + // PreserveExpiryTime is the time at which MCM will stop preserving the machine + PreserveExpiryTime metav1.Time `json:"preserveExpiryTime,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/machine/v1alpha1/machineset_types.go b/pkg/apis/machine/v1alpha1/machineset_types.go index 2e6eb1d6e..b9ad20ec4 100644 --- a/pkg/apis/machine/v1alpha1/machineset_types.go +++ b/pkg/apis/machine/v1alpha1/machineset_types.go @@ -135,4 +135,8 @@ type MachineSetStatus struct { // FailedMachines has summary of machines on which lastOperation Failed // +optional FailedMachines *[]MachineSummary `json:"failedMachines,omitempty"` + + // AutoPreserveFailedMachineCount is the number of machines in the machine set that have been auto-preserved upon failure + // +optional + AutoPreserveFailedMachineCount int32 `json:"autoPreserveFailedMachineCount,omitempty"` } diff --git a/pkg/apis/machine/v1alpha1/shared_types.go b/pkg/apis/machine/v1alpha1/shared_types.go index 687151218..832254149 100644 --- a/pkg/apis/machine/v1alpha1/shared_types.go +++ b/pkg/apis/machine/v1alpha1/shared_types.go @@ -44,6 +44,10 @@ type MachineConfiguration struct { // +optional MachineInPlaceUpdateTimeout *metav1.Duration `json:"inPlaceUpdateTimeout,omitempty"` + // MachinePreserveTimeout is the timeout after the machine preservation is stopped + // +optional + MachinePreserveTimeout *metav1.Duration `json:"preserveTimeout,omitempty"` + // DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. // This is intended to be used only for in-place updates. 
// +optional diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index acd41e80e..6dd5027da 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -658,6 +658,7 @@ func slowStartBatch(count int, initialBatchSize int, fn func() error) (int, erro return successes, nil } +// TODO@thiyyakat: ensure preserved machines are the last to be deleted func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1alpha1.Machine { // No need to sort machines if we are about to delete all of them. // diff will always be <= len(filteredMachines), so not need to handle > case. diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 108f6f1d1..608189b25 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -724,3 +724,63 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { _, isMachineInCreationFlow := c.pendingMachineCreationMap.Load(getMachineKey(machine)) return isMachineInCreationFlow } + +// TODO@thiyyakat: check case where, preserved and annotated but times out. Not handled currently +// possible cases: +// 1. Annotated +// - already preserved, check for timeout +// - already preserved, check for explicit stop preservation +// - needs to be preserved on failure +// - needs to be preserved now +// 2. 
Unannotated +// - failed machine, autoPreserveMax not breached, must be preserved +// - failed machine, already preserved, check for timeout +// Auto-preserve case will have to be handled where machine moved from Unknown to Failed + +func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { + // check if rolling update is ongoing, if yes, do nothing + machineDeployment, err := c.getMachineDeploymentForMachine(machine) + if err != nil { + klog.Errorf("Error getting machine deployment for machine %q: %s", machine.Name, err) + return machineutils.ShortRetry, err + } + for _, c := range machineDeployment.Status.Conditions { + if c.Type == v1alpha1.MachineDeploymentProgressing { + if c.Status == v1alpha1.ConditionTrue { + return machineutils.LongRetry, nil + } + break + } + } + // check if machine needs to be preserved due to annotation + isPreserved := machineutils.IsMachinePreserved(machine) + value, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] + if !isPreserved && exists { + switch value { + case machineutils.PreserveMachineAnnotationValueNow: + return c.preserveMachine(ctx, machine) + case machineutils.PreserveMachineAnnotationValueWhenFailed: + // check if machine is in Failed state + if machineutils.IsMachineFailed(machine) { + return c.preserveMachine(ctx, machine) + } + } + } else if isPreserved { + if value == machineutils.PreserveMachineAnnotationValueFalse || metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) { + return c.stopMachinePreservation(ctx, machine) + } + } + // if the machine is neither preserved nor annotated, need not handle it here. 
Auto-preservation + // handled on failure + return machineutils.LongRetry, nil +} + +// getMachineDeploymentForMachine returns the machine deployment for a given machine +func (c *controller) getMachineDeploymentForMachine(machine *v1alpha1.Machine) (*v1alpha1.MachineDeployment, error) { + machineDeploymentName := getMachineDeploymentName(machine) + machineDeployment, err := c.controlMachineClient.MachineDeployments(c.namespace).Get(context.TODO(), machineDeploymentName, metav1.GetOptions{ + TypeMeta: metav1.TypeMeta{}, + ResourceVersion: "", + }) + return machineDeployment, err +} diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 36fa6f414..ef1a54a42 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -29,6 +29,8 @@ import ( "errors" "fmt" "maps" + "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" + "github.com/gardener/machine-controller-manager/pkg/util/annotations" "math" "runtime" "strconv" @@ -1293,7 +1295,8 @@ func (c *controller) setMachineTerminationStatus(ctx context.Context, deleteMach clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ Phase: v1alpha1.MachineTerminating, // TimeoutActive: false, - LastUpdateTime: metav1.Now(), + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{}, } _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) @@ -2034,7 +2037,7 @@ func (c *controller) getEffectiveHealthTimeout(machine *v1alpha1.Machine) *metav return effectiveHealthTimeout } -// getEffectiveHealthTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag. +// getEffectiveCreationTimeout returns the creationTimeout set on the machine-object, otherwise returns the timeout set using the global-flag. 
func (c *controller) getEffectiveCreationTimeout(machine *v1alpha1.Machine) *metav1.Duration { var effectiveCreationTimeout *metav1.Duration if machine.Spec.MachineConfiguration != nil && machine.Spec.MachineConfiguration.MachineCreationTimeout != nil { @@ -2333,3 +2336,107 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { } return "", fmt.Errorf("machine %q not found in node lister for machine %q", machineName, machineName) } + +func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { + clone := machine.DeepCopy() + klog.V(2).Infof("Preserving machine %q", machine.Name) + clone.Status.LastOperation = v1alpha1.LastOperation{ + Description: "Preserving machine", + State: v1alpha1.MachineStateSuccessful, + Type: v1alpha1.MachineOperationPreserve, + LastUpdateTime: metav1.Now(), + } + clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: clone.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), + } + // if backing node exists, add annotations to prevent scale down by autoscaler + if machine.Labels[v1alpha1.NodeLabelKey] != "" { + clusterAutoscalerScaleDownAnnotations := make(map[string]string) + clusterAutoscalerScaleDownAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + // We do this to avoid accidentally deleting the user provided annotations. 
+ clusterAutoscalerScaleDownAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("Error trying to get node %q: %v", nodeName, err) + return machineutils.ShortRetry, err + } + updatedNode, _, err := annotations.AddOrUpdateAnnotation(node, clusterAutoscalerScaleDownAnnotations) + if err != nil { + klog.Warningf("Adding annotation failed for node: %s, %s", machine.Labels[v1alpha1.NodeLabelKey], err) + } + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("Error trying to update node %q: %v", nodeName, err) + return machineutils.ShortRetry, err + } + } + _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + // Keep retrying until update goes through + klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) + } else { + klog.V(2).Infof("Machine %q status updated to preserved ", machine.Name) + // Return error even when machine object is updated to ensure reconcilation is restarted + err = fmt.Errorf("machine preservation in process") + } + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + return machineutils.ShortRetry, err +} + +func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { + clone := machine.DeepCopy() + delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) + delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) + delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) + // if backing node exists, remove annotations that would prevent scale down by autoscaler + if machine.Labels[v1alpha1.NodeLabelKey] != "" { + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("Error trying to get node %q: %v", nodeName, err) + return machineutils.ShortRetry, err + } + //remove annotations from node, values do not matter here + preservationAnnotations := make(map[string]string) + preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" + preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = "" + preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = "" + updatedNode, _, err := annotations.RemoveAnnotation(node, preservationAnnotations) + if err != nil { + klog.Warningf("Removing annotation failed for node: %s, %s", machine.Labels[v1alpha1.NodeLabelKey], err) + } + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("Error trying to update node %q: %v", nodeName, err) + return machineutils.ShortRetry, err + } + } + 
clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: clone.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{}, + } + _, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + // Keep retrying until update goes through + klog.Errorf("Machine UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + return machineutils.ShortRetry, err + } else { + klog.V(2).Infof("Machine %q updated to stop preservation ", machine.Name) + // Return error even when machine object is updated to ensure reconcilation is restarted + } + // if machine is in failed state transition to Terminating + if clone.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, machine.Name, metav1.DeleteOptions{}) + if err != nil { + klog.Errorf("Error trying to delete machine %q: %v", machine.Name, err) + return machineutils.ShortRetry, err + } + } + return machineutils.LongRetry, nil +} diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 141e3a09c..240aeb2a8 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -81,6 +81,21 @@ const ( // LabelKeyMachineSetScaleUpDisabled is the label key that indicates scaling up of the machine set is disabled. 
LabelKeyMachineSetScaleUpDisabled = "node.machine.sapcloud.io/scale-up-disabled" + + // PreserveMachineAnnotationKey is the annotation used to explicitly request that a Machine be preserved + PreserveMachineAnnotationKey = "node.machine.sapcloud.io/preserve" + + // PreserveMachineAnnotationValueNow is the annotation value used to explicitly request that + // a Machine be preserved immediately in its current phase + PreserveMachineAnnotationValueNow = "now" + + // PreserveMachineAnnotationValueWhenFailed is the annotation value used to explicitly request that + // a Machine be preserved if and when in it enters Failed phase + PreserveMachineAnnotationValueWhenFailed = "when-failed" + + //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that + // a Machine should not be preserved any longer, even if the expiry timeout has not been reached + PreserveMachineAnnotationValueFalse = "false" ) // RetryPeriod is an alias for specifying the retry period @@ -124,3 +139,11 @@ func IsMachineFailed(p *v1alpha1.Machine) bool { func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { return m.Annotations[MachinePriority] == "1" } + +// IsMachinePreserved checks if machine is preserved by MCM +func IsMachinePreserved(m *v1alpha1.Machine) bool { + if !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() { + return true + } + return false +} diff --git a/pkg/util/provider/options/types.go b/pkg/util/provider/options/types.go index d1be4c2c9..08ff30da7 100644 --- a/pkg/util/provider/options/types.go +++ b/pkg/util/provider/options/types.go @@ -97,6 +97,9 @@ type SafetyOptions struct { PvDetachTimeout metav1.Duration // Timeout (in duration) used while waiting for PV to reattach on new node PvReattachTimeout metav1.Duration + // Timeout (in duration) used while preserving a machine, + // beyond which preservation is stopped + MachinePreserveTimeout metav1.Duration // Timeout (in duration) for which the APIServer can be down before // declare 
the machine controller frozen by safety controller From 5fc6abbc53598e15747c927a00008319e9512fce Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 5 Nov 2025 10:38:38 +0530 Subject: [PATCH 02/79] Add MachinePreserveTimeout to SafetyOptions. --- pkg/util/provider/app/options/options.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/util/provider/app/options/options.go b/pkg/util/provider/app/options/options.go index 9e66b4b27..00e5ced7e 100644 --- a/pkg/util/provider/app/options/options.go +++ b/pkg/util/provider/app/options/options.go @@ -78,6 +78,7 @@ func NewMCServer() *MCServer { MachineSafetyOrphanVMsPeriod: metav1.Duration{Duration: 15 * time.Minute}, MachineSafetyAPIServerStatusCheckPeriod: metav1.Duration{Duration: 1 * time.Minute}, MachineSafetyAPIServerStatusCheckTimeout: metav1.Duration{Duration: 30 * time.Second}, + MachinePreserveTimeout: metav1.Duration{Duration: 3 * time.Hour}, }, }, } From d8a0764f32518597265fdb33870630be6e42e71d Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 5 Nov 2025 10:39:14 +0530 Subject: [PATCH 03/79] Add PreserveExpiryTime to `machine.Status.CurrentStatus`. --- pkg/apis/machine/types.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index 4f6c6a8d5..03c09f9d9 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -161,6 +161,9 @@ type CurrentStatus struct { // Last update time of current status LastUpdateTime metav1.Time + + // PreserveExpiryTime is the time at which MCM will stop preserving the machine + PreserveExpiryTime metav1.Time } // MachineStatus holds the most recently observed status of Machine. 
From bba1108e3b2a8b364f3dab15e9acbf6656df695c Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 5 Nov 2025 10:41:55 +0530 Subject: [PATCH 04/79] Remove `AutoPreserveFailedMachineCount` from machine set --- pkg/apis/machine/v1alpha1/machineset_types.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machineset_types.go b/pkg/apis/machine/v1alpha1/machineset_types.go index b9ad20ec4..2e6eb1d6e 100644 --- a/pkg/apis/machine/v1alpha1/machineset_types.go +++ b/pkg/apis/machine/v1alpha1/machineset_types.go @@ -135,8 +135,4 @@ type MachineSetStatus struct { // FailedMachines has summary of machines on which lastOperation Failed // +optional FailedMachines *[]MachineSummary `json:"failedMachines,omitempty"` - - // AutoPreserveFailedMachineCount is the number of machines in the machine set that have been auto-preserved upon failure - // +optional - AutoPreserveFailedMachineCount int32 `json:"autoPreserveFailedMachineCount,omitempty"` } From 7e86537dec6c440f34482131e07241f8ecb75237 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 5 Nov 2025 10:42:31 +0530 Subject: [PATCH 05/79] Fix linting error --- pkg/util/provider/machineutils/utils.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 240aeb2a8..afba674e0 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -142,8 +142,5 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { // IsMachinePreserved checks if machine is preserved by MCM func IsMachinePreserved(m *v1alpha1.Machine) bool { - if !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() { - return true - } - return false + return !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() } From f0586745f9b26d7bdcc1d73500e2a1d3023ddaa8 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 5 Nov 2025 10:44:31 +0530 Subject: [PATCH 06/79] Add generated files --- 
docs/documents/apis.md | 39 +++++++++++++++++-- .../v1alpha1/zz_generated.conversion.go | 4 ++ .../machine/v1alpha1/zz_generated.deepcopy.go | 6 +++ pkg/apis/machine/zz_generated.deepcopy.go | 6 +++ pkg/openapi/api_violations.report | 1 + pkg/openapi/openapi_generated.go | 18 +++++++++ 6 files changed, 70 insertions(+), 4 deletions(-) diff --git a/docs/documents/apis.md b/docs/documents/apis.md index 1ed350d9e..a9be985cb 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -833,6 +833,21 @@ Kubernetes meta/v1.Time

Last update time of current status

+ + +preserveExpiryTime + + + + +Kubernetes meta/v1.Time + + + + +

PreserveExpiryTime is the time at which MCM will stop preserving the machine

+ +
@@ -1071,6 +1086,22 @@ Kubernetes meta/v1.Duration +preserveTimeout + + + + +Kubernetes meta/v1.Duration + + + + +(Optional) +

MachinePreserveTimeout is the timeout after the machine preservation is stopped

+ + + + disableHealthTimeout @@ -1543,8 +1574,8 @@ newest MachineSet.

- -[]*../../pkg/apis/machine/v1alpha1.MachineSummary + +[]*github.com/thiyyakat/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary @@ -1988,8 +2019,8 @@ LastOperation - -[]../../pkg/apis/machine/v1alpha1.MachineSummary + +[]github.com/thiyyakat/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary diff --git a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go index 7d5d1b485..fcca5ee3f 100644 --- a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go @@ -348,6 +348,7 @@ func autoConvert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(in *CurrentStat out.Phase = machine.MachinePhase(in.Phase) out.TimeoutActive = in.TimeoutActive out.LastUpdateTime = in.LastUpdateTime + out.PreserveExpiryTime = in.PreserveExpiryTime return nil } @@ -360,6 +361,7 @@ func autoConvert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in *machine.Cur out.Phase = MachinePhase(in.Phase) out.TimeoutActive = in.TimeoutActive out.LastUpdateTime = in.LastUpdateTime + out.PreserveExpiryTime = in.PreserveExpiryTime return nil } @@ -531,6 +533,7 @@ func autoConvert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(i out.MachineHealthTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineHealthTimeout)) out.MachineCreationTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineCreationTimeout)) out.MachineInPlaceUpdateTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineInPlaceUpdateTimeout)) + out.MachinePreserveTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachinePreserveTimeout)) out.DisableHealthTimeout = (*bool)(unsafe.Pointer(in.DisableHealthTimeout)) out.MaxEvictRetries = (*int32)(unsafe.Pointer(in.MaxEvictRetries)) out.NodeConditions = (*string)(unsafe.Pointer(in.NodeConditions)) @@ -547,6 +550,7 @@ func autoConvert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(i out.MachineHealthTimeout = 
(*metav1.Duration)(unsafe.Pointer(in.MachineHealthTimeout)) out.MachineCreationTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineCreationTimeout)) out.MachineInPlaceUpdateTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineInPlaceUpdateTimeout)) + out.MachinePreserveTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachinePreserveTimeout)) out.DisableHealthTimeout = (*bool)(unsafe.Pointer(in.DisableHealthTimeout)) out.MaxEvictRetries = (*int32)(unsafe.Pointer(in.MaxEvictRetries)) out.NodeConditions = (*string)(unsafe.Pointer(in.NodeConditions)) diff --git a/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go index 2691557c9..13aab59e2 100644 --- a/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go @@ -36,6 +36,7 @@ func (in *ClassSpec) DeepCopy() *ClassSpec { func (in *CurrentStatus) DeepCopyInto(out *CurrentStatus) { *out = *in in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + in.PreserveExpiryTime.DeepCopyInto(&out.PreserveExpiryTime) return } @@ -209,6 +210,11 @@ func (in *MachineConfiguration) DeepCopyInto(out *MachineConfiguration) { *out = new(metav1.Duration) **out = **in } + if in.MachinePreserveTimeout != nil { + in, out := &in.MachinePreserveTimeout, &out.MachinePreserveTimeout + *out = new(metav1.Duration) + **out = **in + } if in.DisableHealthTimeout != nil { in, out := &in.DisableHealthTimeout, &out.DisableHealthTimeout *out = new(bool) diff --git a/pkg/apis/machine/zz_generated.deepcopy.go b/pkg/apis/machine/zz_generated.deepcopy.go index 05c40b651..90aa57743 100644 --- a/pkg/apis/machine/zz_generated.deepcopy.go +++ b/pkg/apis/machine/zz_generated.deepcopy.go @@ -36,6 +36,7 @@ func (in *ClassSpec) DeepCopy() *ClassSpec { func (in *CurrentStatus) DeepCopyInto(out *CurrentStatus) { *out = *in in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + in.PreserveExpiryTime.DeepCopyInto(&out.PreserveExpiryTime) return } @@ -209,6 +210,11 @@ func 
(in *MachineConfiguration) DeepCopyInto(out *MachineConfiguration) { *out = new(metav1.Duration) **out = **in } + if in.MachinePreserveTimeout != nil { + in, out := &in.MachinePreserveTimeout, &out.MachinePreserveTimeout + *out = new(metav1.Duration) + **out = **in + } if in.DisableHealthTimeout != nil { in, out := &in.DisableHealthTimeout, &out.DisableHealthTimeout *out = new(bool) diff --git a/pkg/openapi/api_violations.report b/pkg/openapi/api_violations.report index 0861c8d4c..5cb955c9e 100644 --- a/pkg/openapi/api_violations.report +++ b/pkg/openapi/api_violations.report @@ -7,6 +7,7 @@ API rule violation: names_match,github.com/gardener/machine-controller-manager/p API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineDrainTimeout API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineHealthTimeout API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineInPlaceUpdateTimeout +API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachinePreserveTimeout API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineSetStatus,Conditions API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineSpec,NodeTemplateSpec API rule violation: names_match,k8s.io/api/core/v1,AzureDiskVolumeSource,DataDiskURI diff --git a/pkg/openapi/openapi_generated.go b/pkg/openapi/openapi_generated.go index c4ee08c09..fe95775f3 100644 --- a/pkg/openapi/openapi_generated.go +++ b/pkg/openapi/openapi_generated.go @@ -394,6 +394,12 @@ func schema_pkg_apis_machine_v1alpha1_CurrentStatus(ref common.ReferenceCallback Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), }, }, + 
"preserveExpiryTime": { + SchemaProps: spec.SchemaProps{ + Description: "PreserveExpiryTime is the time at which MCM will stop preserving the machine", + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Time"), + }, + }, }, }, }, @@ -682,6 +688,12 @@ func schema_pkg_apis_machine_v1alpha1_MachineConfiguration(ref common.ReferenceC Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, + "preserveTimeout": { + SchemaProps: spec.SchemaProps{ + Description: "MachinePreserveTimeout is the timeout after the machine preservation is stopped", + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), + }, + }, "disableHealthTimeout": { SchemaProps: spec.SchemaProps{ Description: "DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. This is intended to be used only for in-place updates.", @@ -1462,6 +1474,12 @@ func schema_pkg_apis_machine_v1alpha1_MachineSpec(ref common.ReferenceCallback) Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, + "preserveTimeout": { + SchemaProps: spec.SchemaProps{ + Description: "MachinePreserveTimeout is the timeout after the machine preservation is stopped", + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), + }, + }, "disableHealthTimeout": { SchemaProps: spec.SchemaProps{ Description: "DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. 
This is intended to be used only for in-place updates.", From 7f1861a807be27428cd9eaeb32fb9250c5efe08a Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 5 Nov 2025 15:31:43 +0530 Subject: [PATCH 07/79] Add support for preserve=now on node and machine objects # Conflicts: # pkg/util/provider/machinecontroller/machine.go --- Makefile | 6 +- .../crds/machine.sapcloud.io_machinesets.yaml | 5 - .../provider/machinecontroller/machine.go | 28 ++-- .../machinecontroller/machine_util.go | 124 +++++++++++------- 4 files changed, 91 insertions(+), 72 deletions(-) diff --git a/Makefile b/Makefile index aba0236ac..6b0f4f913 100644 --- a/Makefile +++ b/Makefile @@ -172,9 +172,9 @@ test-clean: .PHONY: generate generate: $(VGOPATH) $(DEEPCOPY_GEN) $(DEFAULTER_GEN) $(CONVERSION_GEN) $(OPENAPI_GEN) $(CONTROLLER_GEN) $(GEN_CRD_API_REFERENCE_DOCS) - $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout - @./hack/generate-code - @./hack/api-reference/generate-spec-doc.sh + GOFLAGS="-buildvcs=false" $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout + @GOFLAGS="-buildvcs=false" ./hack/generate-code + @GOFLAGS="-buildvcs=false" ./hack/api-reference/generate-spec-doc.sh .PHONY: add-license-headers add-license-headers: $(GO_ADD_LICENSE) diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml index 9a6f616fc..6dcc797c1 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -316,11 +316,6 @@ spec: description: MachineSetStatus holds the most recently observed status of MachineSet. 
properties: - autoPreserveFailedMachineCount: - description: AutoPreserveFailedMachineCount is the number of machines - in the machine set that have been auto-preserved upon failure - format: int32 - type: integer availableReplicas: description: The number of available replicas (ready for at least minReadySeconds) for this replica set. diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 608189b25..e252880dd 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -7,6 +7,7 @@ package controller import ( "context" + "errors" "fmt" "maps" "slices" @@ -16,6 +17,8 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" @@ -58,9 +61,12 @@ func (c *controller) updateMachine(oldObj, newObj any) { klog.Errorf("couldn't convert to machine resource from object") return } + if preserveAnnotationsChanged(oldMachine.Annotations, newMachine.Annotations) { + c.enqueueMachine(newObj, "handling preserving machine object UPDATE event") + } if oldMachine.Generation == newMachine.Generation { - klog.V(3).Infof("Skipping non-spec updates for machine %s", oldMachine.Name) + klog.V(3).Infof("Skipping other non-spec updates for machine %s", oldMachine.Name) return } @@ -298,6 +304,11 @@ func (c *controller) reconcileClusterMachineTermination(key string) error { } return nil } +func preserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string) bool { + valueNew, existsInNew := newAnnotations[machineutils.PreserveMachineAnnotationKey] + valueOld, existsInOld := oldAnnotations[machineutils.PreserveMachineAnnotationKey] + return existsInOld != existsInNew || valueOld != valueNew +} /* SECTION @@ 
-738,26 +749,13 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // Auto-preserve case will have to be handled where machine moved from Unknown to Failed func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - // check if rolling update is ongoing, if yes, do nothing - machineDeployment, err := c.getMachineDeploymentForMachine(machine) - if err != nil { - klog.Errorf("Error getting machine deployment for machine %q: %s", machine.Name, err) - return machineutils.ShortRetry, err - } - for _, c := range machineDeployment.Status.Conditions { - if c.Type == v1alpha1.MachineDeploymentProgressing { - if c.Status == v1alpha1.ConditionTrue { - return machineutils.LongRetry, nil - } - break - } - } // check if machine needs to be preserved due to annotation isPreserved := machineutils.IsMachinePreserved(machine) value, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] if !isPreserved && exists { switch value { case machineutils.PreserveMachineAnnotationValueNow: + klog.V(2).Infof("Machine %s has annotation %s", machine.Name, machineutils.PreserveMachineAnnotationKey) return c.preserveMachine(ctx, machine) case machineutils.PreserveMachineAnnotationValueWhenFailed: // check if machine is in Failed state diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index ef1a54a42..85aa08106 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2058,6 +2058,17 @@ func (c *controller) getEffectiveInPlaceUpdateTimeout(machine *v1alpha1.Machine) return effectiveDependenciesUpdateTimeout } +// getEffectiveMachinePreserveTimeout returns the MachinePreserveTimeout set on the machine-object, otherwise returns the timeout set using the global-flag. 
+func (c *controller) getEffectiveMachinePreserveTimeout(machine *v1alpha1.Machine) *metav1.Duration { + var effectivePreserveTimeout *metav1.Duration + if machine.Spec.MachineConfiguration != nil && machine.Spec.MachineConfiguration.MachinePreserveTimeout != nil { + effectivePreserveTimeout = machine.Spec.MachineConfiguration.MachinePreserveTimeout + } else { + effectivePreserveTimeout = &c.safetyOptions.MachinePreserveTimeout + } + return effectivePreserveTimeout +} + // getEffectiveNodeConditions returns the nodeConditions set on the machine-object, otherwise returns the conditions set using the global-flag. func (c *controller) getEffectiveNodeConditions(machine *v1alpha1.Machine) *string { var effectiveNodeConditions *string @@ -2339,66 +2350,57 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { clone := machine.DeepCopy() - klog.V(2).Infof("Preserving machine %q", machine.Name) - clone.Status.LastOperation = v1alpha1.LastOperation{ - Description: "Preserving machine", - State: v1alpha1.MachineStateSuccessful, - Type: v1alpha1.MachineOperationPreserve, - LastUpdateTime: metav1.Now(), - } - clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: clone.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), - } + //klog.V(2).Infof("Preserving machine %q", machine.Name) // if backing node exists, add annotations to prevent scale down by autoscaler if machine.Labels[v1alpha1.NodeLabelKey] != "" { - clusterAutoscalerScaleDownAnnotations := make(map[string]string) - clusterAutoscalerScaleDownAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + preservationAnnotations := make(map[string]string) + // if preserve 
annotation not updated on node, we add it + preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = machine.Annotations[machineutils.PreserveMachineAnnotationKey] + preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue // We do this to avoid accidentally deleting the user provided annotations. - clusterAutoscalerScaleDownAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue + preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { - klog.Errorf("Error trying to get node %q: %v", nodeName, err) + klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } - updatedNode, _, err := annotations.AddOrUpdateAnnotation(node, clusterAutoscalerScaleDownAnnotations) - if err != nil { - klog.Warningf("Adding annotation failed for node: %s, %s", machine.Labels[v1alpha1.NodeLabelKey], err) - } + // function never returns error, can be ignored + updatedNode, _, _ := annotations.AddOrUpdateAnnotation(node, preservationAnnotations) _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("Error trying to update node %q: %v", nodeName, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + klog.Errorf("node UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) return machineutils.ShortRetry, err } + klog.V(2).Infof("Updated node %s for machine %q successfully", node.Name, machine.Name) + } + clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: clone.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), } _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { - // Keep retrying until update goes through - klog.Errorf("Machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) - } else { - klog.V(2).Infof("Machine %q status updated to preserved ", machine.Name) - // Return error even when machine object is updated to ensure reconcilation is restarted - err = fmt.Errorf("machine preservation in process") - } - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err + klog.Errorf("machine status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + return machineutils.ShortRetry, err } - return machineutils.ShortRetry, err + klog.V(2).Infof("Machine %q preserved.", machine.Name) + return machineutils.LongRetry, nil } func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - clone := machine.DeepCopy() - delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) - delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) - delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) // if backing node exists, remove annotations that would prevent scale down by autoscaler if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { - klog.Errorf("Error trying to get node %q: %v", nodeName, err) + klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } //remove annotations from node, values do not matter here @@ -2408,35 +2410,59 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = "" updatedNode, _, err := annotations.RemoveAnnotation(node, preservationAnnotations) if err != nil { - klog.Warningf("Removing annotation failed for node: %s, %s", machine.Labels[v1alpha1.NodeLabelKey], err) + klog.Errorf("removing annotation failed for node: %s, %s", machine.Labels[v1alpha1.NodeLabelKey], err) + return machineutils.ShortRetry, err } _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("Error trying to update node %q: %v", nodeName, err) + klog.Errorf("error trying to update node %q: %v", nodeName, err) + if apierrors.IsConflict(err) { + return 
machineutils.ConflictRetry, err + } return machineutils.ShortRetry, err } } - clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: clone.Status.CurrentStatus.Phase, + clone := machine.DeepCopy() + delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) + delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) + delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) + updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("machine UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + return machineutils.ShortRetry, err + } else { + klog.V(2).Infof("Machine %q updated to remove annotations ", machine.Name) + // Return error even when machine object is updated to ensure reconcilation is restarted + } + updatedMachine.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: updatedMachine.Status.CurrentStatus.Phase, LastUpdateTime: metav1.Now(), PreserveExpiryTime: metav1.Time{}, } - _, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + _, err = c.controlMachineClient.Machines(updatedMachine.Namespace).UpdateStatus(ctx, updatedMachine, metav1.UpdateOptions{}) if err != nil { - // Keep retrying until update goes through - klog.Errorf("Machine UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", updatedMachine.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } return machineutils.ShortRetry, err - } else { - klog.V(2).Infof("Machine %q updated to stop preservation ", machine.Name) - // Return error even when machine object is updated to ensure reconcilation is restarted } + klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) + // if machine is in failed state transition to Terminating - if clone.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, machine.Name, metav1.DeleteOptions{}) + if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, updatedMachine.Name, metav1.DeleteOptions{}) if err != nil { - klog.Errorf("Error trying to delete machine %q: %v", machine.Name, err) + klog.Errorf("error trying to delete machine %q: %v", updatedMachine.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } return machineutils.ShortRetry, err } + klog.V(2).Infof("Machine %q marked for deletion", updatedMachine.Name) } return machineutils.LongRetry, nil } From bd90ed1b1db776f787e39c438cf1a5b743cd52ef Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 5 Nov 2025 15:35:59 +0530 Subject: [PATCH 08/79] Update TODOs --- .../provider/machinecontroller/machine_util.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 85aa08106..3f55e2f55 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2348,6 +2348,8 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { return "", fmt.Errorf("machine %q not found in node lister for machine %q", machineName, machineName) 
} +// TODO:@thiyyakat - when-failed annotation should be added to the node as well. +// preserveMachine contains logic to start the preservation of a node func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { clone := machine.DeepCopy() //klog.V(2).Infof("Preserving machine %q", machine.Name) @@ -2357,8 +2359,9 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach // if preserve annotation not updated on node, we add it preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = machine.Annotations[machineutils.PreserveMachineAnnotationKey] preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue - // We do this to avoid accidentally deleting the user provided annotations. - preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue + // TODO@thiyyakat: understand why ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey is added + //// We do this to avoid accidentally deleting the user provided annotations. 
+ //preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { @@ -2406,7 +2409,8 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp //remove annotations from node, values do not matter here preservationAnnotations := make(map[string]string) preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" - preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = "" + // TODO@thiyyakat: understand why ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey is added + //preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = "" preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = "" updatedNode, _, err := annotations.RemoveAnnotation(node, preservationAnnotations) if err != nil { @@ -2424,7 +2428,8 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp } clone := machine.DeepCopy() delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) - delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) + // TODO@thiyyakat: understand why ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey is added + //delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) if err != nil { @@ -2451,7 +2456,6 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp return machineutils.ShortRetry, err } klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) - // if machine is in failed state 
transition to Terminating if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, updatedMachine.Name, metav1.DeleteOptions{}) From a2082bbf26b1c5dd166287cb5ac71f60ae00ccb1 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 10 Nov 2025 13:46:47 +0530 Subject: [PATCH 09/79] [WIP] Implement add/remove/update of node and machine annotations --- pkg/controller/machineset.go | 37 +++++- .../provider/machinecontroller/machine.go | 46 ++++---- .../machinecontroller/machine_util.go | 108 +++++++++++------- 3 files changed, 126 insertions(+), 65 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 6dd5027da..ee78f5f53 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -27,6 +27,7 @@ import ( "errors" "fmt" "reflect" + "slices" "sort" "sync" "time" @@ -431,6 +432,18 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 logMachinesWithPriority1(machinesWithoutUpdateSuccessfulLabel) machinesToDelete := getMachinesToDelete(machinesWithoutUpdateSuccessfulLabel, machinesWithoutUpdateSuccessfulLabelDiff) logMachinesToDelete(machinesToDelete) + //if machines are preserved, stop preservation + //for _, mc := range machinesToDelete { + // if machineutils.IsMachinePreserved(mc) { + // + // } + // + //}for _, mc := range machinesToDelete { + // if machineutils.IsMachinePreserved(mc) { + // + // } + // + //} // Snapshot the UIDs (ns/name) of the machines we're expecting to see // deleted, so we know to record their expectations exactly once either @@ -667,14 +680,34 @@ func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1al // < scheduled, and pending < running. This ensures that we delete machines // in the earlier stages whenever possible. 
sort.Sort(ActiveMachines(filteredMachines)) + // machines in Preserved stage will be the last ones to be deleted + // at all times, replica count will be upheld, even if it means deletion of a pending machine + // TODO@thiyyakat: write unit test for this scenario + filteredMachines = prioritisePreservedMachines(filteredMachines) + + fmt.Printf("len(filteredMachines)=%d, diff=%d\n", len(filteredMachines), diff) + } return filteredMachines[:diff] } +func prioritisePreservedMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machine { + pendingMachines := make([]*v1alpha1.Machine, 0, len(machines)) + otherMachines := make([]*v1alpha1.Machine, 0, len(machines)) + for _, mc := range machines { + if machineutils.IsMachinePreserved(mc) { + pendingMachines = append(pendingMachines, mc) + } else { + otherMachines = append(otherMachines, mc) + } + } + return slices.Concat(otherMachines, pendingMachines) +} + func getMachineKeys(machines []*v1alpha1.Machine) []string { machineKeys := make([]string, 0, len(machines)) - for _, machine := range machines { - machineKeys = append(machineKeys, MachineKey(machine)) + for _, mc := range machines { + machineKeys = append(machineKeys, MachineKey(mc)) } return machineKeys } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index e252880dd..7b2e9b2fa 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -62,7 +62,8 @@ func (c *controller) updateMachine(oldObj, newObj any) { return } if preserveAnnotationsChanged(oldMachine.Annotations, newMachine.Annotations) { - c.enqueueMachine(newObj, "handling preserving machine object UPDATE event") + c.enqueueMachine(newObj, "handling machine object preservation related UPDATE event") + return } if oldMachine.Generation == newMachine.Generation { @@ -739,34 +740,39 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // TODO@thiyyakat: check case where, 
preserved and annotated but times out. Not handled currently // possible cases: // 1. Annotated -// - already preserved, check for timeout -// - already preserved, check for explicit stop preservation -// - needs to be preserved on failure -// - needs to be preserved now +// - already preserved, check for timeout +// - already preserved, check for explicit stop preservation +// - needs to be preserved on failure +// - needs to be preserved now +// // 2. Unannotated -// - failed machine, autoPreserveMax not breached, must be preserved -// - failed machine, already preserved, check for timeout +// - failed machine, autoPreserveMax not breached, must be preserved +// - failed machine, already preserved, check for timeout +// // Auto-preserve case will have to be handled where machine moved from Unknown to Failed func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { // check if machine needs to be preserved due to annotation - isPreserved := machineutils.IsMachinePreserved(machine) value, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] - if !isPreserved && exists { + klog.V(3).Infof("TEST: machine:%s annotation value: %s", machine.Name, value) + isPreserved := machineutils.IsMachinePreserved(machine) + if exists { switch value { - case machineutils.PreserveMachineAnnotationValueNow: - klog.V(2).Infof("Machine %s has annotation %s", machine.Name, machineutils.PreserveMachineAnnotationKey) - return c.preserveMachine(ctx, machine) - case machineutils.PreserveMachineAnnotationValueWhenFailed: - // check if machine is in Failed state - if machineutils.IsMachineFailed(machine) { - return c.preserveMachine(ctx, machine) + case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: + if !isPreserved { + return c.preserveMachine(ctx, machine, value) } + case machineutils.PreserveMachineAnnotationValueFalse: + klog.V(2).Infof("TEST: 
false annotation value set.") + if isPreserved { + return c.stopMachinePreservation(ctx, machine) + } + default: + klog.V(3).Infof("Annotation value %s not part of accepted values for preserve", value) + return machineutils.LongRetry, nil } - } else if isPreserved { - if value == machineutils.PreserveMachineAnnotationValueFalse || metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) { - return c.stopMachinePreservation(ctx, machine) - } + } else if isPreserved && metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) { + return c.stopMachinePreservation(ctx, machine) } // if the machine is neither preserved nor annotated, need not handle it here. Auto-preservation // handled on failure diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 3f55e2f55..6b269c8e2 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -1096,11 +1096,18 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph timeOutDuration, machine.Status.Conditions, ) - machineDeployName := getMachineDeploymentName(machine) // creating lock for machineDeployment, if not allocated c.permitGiver.RegisterPermits(machineDeployName, 1) - return c.tryMarkingMachineFailed(ctx, machine, clone, machineDeployName, description, lockAcquireTimeout) + retry, err := c.tryMarkingMachineFailed(ctx, machine, clone, machineDeployName, description, lockAcquireTimeout) + if err != nil { + return retry, err + } + if value, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && value == machineutils.PreserveMachineAnnotationValueWhenFailed { + klog.V(3).Infof("TEST:Preserving machine on failure due to annotation") + return c.preserveMachine(ctx, machine, machineutils.PreserveMachineAnnotationValueWhenFailed) + } + return retry, err } if isMachineInPlaceUpdating { @@ -1154,7 +1161,7 @@ func 
(c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } if cloneDirty { - _, err = c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { // Keep retrying across reconciles until update goes through klog.Errorf("Update of Phase/Conditions failed for machine %q. Retrying, error: %q", machine.Name, err) @@ -1163,6 +1170,10 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } } else { klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", machine.Name, getProviderID(machine), getNodeName(machine)) + if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed && updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueWhenFailed { + klog.V(3).Infof("TEST:Preserving machine on failure due to annotation") + return c.preserveMachine(ctx, updatedMachine, machineutils.PreserveMachineAnnotationValueWhenFailed) + } // Return error to end the reconcile err = errSuccessfulPhaseUpdate } @@ -2348,26 +2359,30 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { return "", fmt.Errorf("machine %q not found in node lister for machine %q", machineName, machineName) } -// TODO:@thiyyakat - when-failed annotation should be added to the node as well. -// preserveMachine contains logic to start the preservation of a node -func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - clone := machine.DeepCopy() - //klog.V(2).Infof("Preserving machine %q", machine.Name) - // if backing node exists, add annotations to prevent scale down by autoscaler +// TODO@thiyyakat: check if node needs to be updated at all. 
+// TODO@thiyyakat: handle when annotation changed from when-failed to now and vice versa +// preserveMachine contains logic to start the preservation of a machine and node +func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (machineutils.RetryPeriod, error) { + klog.V(3).Infof("TEST:Entering preserve machine flow") + isFailed := machineutils.IsMachineFailed(machine) + // machine needs to be preserved now if annotated with preserve=now or if annotated with preserve=when-failed and machine has failed + toBePreservedNow := preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && isFailed + klog.V(3).Infof("TEST:machine: %s, tobepreservednow: %s", machine.Name, toBePreservedNow) + // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID if machine.Labels[v1alpha1.NodeLabelKey] != "" { preservationAnnotations := make(map[string]string) - // if preserve annotation not updated on node, we add it preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = machine.Annotations[machineutils.PreserveMachineAnnotationKey] - preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue - // TODO@thiyyakat: understand why ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey is added - //// We do this to avoid accidentally deleting the user provided annotations. 
- //preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue + if toBePreservedNow { + preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + } nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } + klog.V(3).Infof("TEST:preservationAnnotations on node : %s: %v, and machine: %s", node.Name, node.Annotations[machineutils.PreserveMachineAnnotationKey], machine.Annotations[machineutils.PreserveMachineAnnotationKey]) + // if preserve annotation not updated on node, we add it // function never returns error, can be ignored updatedNode, _, _ := annotations.AddOrUpdateAnnotation(node, preservationAnnotations) _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) @@ -2378,8 +2393,12 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach klog.Errorf("node UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) return machineutils.ShortRetry, err } - klog.V(2).Infof("Updated node %s for machine %q successfully", node.Name, machine.Name) + klog.V(2).Infof("Updated preservation annotations for node %s, for machine %q, successfully", node.Name, machine.Name) + if !toBePreservedNow { + return machineutils.LongRetry, nil + } } + clone := machine.DeepCopy() clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ Phase: clone.Status.CurrentStatus.Phase, LastUpdateTime: metav1.Now(), @@ -2398,7 +2417,9 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { + klog.V(3).Infof("TEST:Entering stopMachinePreservation on machine %q", machine.Name) // if backing node exists, remove annotations that would prevent scale down by autoscaler + // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) @@ -2406,16 +2427,15 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } - //remove annotations from node, values do not matter here - preservationAnnotations := make(map[string]string) - preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" - // TODO@thiyyakat: understand why ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey is added - //preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = "" - preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = "" - updatedNode, _, err := annotations.RemoveAnnotation(node, preservationAnnotations) - if err != nil { - klog.Errorf("removing annotation failed for node: %s, %s", 
machine.Labels[v1alpha1.NodeLabelKey], err) - return machineutils.ShortRetry, err + // remove CA annotation from node, values do not matter here + CAAnnotations := make(map[string]string) + CAAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" + updatedNode, _, _ := annotations.RemoveAnnotation(node, CAAnnotations) // error can be ignored, always returns nil + // set preserve=false on node if it is set on machine + if machine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { + preserveAnnotations := make(map[string]string) + preserveAnnotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValueFalse + updatedNode, _, _ = annotations.AddOrUpdateAnnotation(updatedNode, preserveAnnotations) // error can be ignored, always returns nil } _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { @@ -2442,31 +2462,33 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp klog.V(2).Infof("Machine %q updated to remove annotations ", machine.Name) // Return error even when machine object is updated to ensure reconcilation is restarted } - updatedMachine.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: updatedMachine.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{}, - } - _, err = c.controlMachineClient.Machines(updatedMachine.Namespace).UpdateStatus(ctx, updatedMachine, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", updatedMachine.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err + if machineutils.IsMachinePreserved(machine) { + updatedMachine.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: updatedMachine.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{}, } - return machineutils.ShortRetry, err - } - klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) - // if machine is in failed state transition to Terminating - if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, updatedMachine.Name, metav1.DeleteOptions{}) + _, err = c.controlMachineClient.Machines(updatedMachine.Namespace).UpdateStatus(ctx, updatedMachine, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("error trying to delete machine %q: %v", updatedMachine.Name, err) + klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", updatedMachine.Name, err) if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err } return machineutils.ShortRetry, err } - klog.V(2).Infof("Machine %q marked for deletion", updatedMachine.Name) + klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) + // if machine is in failed state transition to Terminating + if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, updatedMachine.Name, metav1.DeleteOptions{}) + if err != nil { + klog.Errorf("error trying to delete machine %q: %v", updatedMachine.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + return machineutils.ShortRetry, err + } + klog.V(2).Infof("Machine %q marked for deletion", updatedMachine.Name) + } } return machineutils.LongRetry, nil } From 27d180744dbf7dc754720a492eddff2e7e5e3ec8 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 13 Nov 2025 14:41:32 +0530 Subject: [PATCH 10/79] Update preserve logic to honour node annotations over machine --- .../provider/machinecontroller/machine.go | 89 ++++++++++++++----- .../machinecontroller/machine_util.go | 12 +-- 2 files changed, 74 insertions(+), 27 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 7b2e9b2fa..7e29c25bc 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -752,33 +752,78 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // Auto-preserve case will have to be handled where machine moved from Unknown to Failed func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - // check if machine needs to be preserved due to annotation - value, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] - 
klog.V(3).Infof("TEST: machine:%s annotation value: %s", machine.Name, value) - isPreserved := machineutils.IsMachinePreserved(machine) - if exists { - switch value { - case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: - if !isPreserved { - return c.preserveMachine(ctx, machine, value) - } - case machineutils.PreserveMachineAnnotationValueFalse: - klog.V(2).Infof("TEST: false annotation value set.") - if isPreserved { - return c.stopMachinePreservation(ctx, machine) - } - default: - klog.V(3).Infof("Annotation value %s not part of accepted values for preserve", value) - return machineutils.LongRetry, nil + // check effective preservation value based on node's and machine's annotations. + updatedMachine, preserveValue, err := c.syncEffectivePreserveAnnotationValue(ctx, machine) + if err != nil { + klog.Errorf("Error getting preserve annotation value for machine %q: %s", machine.Name, err) + return machineutils.ShortRetry, err + } + klog.V(3).Infof("TEST effective preservation value for machine %q: %s", updatedMachine.Name, preserveValue) + isPreserved := machineutils.IsMachinePreserved(updatedMachine) + switch preserveValue { + case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: + if !isPreserved { + return c.preserveMachine(ctx, machine, preserveValue) + } else if metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) { + return c.stopMachinePreservation(ctx, machine) } - } else if isPreserved && metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) { - return c.stopMachinePreservation(ctx, machine) + case machineutils.PreserveMachineAnnotationValueFalse: + if isPreserved { + return c.stopMachinePreservation(ctx, machine) + } + case "": + return machineutils.LongRetry, nil + default: + klog.V(3).Infof("Annotation value %s not part of accepted values for preserve", preserveValue) + return machineutils.LongRetry, nil } 
- // if the machine is neither preserved nor annotated, need not handle it here. Auto-preservation - // handled on failure return machineutils.LongRetry, nil } +func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, string, error) { + var effectivePreserveAnnotationValue string + mAnnotationValue, mExists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] + // node annotation value, if exists, will always override and overwrite machine annotation value for preserve + if machine.Labels[v1alpha1.NodeLabelKey] != "" { + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error trying to get node %q: %v", nodeName, err) + return machine, "", err + } + nAnnotationValue, nExists := node.Annotations[machineutils.PreserveMachineAnnotationKey] + switch { + case nExists && mExists: + if nAnnotationValue == mAnnotationValue { + return machine, nAnnotationValue, nil + } + effectivePreserveAnnotationValue = nAnnotationValue + case nExists && !mExists: + effectivePreserveAnnotationValue = nAnnotationValue + case mExists && !nExists: + return machine, mAnnotationValue, nil + case !nExists && !mExists: + return machine, "", nil + } + clone := machine.DeepCopy() + if clone.Annotations == nil { + clone.Annotations = make(map[string]string) + } + clone.Annotations[machineutils.PreserveMachineAnnotationKey] = effectivePreserveAnnotationValue + updatedMachine, err := c.controlMachineClient.Machines(c.namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error updating machine with preserve annotations %q: %v", machine.Name, err) + return machine, "", err + } + return updatedMachine, effectivePreserveAnnotationValue, nil + } + //if no backing node + if mExists { + return machine, mAnnotationValue, nil + } + return machine, "", nil +} + // getMachineDeploymentForMachine returns the machine 
deployment for a given machine func (c *controller) getMachineDeploymentForMachine(machine *v1alpha1.Machine) (*v1alpha1.MachineDeployment, error) { machineDeploymentName := getMachineDeploymentName(machine) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 6b269c8e2..9efc51d5a 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2359,15 +2359,12 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { return "", fmt.Errorf("machine %q not found in node lister for machine %q", machineName, machineName) } -// TODO@thiyyakat: check if node needs to be updated at all. // TODO@thiyyakat: handle when annotation changed from when-failed to now and vice versa // preserveMachine contains logic to start the preservation of a machine and node func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (machineutils.RetryPeriod, error) { klog.V(3).Infof("TEST:Entering preserve machine flow") - isFailed := machineutils.IsMachineFailed(machine) - // machine needs to be preserved now if annotated with preserve=now or if annotated with preserve=when-failed and machine has failed - toBePreservedNow := preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && isFailed - klog.V(3).Infof("TEST:machine: %s, tobepreservednow: %s", machine.Name, toBePreservedNow) + toBePreservedNow := shouldMachineBePreservedNow(machine, preserveValue) + klog.V(3).Infof("TEST:machine: %s, tobepreservednow: %v", machine.Name, toBePreservedNow) // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID if machine.Labels[v1alpha1.NodeLabelKey] != "" { preservationAnnotations := make(map[string]string) @@ -2416,6 +2413,11 @@ func (c *controller) preserveMachine(ctx context.Context, machine 
*v1alpha1.Mach return machineutils.LongRetry, nil } +func shouldMachineBePreservedNow(machine *v1alpha1.Machine, preserveValue string) bool { + isFailed := machineutils.IsMachineFailed(machine) + return preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && isFailed +} + func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { klog.V(3).Infof("TEST:Entering stopMachinePreservation on machine %q", machine.Name) // if backing node exists, remove annotations that would prevent scale down by autoscaler From 1e87d0b7d5f6b5bc29cca34d79b330c4197c152c Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 19 Nov 2025 08:06:01 +0530 Subject: [PATCH 11/79] Add preservation logic in machineset controller. TODO: remove debug logs # Conflicts: # pkg/controller/machineset.go --- pkg/apis/machine/v1alpha1/machine_types.go | 5 + pkg/controller/machineset.go | 6 +- .../provider/machinecontroller/machine.go | 14 +- .../machinecontroller/machine_util.go | 173 +++++++++--------- pkg/util/provider/machineutils/utils.go | 32 +++- 5 files changed, 133 insertions(+), 97 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 72ac95880..eee38e3a5 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -247,6 +247,11 @@ const ( UpdateFailed string = "UpdateFailed" ) +const ( + // NodePreserved is a node condition type for preservation of machines to allow end-user to know that a node is preserved + NodePreserved corev1.NodeConditionType = "NodePreserved" +) + // CurrentStatus contains information about the current status of Machine. 
type CurrentStatus struct { Phase MachinePhase `json:"phase,omitempty"` diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index ee78f5f53..281ccdd75 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -695,7 +695,7 @@ func prioritisePreservedMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machi pendingMachines := make([]*v1alpha1.Machine, 0, len(machines)) otherMachines := make([]*v1alpha1.Machine, 0, len(machines)) for _, mc := range machines { - if machineutils.IsMachinePreserved(mc) { + if machineutils.IsPreserveExpiryTimeSet(mc) { pendingMachines = append(pendingMachines, mc) } else { otherMachines = append(otherMachines, mc) @@ -770,8 +770,8 @@ func (c *controller) terminateMachines(ctx context.Context, inactiveMachines []* defer close(errCh) wg.Add(numOfInactiveMachines) - for _, machine := range inactiveMachines { - go c.prepareMachineForDeletion(ctx, machine, machineSet, &wg, errCh) + for _, m := range inactiveMachines { + go c.prepareMachineForDeletion(ctx, m, machineSet, &wg, errCh) } wg.Wait() diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 7e29c25bc..e540e3307 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -759,17 +759,17 @@ func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1. 
return machineutils.ShortRetry, err } klog.V(3).Infof("TEST effective preservation value for machine %q: %s", updatedMachine.Name, preserveValue) - isPreserved := machineutils.IsMachinePreserved(updatedMachine) + preserveExpiryTimeSet := machineutils.IsPreserveExpiryTimeSet(updatedMachine) switch preserveValue { case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: - if !isPreserved { - return c.preserveMachine(ctx, machine, preserveValue) - } else if metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) { - return c.stopMachinePreservation(ctx, machine) + if !preserveExpiryTimeSet { + return c.preserveMachine(ctx, updatedMachine, preserveValue) + } else if metav1.Now().After(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.Time) { + return c.stopMachinePreservation(ctx, updatedMachine) } case machineutils.PreserveMachineAnnotationValueFalse: - if isPreserved { - return c.stopMachinePreservation(ctx, machine) + if preserveExpiryTimeSet { + return c.stopMachinePreservation(ctx, updatedMachine) } case "": return machineutils.LongRetry, nil diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 9efc51d5a..6ecc22965 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -943,8 +943,9 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph klog.Warning(description) clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineUnknown, - LastUpdateTime: metav1.Now(), + Phase: v1alpha1.MachineUnknown, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: machine.Status.CurrentStatus.PreserveExpiryTime, } clone.Status.LastOperation = v1alpha1.LastOperation{ Description: description, @@ -997,7 +998,8 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph clone.Status.CurrentStatus = 
v1alpha1.CurrentStatus{ Phase: v1alpha1.MachineRunning, // TimeoutActive: false, - LastUpdateTime: metav1.Now(), + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: machine.Status.CurrentStatus.PreserveExpiryTime, } cloneDirty = true } @@ -1087,7 +1089,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } if timeElapsed > timeOutDuration { // Machine health timeout occurred while joining or rejoining of machine - + klog.V(2).Infof("TEST: timeout has occurred %s", machine.Name) if !isMachinePending && !isMachineInPlaceUpdating && !disableHealthTimeout { // Timeout occurred due to machine being unhealthy for too long description = fmt.Sprintf( @@ -1103,9 +1105,9 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph if err != nil { return retry, err } - if value, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && value == machineutils.PreserveMachineAnnotationValueWhenFailed { - klog.V(3).Infof("TEST:Preserving machine on failure due to annotation") - return c.preserveMachine(ctx, machine, machineutils.PreserveMachineAnnotationValueWhenFailed) + if val, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueWhenFailed { + klog.V(2).Infof("TEST: timeout has occurred, preserve machine %s", machine.Name) + return c.preserveMachine(ctx, machine, val) } return retry, err } @@ -1147,8 +1149,9 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph LastUpdateTime: metav1.Now(), } clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineFailed, - LastUpdateTime: metav1.Now(), + Phase: v1alpha1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: machine.Status.CurrentStatus.PreserveExpiryTime, } cloneDirty = true } @@ -1168,17 +1171,17 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph if 
apierrors.IsConflict(err) { return machineutils.ConflictRetry, err } - } else { - klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", machine.Name, getProviderID(machine), getNodeName(machine)) - if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed && updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueWhenFailed { - klog.V(3).Infof("TEST:Preserving machine on failure due to annotation") - return c.preserveMachine(ctx, updatedMachine, machineutils.PreserveMachineAnnotationValueWhenFailed) - } - // Return error to end the reconcile - err = errSuccessfulPhaseUpdate + return machineutils.ShortRetry, err } - return machineutils.ShortRetry, err + klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", updatedMachine.Name, getProviderID(updatedMachine), getNodeName(updatedMachine)) + if val, exists := updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueWhenFailed && (updatedMachine.Status.CurrentStatus.Phase != v1alpha1.MachineInPlaceUpdating) { + klog.V(2).Infof("TEST: timeout has occurred, preserve machine") + return c.preserveMachine(ctx, updatedMachine, machineutils.PreserveMachineAnnotationValueWhenFailed) + } + // TODO@thiyyakat: fix this. check earlier code. 
+ // Return error to end the reconcile + err = errSuccessfulPhaseUpdate } return machineutils.LongRetry, nil @@ -2359,29 +2362,28 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { return "", fmt.Errorf("machine %q not found in node lister for machine %q", machineName, machineName) } -// TODO@thiyyakat: handle when annotation changed from when-failed to now and vice versa -// preserveMachine contains logic to start the preservation of a machine and node +// preserveMachine contains logic to start the preservation of a machine and node. It syncs node annotations to the machine if the backing node exists, +// or has an annotation related to preservation. +// it does not sync preserve annotation values from machine to node to prevent bi-directional syncing issues. func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (machineutils.RetryPeriod, error) { + if !machineutils.ShouldMachineBePreservedNow(machine, preserveValue) { + return machineutils.LongRetry, nil + } klog.V(3).Infof("TEST:Entering preserve machine flow") - toBePreservedNow := shouldMachineBePreservedNow(machine, preserveValue) - klog.V(3).Infof("TEST:machine: %s, tobepreservednow: %v", machine.Name, toBePreservedNow) // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID if machine.Labels[v1alpha1.NodeLabelKey] != "" { - preservationAnnotations := make(map[string]string) - preservationAnnotations[machineutils.PreserveMachineAnnotationKey] = machine.Annotations[machineutils.PreserveMachineAnnotationKey] - if toBePreservedNow { - preservationAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue - } nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } - 
klog.V(3).Infof("TEST:preservationAnnotations on node : %s: %v, and machine: %s", node.Name, node.Annotations[machineutils.PreserveMachineAnnotationKey], machine.Annotations[machineutils.PreserveMachineAnnotationKey]) - // if preserve annotation not updated on node, we add it + // not updating node's preserve annotations here in case operator is manipulating machine annotations only + // if node annotation is updated, machine annotation will be overwritten with this value even if operator wants it to change // function never returns error, can be ignored - updatedNode, _, _ := annotations.AddOrUpdateAnnotation(node, preservationAnnotations) + CAScaleDownAnnotation := make(map[string]string) + CAScaleDownAnnotation[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + updatedNode, _, _ := annotations.AddOrUpdateAnnotation(node, CAScaleDownAnnotation) _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { if apierrors.IsConflict(err) { @@ -2390,18 +2392,32 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach klog.Errorf("node UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) return machineutils.ShortRetry, err } - klog.V(2).Infof("Updated preservation annotations for node %s, for machine %q, successfully", node.Name, machine.Name) - if !toBePreservedNow { - return machineutils.LongRetry, nil + klog.V(2).Infof("Updated CA annotations for node %s, for machine %q, successfully", node.Name, machine.Name) + + preservedCondition := v1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: v1.ConditionTrue, + LastTransitionTime: metav1.Now(), + } + updatedNode = nodeops.AddOrUpdateCondition(updatedNode, preservedCondition) + _, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + klog.Errorf("Node Status UPDATE failed for machine %s with node %s. Retrying, error: %s", machine.Name, nodeName, err) + return machineutils.ShortRetry, err } + klog.V(2).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) } + clone := machine.DeepCopy() clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ Phase: clone.Status.CurrentStatus.Phase, LastUpdateTime: metav1.Now(), PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), } - _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("machine status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) if apierrors.IsConflict(err) { @@ -2409,20 +2425,13 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } return machineutils.ShortRetry, err } - klog.V(2).Infof("Machine %q preserved.", machine.Name) + klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) return machineutils.LongRetry, nil } - -func shouldMachineBePreservedNow(machine *v1alpha1.Machine, preserveValue string) bool { - isFailed := machineutils.IsMachineFailed(machine) - return preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && isFailed -} - func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { klog.V(3).Infof("TEST:Entering stopMachinePreservation on machine %q", machine.Name) // if backing node exists, remove annotations that would prevent scale down by autoscaler - // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID - if machine.Labels[v1alpha1.NodeLabelKey] != "" { + if machine.Labels[v1alpha1.NodeLabelKey] != "" { // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { @@ -2433,12 +2442,6 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp CAAnnotations := make(map[string]string) CAAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" updatedNode, _, _ := annotations.RemoveAnnotation(node, CAAnnotations) // error can be ignored, always returns nil - // set preserve=false on node if it is set on machine - if machine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { - preserveAnnotations := make(map[string]string) - 
preserveAnnotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValueFalse - updatedNode, _, _ = annotations.AddOrUpdateAnnotation(updatedNode, preserveAnnotations) // error can be ignored, always returns nil - } _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { klog.Errorf("error trying to update node %q: %v", nodeName, err) @@ -2447,50 +2450,50 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp } return machineutils.ShortRetry, err } - } - clone := machine.DeepCopy() - delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) - // TODO@thiyyakat: understand why ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey is added - //delete(clone.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) - delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) - updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("machine UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err - } else { - klog.V(2).Infof("Machine %q updated to remove annotations ", machine.Name) - // Return error even when machine object is updated to ensure reconcilation is restarted - } - if machineutils.IsMachinePreserved(machine) { - updatedMachine.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: updatedMachine.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{}, + preservedCondition := v1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: v1.ConditionFalse, + LastTransitionTime: metav1.Now(), } - _, err = c.controlMachineClient.Machines(updatedMachine.Namespace).UpdateStatus(ctx, updatedMachine, metav1.UpdateOptions{}) + updatedNode = nodeops.AddOrUpdateCondition(updatedNode, preservedCondition) + _, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("Machine/status UPDATE failed for machine %q. Retrying, error: %s", updatedMachine.Name, err) if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err } + klog.Errorf("Node Status UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) return machineutils.ShortRetry, err } - klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) - // if machine is in failed state transition to Terminating - if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, updatedMachine.Name, metav1.DeleteOptions{}) - if err != nil { - klog.Errorf("error trying to delete machine %q: %v", updatedMachine.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err - } - klog.V(2).Infof("Machine %q marked for deletion", updatedMachine.Name) + klog.V(2).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) + } + clone := machine.DeepCopy() + clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: machine.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{}, + } + updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("Machine/status UPDATE failed for machine %q. Retrying, error: %s", clone.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err } + return machineutils.ShortRetry, err } + klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) + // TODO@thiyyakat: if machine was in failed state and machinehealthtimeout has not expired, then it should + // continue to be in Failed. Normal flow is not changed. 
+ //// if machine is in failed state transition to Terminating + //if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + // err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, updatedMachine.Name, metav1.DeleteOptions{}) + // if err != nil { + // klog.Errorf("error trying to delete machine %q: %v", updatedMachine.Name, err) + // if apierrors.IsConflict(err) { + // return machineutils.ConflictRetry, err + // } + // return machineutils.ShortRetry, err + // } + // klog.V(2).Infof("Machine %q marked for deletion", updatedMachine.Name) + //} return machineutils.LongRetry, nil } diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index afba674e0..dfaa3c6e3 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -140,7 +140,35 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { return m.Annotations[MachinePriority] == "1" } -// IsMachinePreserved checks if machine is preserved by MCM -func IsMachinePreserved(m *v1alpha1.Machine) bool { +// IsPreserveExpiryTimeSet checks if machine is preserved by MCM +func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { return !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() } + +// HasPreservationTimedOut checks if the Status.CurrentStatus.PreserveExpiryTime has not yet passed +func HasPreservationTimedOut(m *v1alpha1.Machine) bool { + if m.Status.CurrentStatus.PreserveExpiryTime.IsZero() { + return true + } else if m.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { + return false + } + return true +} + +func ShouldMachineBePreservedNow(machine *v1alpha1.Machine, preserveValue string) bool { + isFailed := IsMachineFailed(machine) + return preserveValue == PreserveMachineAnnotationValueNow || preserveValue == PreserveMachineAnnotationValueWhenFailed && isFailed +} + +//func IsMachinePreserved(machine *v1alpha1.Machine) bool { +// val, exists := 
machine.Annotations[PreserveMachineAnnotationKey] +// if !exists { +// return false +// } else { +// if val == PreserveMachineAnnotationValueNow && machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { +// return true +// } else if val == PreserveMachineAnnotationValueWhenFailed && IsMachineFailed(machine) { +// return true +// } +// } +//} From 36f6c4fe5997eb6a33881f93db7f9487e4f92020 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 19 Nov 2025 14:53:20 +0530 Subject: [PATCH 12/79] Add drain logic post preservation of failed machine --- .../provider/machinecontroller/machine.go | 1 - .../machinecontroller/machine_util.go | 160 ++++++++++-------- pkg/util/provider/machineutils/utils.go | 5 + 3 files changed, 91 insertions(+), 75 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index e540e3307..8df36e8db 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -758,7 +758,6 @@ func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1. 
klog.Errorf("Error getting preserve annotation value for machine %q: %s", machine.Name, err) return machineutils.ShortRetry, err } - klog.V(3).Infof("TEST effective preservation value for machine %q: %s", updatedMachine.Name, preserveValue) preserveExpiryTimeSet := machineutils.IsPreserveExpiryTimeSet(updatedMachine) switch preserveValue { case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 6ecc22965..985cf3275 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -1089,7 +1089,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } if timeElapsed > timeOutDuration { // Machine health timeout occurred while joining or rejoining of machine - klog.V(2).Infof("TEST: timeout has occurred %s", machine.Name) if !isMachinePending && !isMachineInPlaceUpdating && !disableHealthTimeout { // Timeout occurred due to machine being unhealthy for too long description = fmt.Sprintf( @@ -1106,7 +1105,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph return retry, err } if val, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueWhenFailed { - klog.V(2).Infof("TEST: timeout has occurred, preserve machine %s", machine.Name) return c.preserveMachine(ctx, machine, val) } return retry, err @@ -1176,7 +1174,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", updatedMachine.Name, getProviderID(updatedMachine), getNodeName(updatedMachine)) if val, exists := updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && 
val == machineutils.PreserveMachineAnnotationValueWhenFailed && (updatedMachine.Status.CurrentStatus.Phase != v1alpha1.MachineInPlaceUpdating) { - klog.V(2).Infof("TEST: timeout has occurred, preserve machine") return c.preserveMachine(ctx, updatedMachine, machineutils.PreserveMachineAnnotationValueWhenFailed) } // TODO@thiyyakat: fix this. check earlier code. @@ -1635,8 +1632,15 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver if skipDrain { state = v1alpha1.MachineStateProcessing } else { - timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration) - + if machineutils.IsPreserveExpiryTimeSet(machine) { + preserveStartTime := machine.Status.CurrentStatus.PreserveExpiryTime.Time.Add(-c.getEffectiveMachinePreserveTimeout(machine).Duration) + timeOutOccurred = utiltime.HasTimeOutOccurred( + metav1.Time{Time: preserveStartTime}, + timeOutDuration, + ) + } else { + timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration) + } if forceDeleteLabelPresent || timeOutOccurred { // To perform forceful machine drain/delete either one of the below conditions must be satified // 1. force-deletion: "True" label must be present @@ -1717,7 +1721,12 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver if forceDeletePods { description = fmt.Sprintf("Force Drain successful. %s", machineutils.DelVolumesAttachments) } else { // regular drain already waits for vol detach and attach for another node. - description = fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion) + if machineutils.IsPreserveExpiryTimeSet(machine) { + description = fmt.Sprintf("Drain successful. Machine preserved.") + } else { + description = fmt.Sprintf("Drain successful. 
%s", machineutils.InitiateVMDeletion) + } + } err = fmt.Errorf("%s", description) state = v1alpha1.MachineStateProcessing @@ -2369,7 +2378,40 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach if !machineutils.ShouldMachineBePreservedNow(machine, preserveValue) { return machineutils.LongRetry, nil } - klog.V(3).Infof("TEST:Entering preserve machine flow") + clone := machine.DeepCopy() + clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: clone.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), + } + updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("machine status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + return machineutils.ShortRetry, err + } + klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) + if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + // Validate MachineClass + machineClass, secretData, retry, err := c.ValidateMachineClass(ctx, &machine.Spec.Class) + if err != nil { + klog.Errorf("cannot reconcile machine %s: %s", machine.Name, err) + return retry, err + } + + deleteMachineRequest := &driver.DeleteMachineRequest{ + Machine: updatedMachine, + MachineClass: machineClass, + Secret: &v1.Secret{Data: secretData}, + } + retry, err = c.drainNode(ctx, deleteMachineRequest) + if err != nil { + klog.Errorf("error draining node backing machine: %s, error:%s", updatedMachine.Name, err) + return retry, err + } + } // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] @@ -2378,28 
+2420,13 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } - // not updating node's preserve annotations here in case operator is manipulating machine annotations only - // if node annotation is updated, machine annotation will be overwritten with this value even if operator wants it to change - // function never returns error, can be ignored - CAScaleDownAnnotation := make(map[string]string) - CAScaleDownAnnotation[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue - updatedNode, _, _ := annotations.AddOrUpdateAnnotation(node, CAScaleDownAnnotation) - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) - if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - klog.Errorf("node UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) - return machineutils.ShortRetry, err - } - klog.V(2).Infof("Updated CA annotations for node %s, for machine %q, successfully", node.Name, machine.Name) preservedCondition := v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionTrue, LastTransitionTime: metav1.Now(), } - updatedNode = nodeops.AddOrUpdateCondition(updatedNode, preservedCondition) + updatedNode := nodeops.AddOrUpdateCondition(node, preservedCondition) _, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { if apierrors.IsConflict(err) { @@ -2409,27 +2436,41 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return machineutils.ShortRetry, err } klog.V(2).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) - } + // not updating node's preserve annotations here in case operator is manipulating machine annotations only + // if node annotation is updated, machine annotation will be overwritten with this value even if operator wants it to change + // function never returns error, can be ignored + CAScaleDownAnnotation := make(map[string]string) + CAScaleDownAnnotation[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + updatedNode, _, _ = annotations.AddOrUpdateAnnotation(node, CAScaleDownAnnotation) + updatedNode, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + klog.Errorf("node UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) + return machineutils.ShortRetry, err + } + klog.V(2).Infof("Updated CA annotations for node %s, for machine %q, successfully", node.Name, machine.Name) + } + return machineutils.LongRetry, nil +} +func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { clone := machine.DeepCopy() clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: clone.Status.CurrentStatus.Phase, + Phase: machine.Status.CurrentStatus.Phase, LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), + PreserveExpiryTime: metav1.Time{}, } updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("machine status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", clone.Name, err) if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err } return machineutils.ShortRetry, err } - klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) - return machineutils.LongRetry, nil -} -func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - klog.V(3).Infof("TEST:Entering stopMachinePreservation on machine %q", machine.Name) + klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) // if backing node exists, remove annotations that would prevent scale down by autoscaler if machine.Labels[v1alpha1.NodeLabelKey] != "" { // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID nodeName := machine.Labels[v1alpha1.NodeLabelKey] @@ -2438,25 +2479,13 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } - // remove CA annotation from node, values do not matter here - CAAnnotations := make(map[string]string) - CAAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" - updatedNode, _, _ := annotations.RemoveAnnotation(node, CAAnnotations) // error can be ignored, always returns nil - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error trying to update node %q: %v", nodeName, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err - } preservedCondition := v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), } - updatedNode = nodeops.AddOrUpdateCondition(updatedNode, preservedCondition) - _, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, 
metav1.UpdateOptions{}) + updatedNode := nodeops.AddOrUpdateCondition(node, preservedCondition) + updatedNode, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err @@ -2465,35 +2494,18 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp return machineutils.ShortRetry, err } klog.V(2).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) - } - clone := machine.DeepCopy() - clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: machine.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{}, - } - updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("Machine/status UPDATE failed for machine %q. Retrying, error: %s", clone.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err + // remove CA annotation from node, values do not matter here + CAAnnotations := make(map[string]string) + CAAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" + updatedNode, _, _ = annotations.RemoveAnnotation(updatedNode, CAAnnotations) // error can be ignored, always returns nil + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error trying to update node %q: %v", nodeName, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + return machineutils.ShortRetry, err } - return machineutils.ShortRetry, err } - klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) - // TODO@thiyyakat: if machine was in failed state and machinehealthtimeout has not expired, then it should - // continue to be in Failed. Normal flow is not changed. 
- //// if machine is in failed state transition to Terminating - //if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - // err = c.controlMachineClient.Machines(c.namespace).Delete(ctx, updatedMachine.Name, metav1.DeleteOptions{}) - // if err != nil { - // klog.Errorf("error trying to delete machine %q: %v", updatedMachine.Name, err) - // if apierrors.IsConflict(err) { - // return machineutils.ConflictRetry, err - // } - // return machineutils.ShortRetry, err - // } - // klog.V(2).Infof("Machine %q marked for deletion", updatedMachine.Name) - //} return machineutils.LongRetry, nil } diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index dfaa3c6e3..e591a8407 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -142,6 +142,11 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { // IsPreserveExpiryTimeSet checks if machine is preserved by MCM func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { + //klog.V(3).Infof("DEBUG: machine:%s, time=%v, IsZero=%v, Unix=%d", + // m.Name, + // m.Status.CurrentStatus.PreserveExpiryTime, + // m.Status.CurrentStatus.PreserveExpiryTime.IsZero(), + // m.Status.CurrentStatus.PreserveExpiryTime.Unix()) return !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() } From 11010d66455edb0d7d26d857f709a431ada23927 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 19 Nov 2025 15:27:15 +0530 Subject: [PATCH 13/79] Fix return for reconcileMachineHealth. 
Unit tests passing --- pkg/apis/machine/types.go | 2 +- pkg/apis/machine/v1alpha1/machine_types.go | 3 --- pkg/apis/machine/v1alpha1/shared_types.go | 2 +- .../provider/machinecontroller/machine_util.go | 17 ++++++++++------- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index 03c09f9d9..d0cf1de77 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -97,7 +97,7 @@ type MachineConfiguration struct { // MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed. MachineInPlaceUpdateTimeout *metav1.Duration - // MachinePreserveTimeout is the timeout after the machine preservation is stopped + // MachinePreserveTimeout is the timeout after which the machine preservation is stopped // +optional MachinePreserveTimeout *metav1.Duration `json:"preserveTimeout,omitempty"` // DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index eee38e3a5..49753259f 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -204,9 +204,6 @@ const ( // MachineOperationDelete indicates that the operation was a delete MachineOperationDelete MachineOperationType = "Delete" - - // MachineOperationPreserve indicates that the operation was a preserve - MachineOperationPreserve MachineOperationType = "Preserve" ) // The below types are used by kube_client and api_server. 
diff --git a/pkg/apis/machine/v1alpha1/shared_types.go b/pkg/apis/machine/v1alpha1/shared_types.go index 832254149..1a673b79f 100644 --- a/pkg/apis/machine/v1alpha1/shared_types.go +++ b/pkg/apis/machine/v1alpha1/shared_types.go @@ -44,7 +44,7 @@ type MachineConfiguration struct { // +optional MachineInPlaceUpdateTimeout *metav1.Duration `json:"inPlaceUpdateTimeout,omitempty"` - // MachinePreserveTimeout is the timeout after the machine preservation is stopped + // MachinePreserveTimeout is the timeout after which the machine preservation is stopped // +optional MachinePreserveTimeout *metav1.Duration `json:"preserveTimeout,omitempty"` diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 985cf3275..7a2a75b0e 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -1170,17 +1170,20 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph return machineutils.ConflictRetry, err } return machineutils.ShortRetry, err + } else { + klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", updatedMachine.Name, getProviderID(updatedMachine), getNodeName(updatedMachine)) + if val, exists := updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueWhenFailed && (updatedMachine.Status.CurrentStatus.Phase != v1alpha1.MachineInPlaceUpdating) { + retry, err := c.preserveMachine(ctx, updatedMachine, machineutils.PreserveMachineAnnotationValueWhenFailed) + if err != nil { + return retry, err + } + } + err = errSuccessfulPhaseUpdate } - - klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", updatedMachine.Name, getProviderID(updatedMachine), getNodeName(updatedMachine)) - if val, exists := 
updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueWhenFailed && (updatedMachine.Status.CurrentStatus.Phase != v1alpha1.MachineInPlaceUpdating) { - return c.preserveMachine(ctx, updatedMachine, machineutils.PreserveMachineAnnotationValueWhenFailed) - } + return machineutils.ShortRetry, err // TODO@thiyyakat: fix this. check earlier code. // Return error to end the reconcile - err = errSuccessfulPhaseUpdate } - return machineutils.LongRetry, nil } From 5dadf8533676a18c1a357e1c88629a7d4fa7ec4a Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 19 Nov 2025 15:30:33 +0530 Subject: [PATCH 14/79] Update CRDs --- docs/documents/apis.md | 2 +- kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml | 4 ++-- kubernetes/crds/machine.sapcloud.io_machines.yaml | 4 ++-- kubernetes/crds/machine.sapcloud.io_machinesets.yaml | 4 ++-- pkg/openapi/openapi_generated.go | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/documents/apis.md b/docs/documents/apis.md index a9be985cb..f629c3f8b 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -1097,7 +1097,7 @@ Kubernetes meta/v1.Duration (Optional) -

MachinePreserveTimeout is the timeout after the machine preservation is stopped

+

MachinePreserveTimeout is the timeout after which the machine preservation is stopped

diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml index 848dc96ce..39c59908d 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml @@ -420,8 +420,8 @@ spec: type: object type: object preserveTimeout: - description: MachinePreserveTimeout is the timeout after the - machine preservation is stopped + description: MachinePreserveTimeout is the timeout after which + the machine preservation is stopped type: string providerID: description: ProviderID represents the provider's unique ID diff --git a/kubernetes/crds/machine.sapcloud.io_machines.yaml b/kubernetes/crds/machine.sapcloud.io_machines.yaml index 1d9150c4d..6e75f1441 100644 --- a/kubernetes/crds/machine.sapcloud.io_machines.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machines.yaml @@ -217,8 +217,8 @@ spec: type: object type: object preserveTimeout: - description: MachinePreserveTimeout is the timeout after the machine - preservation is stopped + description: MachinePreserveTimeout is the timeout after which the + machine preservation is stopped type: string providerID: description: ProviderID represents the provider's unique ID given diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml index 6dcc797c1..4f7d82cd4 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -302,8 +302,8 @@ spec: type: object type: object preserveTimeout: - description: MachinePreserveTimeout is the timeout after the - machine preservation is stopped + description: MachinePreserveTimeout is the timeout after which + the machine preservation is stopped type: string providerID: description: ProviderID represents the provider's unique ID diff --git a/pkg/openapi/openapi_generated.go b/pkg/openapi/openapi_generated.go index 
fe95775f3..60a1d1fb0 100644 --- a/pkg/openapi/openapi_generated.go +++ b/pkg/openapi/openapi_generated.go @@ -690,7 +690,7 @@ func schema_pkg_apis_machine_v1alpha1_MachineConfiguration(ref common.ReferenceC }, "preserveTimeout": { SchemaProps: spec.SchemaProps{ - Description: "MachinePreserveTimeout is the timeout after the machine preservation is stopped", + Description: "MachinePreserveTimeout is the timeout after which the machine preservation is stopped", Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, @@ -1476,7 +1476,7 @@ func schema_pkg_apis_machine_v1alpha1_MachineSpec(ref common.ReferenceCallback) }, "preserveTimeout": { SchemaProps: spec.SchemaProps{ - Description: "MachinePreserveTimeout is the timeout after the machine preservation is stopped", + Description: "MachinePreserveTimeout is the timeout after which the machine preservation is stopped", Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, From 3980331bb4bf593740ea2bd4479584dc420cfb3e Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 24 Nov 2025 16:17:51 +0530 Subject: [PATCH 15/79] Fix bug causing repeated requeuing --- pkg/apis/machine/v1alpha1/machine_types.go | 2 + .../provider/machinecontroller/machine.go | 46 ++- .../machinecontroller/machine_util.go | 276 ++++++++++++------ pkg/util/provider/machineutils/utils.go | 23 -- 4 files changed, 217 insertions(+), 130 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 49753259f..057fd95f1 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -247,6 +247,8 @@ const ( const ( // NodePreserved is a node condition type for preservation of machines to allow end-user to know that a node is preserved NodePreserved corev1.NodeConditionType = "NodePreserved" + + // ) // CurrentStatus contains information about the current status of Machine. 
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 8df36e8db..d0b86ba7d 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -9,6 +9,7 @@ import ( "context" "errors" "fmt" + "github.com/gardener/machine-controller-manager/pkg/util/nodeops" "maps" "slices" "strings" @@ -750,7 +751,6 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // - failed machine, already preserved, check for timeout // // Auto-preserve case will have to be handled where machine moved from Unknown to Failed - func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { // check effective preservation value based on node's and machine's annotations. updatedMachine, preserveValue, err := c.syncEffectivePreserveAnnotationValue(ctx, machine) @@ -758,23 +758,27 @@ func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1. 
klog.Errorf("Error getting preserve annotation value for machine %q: %s", machine.Name, err) return machineutils.ShortRetry, err } - preserveExpiryTimeSet := machineutils.IsPreserveExpiryTimeSet(updatedMachine) switch preserveValue { case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: - if !preserveExpiryTimeSet { - return c.preserveMachine(ctx, updatedMachine, preserveValue) - } else if metav1.Now().After(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.Time) { - return c.stopMachinePreservation(ctx, updatedMachine) + if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && !machineutils.IsMachineFailed(updatedMachine) { + return machineutils.LongRetry, nil } - case machineutils.PreserveMachineAnnotationValueFalse: - if preserveExpiryTimeSet { + isComplete, err := c.isMachinePreservationComplete(ctx, machine) + if err != nil { + return machineutils.ShortRetry, err + } + if !isComplete { + return c.preserveMachine(ctx, machine) + } + if hasMachinePreservationTimedOut(machine) { return c.stopMachinePreservation(ctx, updatedMachine) } + case machineutils.PreserveMachineAnnotationValueFalse: + return c.stopMachinePreservation(ctx, updatedMachine) case "": return machineutils.LongRetry, nil default: - klog.V(3).Infof("Annotation value %s not part of accepted values for preserve", preserveValue) - return machineutils.LongRetry, nil + klog.Warningf("Preserve annotation value %s on machine %s is invalid", preserveValue, machine.Name) } return machineutils.LongRetry, nil } @@ -823,6 +827,28 @@ func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, m return machine, "", nil } +func (c *controller) isMachinePreservationComplete(ctx context.Context, machine *v1alpha1.Machine) (bool, error) { + // if preservetime is set and machine is not failed, then yes, + // if preservetime is set and machine is failed, the node condition must be there saying drain successful + // if 
preserve time is not set, then no + if !machineutils.IsPreserveExpiryTimeSet(machine) { + return false, nil + } else if machineutils.IsMachineFailed(machine) { + node, err := c.nodeLister.Get(getNodeName(machine)) + if err != nil { + klog.Errorf("error trying to get node %q: %v", getNodeName(machine), err) + return false, err + } + if cond := nodeops.GetCondition(node, v1alpha1.NodePreserved); cond != nil { + if cond.Reason == v1alpha1.DrainSuccessful { + return true, nil + } + } + return false, nil + } + return true, nil +} + // getMachineDeploymentForMachine returns the machine deployment for a given machine func (c *controller) getMachineDeploymentForMachine(machine *v1alpha1.Machine) (*v1alpha1.MachineDeployment, error) { machineDeploymentName := getMachineDeploymentName(machine) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 7a2a75b0e..ac31d5411 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -212,6 +212,7 @@ func nodeConditionsHaveChanged(oldConditions []v1.NodeCondition, newConditions [ if !exists || (oldC.Status != c.Status) || (c.Type == v1alpha1.NodeInPlaceUpdate && oldC.Reason != c.Reason) { addedOrUpdatedConditions = append(addedOrUpdatedConditions, c) } + } // checking for any deleted condition @@ -1104,9 +1105,9 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph if err != nil { return retry, err } - if val, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueWhenFailed { - return c.preserveMachine(ctx, machine, val) - } + //if attemptMachinePreservation(machine) && !machineutils.IsPreserveExpiryTimeSet(machine) { + // return c.preserveMachine(ctx, machine) + //} return retry, err } @@ -1172,12 +1173,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph 
return machineutils.ShortRetry, err } else { klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", updatedMachine.Name, getProviderID(updatedMachine), getNodeName(updatedMachine)) - if val, exists := updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueWhenFailed && (updatedMachine.Status.CurrentStatus.Phase != v1alpha1.MachineInPlaceUpdating) { - retry, err := c.preserveMachine(ctx, updatedMachine, machineutils.PreserveMachineAnnotationValueWhenFailed) - if err != nil { - return retry, err - } - } err = errSuccessfulPhaseUpdate } return machineutils.ShortRetry, err @@ -1635,15 +1630,8 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver if skipDrain { state = v1alpha1.MachineStateProcessing } else { - if machineutils.IsPreserveExpiryTimeSet(machine) { - preserveStartTime := machine.Status.CurrentStatus.PreserveExpiryTime.Time.Add(-c.getEffectiveMachinePreserveTimeout(machine).Duration) - timeOutOccurred = utiltime.HasTimeOutOccurred( - metav1.Time{Time: preserveStartTime}, - timeOutDuration, - ) - } else { - timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration) - } + timeOutOccurred = utiltime.HasTimeOutOccurred(*machine.DeletionTimestamp, timeOutDuration) + if forceDeleteLabelPresent || timeOutOccurred { // To perform forceful machine drain/delete either one of the below conditions must be satified // 1. force-deletion: "True" label must be present @@ -1724,12 +1712,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver if forceDeletePods { description = fmt.Sprintf("Force Drain successful. %s", machineutils.DelVolumesAttachments) } else { // regular drain already waits for vol detach and attach for another node. - if machineutils.IsPreserveExpiryTimeSet(machine) { - description = fmt.Sprintf("Drain successful. 
Machine preserved.") - } else { - description = fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion) - } - + description = fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion) } err = fmt.Errorf("%s", description) state = v1alpha1.MachineStateProcessing @@ -2377,45 +2360,22 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { // preserveMachine contains logic to start the preservation of a machine and node. It syncs node annotations to the machine if the backing node exists, // or has an annotation related to preservation. // it does not sync preserve annotation values from machine to node to prevent bi-directional syncing issues. -func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (machineutils.RetryPeriod, error) { - if !machineutils.ShouldMachineBePreservedNow(machine, preserveValue) { - return machineutils.LongRetry, nil - } - clone := machine.DeepCopy() - clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: clone.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), - } - updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("machine status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err - } - klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) - if updatedMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - // Validate MachineClass - machineClass, secretData, retry, err := c.ValidateMachineClass(ctx, &machine.Spec.Class) +func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { + klog.V(3).Infof("Entering preserve machine") + if !machineutils.IsPreserveExpiryTimeSet(machine) { + preservedCurrentStatus := v1alpha1.CurrentStatus{ + Phase: machine.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), + } + retry, err := c.machineStatusUpdate(ctx, machine, machine.Status.LastOperation, preservedCurrentStatus, machine.Status.LastKnownState) if err != nil { - klog.Errorf("cannot reconcile machine %s: %s", machine.Name, err) - return retry, err - } - - deleteMachineRequest := &driver.DeleteMachineRequest{ - Machine: updatedMachine, - MachineClass: machineClass, - Secret: &v1.Secret{Data: secretData}, - } - retry, err = c.drainNode(ctx, deleteMachineRequest) - if err != nil { - klog.Errorf("error draining node backing machine: %s, error:%s", updatedMachine.Name, err) + klog.Errorf("machine PreserveExpiryTime UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) return retry, err } + klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) } - // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID + if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) @@ -2423,57 +2383,73 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach klog.Errorf("error trying to get node %q: %v", nodeName, err) return machineutils.ShortRetry, err } - - preservedCondition := v1.NodeCondition{ + nodeCopy := node.DeepCopy() + // not updating node's preserve annotations here in case operator is manipulating machine annotations only + // if node annotation is updated, machine annotation will be overwritten with this value even if operator wants it to change + // function never returns error, can be ignored + if nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { + CAScaleDownAnnotation := make(map[string]string) + CAScaleDownAnnotation[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + updatedNode, _, _ := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) + updatedNode, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + klog.Errorf("node UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) + return machineutils.ShortRetry, err + } + klog.V(2).Infof("Updated CA annotations for node %s, for machine %q, successfully", nodeCopy.Name, machine.Name) + } + newNodePreservedCondition := v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionTrue, LastTransitionTime: metav1.Now(), } - updatedNode := nodeops.AddOrUpdateCondition(node, preservedCondition) - _, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) - if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err + // drain node only if machine has failed + if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + existingNodePreservedCondition, err := nodeops.GetNodeCondition(ctx, c.targetCoreClient, getNodeName(machine), v1alpha1.NodePreserved) + if err != nil { + klog.V(3).Infof("Error trying to get node preserved condition for machine %s: %v", machine.Name, err) + return machineutils.ShortRetry, err } - klog.Errorf("Node Status UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) - return machineutils.ShortRetry, err - } - klog.V(2).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) - // not updating node's preserve annotations here in case operator is manipulating machine annotations only - // if node annotation is updated, machine annotation will be overwritten with this value even if operator wants it to change - // function never returns error, can be ignored - CAScaleDownAnnotation := make(map[string]string) - CAScaleDownAnnotation[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue - updatedNode, _, _ = annotations.AddOrUpdateAnnotation(node, CAScaleDownAnnotation) - updatedNode, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) - if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err + if existingNodePreservedCondition == nil || existingNodePreservedCondition.Reason != v1alpha1.DrainSuccessful { + + err = c.drainPreservedNode(ctx, machine) + if err != nil { + klog.V(3).Infof("TEST: drain failed with error:%s", err) + // drain not successful, retry + // if node condition of NodePreserved is not set, set it: + newNodePreservedCondition.Status = v1.ConditionUnknown + if existingNodePreservedCondition == nil { + if err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newNodePreservedCondition); err != nil { + klog.V(3).Infof("TEST: updating node with preserved condition failed: %s", err) + return machineutils.ShortRetry, err + } + } + return machineutils.ShortRetry, err + } + newNodePreservedCondition.Reason = v1alpha1.DrainSuccessful + } else { + klog.V(3).Infof("TEST: unnecessary entry into preserved machine %s", machine.Name) + return machineutils.LongRetry, nil } - klog.Errorf("node UPDATE failed for machine %s with node %s. 
Retrying, error: %s", machine.Name, nodeName, err) + } + klog.V(2).Infof("TEST: drainPreservedNode Successful %s", machine.Name) + if err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newNodePreservedCondition); err != nil { + klog.V(3).Infof("TEST: updating node with preserved condition failed: %s", err) return machineutils.ShortRetry, err } - klog.V(2).Infof("Updated CA annotations for node %s, for machine %q, successfully", node.Name, machine.Name) - + klog.V(2).Infof("TEST: update drain condition %s, Successful %s", newNodePreservedCondition.Reason, machine.Name) } return machineutils.LongRetry, nil } + func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - clone := machine.DeepCopy() - clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: machine.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{}, - } - updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", clone.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err + // check if preserveExpiryTime is set, if not, no need to do anything + if !machineutils.IsPreserveExpiryTimeSet(machine) { + return machineutils.LongRetry, nil } - klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) // if backing node exists, remove annotations that would prevent scale down by autoscaler if machine.Labels[v1alpha1.NodeLabelKey] != "" { // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID nodeName := machine.Labels[v1alpha1.NodeLabelKey] @@ -2510,5 +2486,111 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp return machineutils.ShortRetry, err } } + clone := machine.DeepCopy() + clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ + Phase: machine.Status.CurrentStatus.Phase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{}, + } + updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", clone.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err + } + return machineutils.ShortRetry, err + } + klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) return machineutils.LongRetry, nil } + +// drainPreservedNode attempts to drain the node backing a preserved machine +func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.Machine) error { + var ( + // Declarations + err error + timeOutOccurred bool + forceDeletePods bool + // Initialization + maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/drain.PodEvictionRetryInterval.Seconds())) + pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration + pvReattachTimeOut = c.safetyOptions.PvReattachTimeout.Duration + timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration + nodeName = machine.Labels[v1alpha1.NodeLabelKey] + ) + + // verify and log node object's existence + if _, err := c.nodeLister.Get(nodeName); err == nil { + klog.V(3).Infof("(drainNode) For node %q, machine %q", nodeName, machine.Name) + } else if apierrors.IsNotFound(err) { + klog.Warningf("(drainNode) Node %q for machine %q doesn't exist, so drain will finish instantly", nodeName, machine.Name) + } + // TODO@thiyyakat: how to calculate timeout? In the case of preserve=now, preserveexpirytime will not coincide with time of failure in which case pods will et force + // drained. 
+ timeOutOccurred = utiltime.HasTimeOutOccurred(machine.Status.CurrentStatus.PreserveExpiryTime, timeOutDuration) + if timeOutOccurred { + forceDeletePods = true + timeOutDuration = 1 * time.Minute + maxEvictRetries = 1 + klog.V(2).Infof( + "Force delete/drain has been triggerred for machine %q with providerID %q and backing node %q due to timeout:%t", + machine.Name, + getProviderID(machine), + getNodeName(machine), + timeOutOccurred, + ) + } else { + klog.V(2).Infof( + "Drain has been triggerred for preserved machine %q with providerID %q and backing node %q with drain-timeout:%v & maxEvictRetries:%d", + machine.Name, + getProviderID(machine), + getNodeName(machine), + timeOutDuration, + maxEvictRetries, + ) + } + + buf := bytes.NewBuffer([]byte{}) + errBuf := bytes.NewBuffer([]byte{}) + + drainOptions := drain.NewDrainOptions( + c.targetCoreClient, + c.targetKubernetesVersion, + timeOutDuration, + maxEvictRetries, + pvDetachTimeOut, + pvReattachTimeOut, + nodeName, + -1, + forceDeletePods, + true, + true, + true, + buf, + errBuf, + c.driver, + c.pvcLister, + c.pvLister, + c.pdbLister, + c.nodeLister, + c.podLister, + c.volumeAttachmentHandler, + c.podSynced, + ) + klog.V(3).Infof("(drainNode) Invoking RunDrain, timeOutDuration: %s", timeOutDuration) + err = drainOptions.RunDrain(ctx) + if err != nil { + klog.Warningf("Drain failed for machine %q , providerID %q ,backing node %q. 
\nBuf:%v \nErrBuf:%v \nErr-Message:%v", machine.Name, getProviderID(machine), getNodeName(machine), buf, errBuf, err) + return err + } + if forceDeletePods { + klog.V(3).Infof("Force drain successful for machine %q , providerID %q ,backing node %q.", machine.Name, getProviderID(machine), getNodeName(machine)) + } else { + klog.V(3).Infof("Drain successful for machine %q , providerID %q ,backing node %q.", machine.Name, getProviderID(machine), getNodeName(machine)) + } + return nil +} + +func hasMachinePreservationTimedOut(machine *v1alpha1.Machine) bool { + return machineutils.IsPreserveExpiryTimeSet(machine) && metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) +} diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index e591a8407..aab15d1bd 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -142,11 +142,6 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { // IsPreserveExpiryTimeSet checks if machine is preserved by MCM func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { - //klog.V(3).Infof("DEBUG: machine:%s, time=%v, IsZero=%v, Unix=%d", - // m.Name, - // m.Status.CurrentStatus.PreserveExpiryTime, - // m.Status.CurrentStatus.PreserveExpiryTime.IsZero(), - // m.Status.CurrentStatus.PreserveExpiryTime.Unix()) return !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() } @@ -159,21 +154,3 @@ func HasPreservationTimedOut(m *v1alpha1.Machine) bool { } return true } - -func ShouldMachineBePreservedNow(machine *v1alpha1.Machine, preserveValue string) bool { - isFailed := IsMachineFailed(machine) - return preserveValue == PreserveMachineAnnotationValueNow || preserveValue == PreserveMachineAnnotationValueWhenFailed && isFailed -} - -//func IsMachinePreserved(machine *v1alpha1.Machine) bool { -// val, exists := machine.Annotations[PreserveMachineAnnotationKey] -// if !exists { -// return false -// } else { -// if val == 
PreserveMachineAnnotationValueNow && machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { -// return true -// } else if val == PreserveMachineAnnotationValueWhenFailed && IsMachineFailed(machine) { -// return true -// } -// } -//} From 98d1b214f91e5cd9115d5549d35b12ac61d9b790 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 26 Nov 2025 16:14:38 +0530 Subject: [PATCH 16/79] Fix drain logic in machine preservation for Unknown->Failed case: * remove use of machineStatusUpdate in machine preservation code since it uses a similarity check * introduce check of phase change in updateMachine() to initiate drain of preserved machine on failure. This check is only for preserved machines --- .../provider/machinecontroller/machine.go | 68 ++++++++--- .../machinecontroller/machine_util.go | 114 +++++++++++------- pkg/util/provider/machineutils/utils.go | 2 + 3 files changed, 122 insertions(+), 62 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index d0b86ba7d..6fd938d49 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -62,11 +62,20 @@ func (c *controller) updateMachine(oldObj, newObj any) { klog.Errorf("couldn't convert to machine resource from object") return } + { // TODO@thiyyakat: remove after testing + if newMachine.Labels["test-failed"] != oldMachine.Labels["test-failed"] { + c.enqueueMachine(newObj, "TEST: handling machine failure simulation UPDATE event") + } + } if preserveAnnotationsChanged(oldMachine.Annotations, newMachine.Annotations) { c.enqueueMachine(newObj, "handling machine object preservation related UPDATE event") return } + if _, exists := newMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && newMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed && oldMachine.Status.CurrentStatus.Phase != newMachine.Status.CurrentStatus.Phase { + c.enqueueMachine(newObj, "handling 
preserved machine phase update") + } + if oldMachine.Generation == newMachine.Generation { klog.V(3).Infof("Skipping other non-spec updates for machine %s", oldMachine.Name) return @@ -213,7 +222,30 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp klog.Errorf("cannot reconcile machine %s: %s", machine.Name, err) return retry, err } + { //TODO@thiyyakat: remove after drain + //insert condition changing code here + if machine.Labels["test-failed"] == "true" { + node, err := c.nodeLister.Get(getNodeName(machine)) + if err != nil { + klog.V(3).Infof("TEST:Machine %q: Failed to get node %q: %v", machine.Name, machine.Name, err) + return machineutils.ShortRetry, err + } + if cond := nodeops.GetCondition(node, corev1.NodeNetworkUnavailable); cond.Status != corev1.ConditionTrue { + cond := corev1.NodeCondition{Type: corev1.NodeNetworkUnavailable, Status: corev1.ConditionTrue} + err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), cond) + if err != nil { + klog.V(2).Infof("TEST:Machine %q: Failed to change node condition %q: %v", machine.Name, machine.Name, err) + return machineutils.ShortRetry, err + } + klog.V(2).Infof("TEST:Machine %q: updated node %q condition", machine.Name, machine.Name) + } + } + } + retry, err = c.manageMachinePreservation(ctx, machine) + if err != nil { + return retry, err + } if machine.Labels[v1alpha1.NodeLabelKey] != "" && machine.Status.CurrentStatus.Phase != "" { // If reference to node object exists execute the below retry, err := c.reconcileMachineHealth(ctx, machine) @@ -751,13 +783,18 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // - failed machine, already preserved, check for timeout // // Auto-preserve case will have to be handled where machine moved from Unknown to Failed -func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { +func (c *controller) 
manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { + klog.V(3).Infof("TEST: entering manageMachinePreservation ") // check effective preservation value based on node's and machine's annotations. - updatedMachine, preserveValue, err := c.syncEffectivePreserveAnnotationValue(ctx, machine) + updatedMachine, preserveValue, exists, err := c.syncEffectivePreserveAnnotationValue(ctx, machine) if err != nil { klog.Errorf("Error getting preserve annotation value for machine %q: %s", machine.Name, err) return machineutils.ShortRetry, err } + if !exists { + return machineutils.LongRetry, nil + } + klog.V(3).Infof("TEST: preserve:%s", preserveValue) switch preserveValue { case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && !machineutils.IsMachineFailed(updatedMachine) { @@ -779,11 +816,12 @@ func (c *controller) machinePreservation(ctx context.Context, machine *v1alpha1. 
return machineutils.LongRetry, nil default: klog.Warningf("Preserve annotation value %s on machine %s is invalid", preserveValue, machine.Name) + return machineutils.LongRetry, nil } return machineutils.LongRetry, nil } -func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, string, error) { +func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, string, bool, error) { var effectivePreserveAnnotationValue string mAnnotationValue, mExists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] // node annotation value, if exists, will always override and overwrite machine annotation value for preserve @@ -792,21 +830,21 @@ func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, m node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error trying to get node %q: %v", nodeName, err) - return machine, "", err + return machine, "", false, err } nAnnotationValue, nExists := node.Annotations[machineutils.PreserveMachineAnnotationKey] switch { case nExists && mExists: if nAnnotationValue == mAnnotationValue { - return machine, nAnnotationValue, nil + return machine, nAnnotationValue, true, nil } effectivePreserveAnnotationValue = nAnnotationValue case nExists && !mExists: effectivePreserveAnnotationValue = nAnnotationValue case mExists && !nExists: - return machine, mAnnotationValue, nil + return machine, mAnnotationValue, true, nil case !nExists && !mExists: - return machine, "", nil + return machine, "", false, nil } clone := machine.DeepCopy() if clone.Annotations == nil { @@ -816,21 +854,17 @@ func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, m updatedMachine, err := c.controlMachineClient.Machines(c.namespace).Update(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("error updating machine with preserve annotations %q: %v", machine.Name, 
err) - return machine, "", err + return machine, "", true, err } - return updatedMachine, effectivePreserveAnnotationValue, nil - } - //if no backing node - if mExists { - return machine, mAnnotationValue, nil + return updatedMachine, effectivePreserveAnnotationValue, true, nil } - return machine, "", nil + return machine, mAnnotationValue, mExists, nil } func (c *controller) isMachinePreservationComplete(ctx context.Context, machine *v1alpha1.Machine) (bool, error) { - // if preservetime is set and machine is not failed, then yes, - // if preservetime is set and machine is failed, the node condition must be there saying drain successful - // if preserve time is not set, then no + // if PreserveExpiryTime is set and machine has not failed, then yes, + // if PreserveExpiryTime is set and machine has failed, the node condition must be there saying drain successful + // if PreserveExpiryTime is not set, then no if !machineutils.IsPreserveExpiryTimeSet(machine) { return false, nil } else if machineutils.IsMachineFailed(machine) { diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index ac31d5411..c2cd0804a 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -1014,7 +1014,8 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ Phase: v1alpha1.MachineUnknown, // TimeoutActive: true, - LastUpdateTime: metav1.Now(), + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: machine.Status.CurrentStatus.PreserveExpiryTime, } clone.Status.LastOperation = v1alpha1.LastOperation{ Description: description, @@ -1105,9 +1106,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph if err != nil { return retry, err } - //if attemptMachinePreservation(machine) && !machineutils.IsPreserveExpiryTimeSet(machine) { - // return 
c.preserveMachine(ctx, machine) - //} return retry, err } @@ -1176,7 +1174,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph err = errSuccessfulPhaseUpdate } return machineutils.ShortRetry, err - // TODO@thiyyakat: fix this. check earlier code. // Return error to end the reconcile } return machineutils.LongRetry, nil @@ -2141,7 +2138,8 @@ func (c *controller) updateMachineToFailedState(ctx context.Context, description clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ Phase: v1alpha1.MachineFailed, // TimeoutActive: false, - LastUpdateTime: metav1.Now(), + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: machine.Status.CurrentStatus.PreserveExpiryTime, } _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) @@ -2361,21 +2359,23 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { // or has an annotation related to preservation. // it does not sync preserve annotation values from machine to node to prevent bi-directional syncing issues. 
func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - klog.V(3).Infof("Entering preserve machine") + klog.V(3).Infof("TEST: Entering preserve machine flow") if !machineutils.IsPreserveExpiryTimeSet(machine) { preservedCurrentStatus := v1alpha1.CurrentStatus{ Phase: machine.Status.CurrentStatus.Phase, LastUpdateTime: metav1.Now(), PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), } - retry, err := c.machineStatusUpdate(ctx, machine, machine.Status.LastOperation, preservedCurrentStatus, machine.Status.LastKnownState) + clone := machine.DeepCopy() + clone.Status.CurrentStatus = preservedCurrentStatus + _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("machine PreserveExpiryTime UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) - return retry, err + klog.Warningf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) + return machineutils.ConflictRetry, err } - klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) + klog.V(3).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) } - + klog.V(3).Infof("TEST: preserveexpiry set for machine %q", machine.Name) if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) @@ -2414,13 +2414,11 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return machineutils.ShortRetry, err } if existingNodePreservedCondition == nil || existingNodePreservedCondition.Reason != v1alpha1.DrainSuccessful { - err = c.drainPreservedNode(ctx, machine) if err != nil { klog.V(3).Infof("TEST: drain failed with error:%s", err) // drain not successful, retry // if node condition of NodePreserved is not set, set it: - newNodePreservedCondition.Status = v1.ConditionUnknown if existingNodePreservedCondition == nil { if err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newNodePreservedCondition); err != nil { klog.V(3).Infof("TEST: updating node with preserved condition failed: %s", err) @@ -2429,18 +2427,18 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } return machineutils.ShortRetry, err } + klog.V(2).Infof("TEST: drainPreservedNode Successful %s", machine.Name) newNodePreservedCondition.Reason = v1alpha1.DrainSuccessful } else { klog.V(3).Infof("TEST: unnecessary entry into preserved machine %s", machine.Name) return machineutils.LongRetry, nil } } - klog.V(2).Infof("TEST: drainPreservedNode Successful %s", machine.Name) if err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newNodePreservedCondition); err != nil { klog.V(3).Infof("TEST: updating node with preserved condition failed: %s", err) return 
machineutils.ShortRetry, err } - klog.V(2).Infof("TEST: update drain condition %s, Successful %s", newNodePreservedCondition.Reason, machine.Name) + klog.V(3).Infof("TEST: updating machine %q with new node condition was successful", machine.Name) } return machineutils.LongRetry, nil } @@ -2451,11 +2449,11 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp return machineutils.LongRetry, nil } // if backing node exists, remove annotations that would prevent scale down by autoscaler - if machine.Labels[v1alpha1.NodeLabelKey] != "" { // TODO@thiyyakat: may need to change this check to machine.Spec.ProviderID + if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { - klog.Errorf("error trying to get node %q: %v", nodeName, err) + klog.Errorf("error trying to get backing node %q for machine %s. error: %v", nodeName, machine.Name, err) return machineutils.ShortRetry, err } preservedCondition := v1.NodeCondition{ @@ -2463,23 +2461,25 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), } - updatedNode := nodeops.AddOrUpdateCondition(node, preservedCondition) - updatedNode, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) + err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), preservedCondition) if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - klog.Errorf("Node Status UPDATE failed for machine %s with node %s. Retrying, error: %s", machine.Name, nodeName, err) + klog.Warningf("Node/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) return machineutils.ShortRetry, err } - klog.V(2).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) + klog.V(3).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) // remove CA annotation from node, values do not matter here CAAnnotations := make(map[string]string) CAAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" - updatedNode, _, _ = annotations.RemoveAnnotation(updatedNode, CAAnnotations) // error can be ignored, always returns nil - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + latestNode, err := c.targetCoreClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) if err != nil { - klog.Errorf("error trying to update node %q: %v", nodeName, err) + klog.Errorf("error trying to get backing node %q for machine %s. error: %v", nodeName, machine.Name, err) + return machineutils.ShortRetry, err + } + latestNodeCopy := latestNode.DeepCopy() + latestNodeCopy, _, _ = annotations.RemoveAnnotation(latestNodeCopy, CAAnnotations) // error can be ignored, always returns nil + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, latestNodeCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Warningf("Node UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err } @@ -2492,15 +2492,12 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp LastUpdateTime: metav1.Now(), PreserveExpiryTime: metav1.Time{}, } - updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("Machine/status UPDATE failed for machine %q. 
Retrying, error: %s", clone.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err + klog.Warningf("Machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + return machineutils.ConflictRetry, err } - klog.V(2).Infof("Machine %q status updated to stop preservation ", updatedMachine.Name) + klog.V(3).Infof("Machine status updated to stop preservation for machine %q", clone.Name) return machineutils.LongRetry, nil } @@ -2508,16 +2505,29 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.Machine) error { var ( // Declarations - err error - timeOutOccurred bool - forceDeletePods bool + err error + forceDeletePods bool + timeOutOccurred bool + description string + readOnlyFileSystemCondition, nodeReadyCondition v1.NodeCondition + // Initialization - maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/drain.PodEvictionRetryInterval.Seconds())) - pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration - pvReattachTimeOut = c.safetyOptions.PvReattachTimeout.Duration - timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration - nodeName = machine.Labels[v1alpha1.NodeLabelKey] + maxEvictRetries = int32(math.Min(float64(*c.getEffectiveMaxEvictRetries(machine)), c.getEffectiveDrainTimeout(machine).Seconds()/drain.PodEvictionRetryInterval.Seconds())) + pvDetachTimeOut = c.safetyOptions.PvDetachTimeout.Duration + pvReattachTimeOut = c.safetyOptions.PvReattachTimeout.Duration + timeOutDuration = c.getEffectiveDrainTimeout(machine).Duration + forceDrainLabelPresent = machine.Labels["force-drain"] == "True" + nodeName = machine.Labels[v1alpha1.NodeLabelKey] + nodeNotReadyDuration = 5 * time.Minute + ReadonlyFilesystem v1.NodeConditionType = "ReadonlyFilesystem" ) + for _, condition := 
range machine.Status.Conditions { + if condition.Type == v1.NodeReady { + nodeReadyCondition = condition + } else if condition.Type == ReadonlyFilesystem { + readOnlyFileSystemCondition = condition + } + } // verify and log node object's existence if _, err := c.nodeLister.Get(nodeName); err == nil { @@ -2525,10 +2535,24 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M } else if apierrors.IsNotFound(err) { klog.Warningf("(drainNode) Node %q for machine %q doesn't exist, so drain will finish instantly", nodeName, machine.Name) } + + if !isConditionEmpty(nodeReadyCondition) && (nodeReadyCondition.Status != v1.ConditionTrue) && (time.Since(nodeReadyCondition.LastTransitionTime.Time) > nodeNotReadyDuration) { + message := "Setting forceDeletePods to true for drain as machine is NotReady for over 5min" + forceDeletePods = true + printLogInitError(message, &err, &description, machine, true) + } else if !isConditionEmpty(readOnlyFileSystemCondition) && (readOnlyFileSystemCondition.Status != v1.ConditionFalse) && (time.Since(readOnlyFileSystemCondition.LastTransitionTime.Time) > nodeNotReadyDuration) { + message := "Setting forceDeletePods to true for drain as machine is in ReadonlyFilesystem for over 5min" + forceDeletePods = true + printLogInitError(message, &err, &description, machine, true) + } + // TODO@thiyyakat: how to calculate timeout? In the case of preserve=now, preserveexpirytime will not coincide with time of failure in which case pods will et force // drained. - timeOutOccurred = utiltime.HasTimeOutOccurred(machine.Status.CurrentStatus.PreserveExpiryTime, timeOutDuration) - if timeOutOccurred { + // current solution: since we want to know when machine transitioned to Failed, using lastupdatetime. 
+ // in the case of preserve=now, preserveExpiryTime is set from the time the annotation is added, and can't tell us when + // machine moved to Failed + timeOutOccurred = utiltime.HasTimeOutOccurred(machine.Status.CurrentStatus.LastUpdateTime, timeOutDuration) + if forceDrainLabelPresent || timeOutOccurred { forceDeletePods = true timeOutDuration = 1 * time.Minute maxEvictRetries = 1 diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index aab15d1bd..317eba191 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -6,6 +6,7 @@ package machineutils import ( + "k8s.io/klog/v2" "time" v1 "k8s.io/api/core/v1" @@ -142,6 +143,7 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { // IsPreserveExpiryTimeSet checks if machine is preserved by MCM func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { + klog.V(3).Infof("Preserve Expiry Time: %v, machine: %s", m.Status.CurrentStatus.PreserveExpiryTime, m.Name) return !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() } From c0a56477ad89bbf430be45c17a2f9f51926d61f8 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 27 Nov 2025 16:11:38 +0530 Subject: [PATCH 17/79] Fix toggle between now and when-failed when machine has not failed. 
--- pkg/util/provider/machinecontroller/machine.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 6fd938d49..c428f1a63 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -797,7 +797,11 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a klog.V(3).Infof("TEST: preserve:%s", preserveValue) switch preserveValue { case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: + // if preserve annotation value has switched from now to when-failed, then stop preservation if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && !machineutils.IsMachineFailed(updatedMachine) { + if machineutils.IsPreserveExpiryTimeSet(updatedMachine) { + return c.stopMachinePreservation(ctx, updatedMachine) + } return machineutils.LongRetry, nil } isComplete, err := c.isMachinePreservationComplete(ctx, machine) From fcbca23492b668dc4b7a19399b47eb5a09c406a7 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 4 Dec 2025 11:43:22 +0530 Subject: [PATCH 18/79] Refactor changes to support auto-preservation of failed machines * Introduce new annotation value for preservation `PreserveMachineAnnotationValuePreservedByMCM` * Update Condition.Reason and Condition.Message to reflect preservation by user and auto-preservation * Update Machine Deployment Spec to include AutoPreservedFailedMachineMax * Modify MachineSet controller to update status with count of auto-preserved machines * Add updated CRDs and generated code --- docs/documents/apis.md | 70 +++++++++++++++++++ ...achine.sapcloud.io_machinedeployments.yaml | 6 ++ .../crds/machine.sapcloud.io_machinesets.yaml | 8 +++ pkg/apis/machine/types.go | 9 +++ pkg/apis/machine/v1alpha1/machine_types.go | 9 ++- .../v1alpha1/machinedeployment_types.go | 5 ++ 
pkg/apis/machine/v1alpha1/machineset_types.go | 7 ++ .../v1alpha1/zz_generated.conversion.go | 6 ++ pkg/controller/deployment_machineset_util.go | 8 +++ pkg/controller/deployment_sync.go | 9 +-- pkg/controller/machineset.go | 16 +++++ pkg/openapi/openapi_generated.go | 20 ++++++ .../machinecontroller/machine_util.go | 11 ++- pkg/util/provider/machineutils/utils.go | 4 ++ 14 files changed, 180 insertions(+), 8 deletions(-) diff --git a/docs/documents/apis.md b/docs/documents/apis.md index f629c3f8b..c6f19d3e3 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -513,6 +513,21 @@ not be estimated during the time a MachineDeployment is paused. This is not set by default, which is treated as infinite deadline.

+ + +autoPreserveFailedMachineMax + + + +int32 + + + +(Optional) +

The maximum number of machines in the machine deployment that will be auto-preserved. +In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker’s machine deployments

+ + @@ -678,6 +693,19 @@ int32 (Optional) + + +autoPreserveFailedMachineMax + + + +int32 + + + +(Optional) + + @@ -1429,6 +1457,21 @@ not be estimated during the time a MachineDeployment is paused. This is not set by default, which is treated as infinite deadline.

+ + +autoPreserveFailedMachineMax + + + +int32 + + + +(Optional) +

The maximum number of machines in the machine deployment that will be auto-preserved. +In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker’s machine deployments

+ +
@@ -1891,6 +1934,19 @@ int32 (Optional) + + +autoPreserveFailedMachineMax + + + +int32 + + + +(Optional) + +
@@ -2029,6 +2085,20 @@ LastOperation

FailedMachines has summary of machines on which lastOperation Failed

+ + +autoPreservedFailedMachineCount + + + +int32 + + + +(Optional) +

AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved

+ +
diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml index 39c59908d..555d898c4 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml @@ -67,6 +67,12 @@ spec: spec: description: Specification of the desired behavior of the MachineDeployment. properties: + autoPreserveFailedMachineMax: + description: |- + The maximum number of machines in the machine deployment that will be auto-preserved. + In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments + format: int32 + type: integer minReadySeconds: description: |- Minimum number of seconds for which a newly created machine should be ready diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml index 4f7d82cd4..b76c22cc3 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -60,6 +60,9 @@ spec: spec: description: MachineSetSpec is the specification of a MachineSet. properties: + autoPreserveFailedMachineMax: + format: int32 + type: integer machineClass: description: ClassSpec is the class specification of machine properties: @@ -316,6 +319,11 @@ spec: description: MachineSetStatus holds the most recently observed status of MachineSet. properties: + autoPreservedFailedMachineCount: + description: AutoPreservedFailedMachineCount has a count of the number + of failed machines in the machineset that have been auto-preserved + format: int32 + type: integer availableReplicas: description: The number of available replicas (ready for at least minReadySeconds) for this replica set. 
diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index d0cf1de77..6c6487d78 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -357,6 +357,8 @@ type MachineSetSpec struct { Template MachineTemplateSpec MinReadySeconds int32 + + AutoPreserveFailedMachineMax int32 } // MachineSetConditionType is the condition on machineset object @@ -415,6 +417,9 @@ type MachineSetStatus struct { // FailedMachines has summary of machines on which lastOperation Failed FailedMachines *[]MachineSummary + + // AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved + AutoPreservedFailedMachineCount int32 } // MachineSummary store the summary of machine. @@ -493,6 +498,10 @@ type MachineDeploymentSpec struct { // not be estimated during the time a MachineDeployment is paused. This is not set // by default. ProgressDeadlineSeconds *int32 + + // The maximum number of machines in the machine deployment that will be auto-preserved. 
+ // In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments + AutoPreserveFailedMachineMax int32 } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 057fd95f1..57a532949 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -248,7 +248,14 @@ const ( // NodePreserved is a node condition type for preservation of machines to allow end-user to know that a node is preserved NodePreserved corev1.NodeConditionType = "NodePreserved" - // + // NodePreservedByMCM is a node condition reason for preservation of machines to indicate that the node is auto-preserved by MCM + NodePreservedByMCM string = "PreservedByMCM" + + //NodePreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user + NodePreservedByUser string = "PreservedByUser" + + //PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful + PreservedNodeDrainSuccessful string = "PreservedNodeDrainSuccessful" ) // CurrentStatus contains information about the current status of Machine. diff --git a/pkg/apis/machine/v1alpha1/machinedeployment_types.go b/pkg/apis/machine/v1alpha1/machinedeployment_types.go index 1839ad866..6cebcd1a1 100644 --- a/pkg/apis/machine/v1alpha1/machinedeployment_types.go +++ b/pkg/apis/machine/v1alpha1/machinedeployment_types.go @@ -91,6 +91,11 @@ type MachineDeploymentSpec struct { // by default, which is treated as infinite deadline. // +optional ProgressDeadlineSeconds *int32 `json:"progressDeadlineSeconds,omitempty"` + + // The maximum number of machines in the machine deployment that will be auto-preserved. 
+ // In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments + // +optional + AutoPreserveFailedMachineMax int32 `json:"autoPreserveFailedMachineMax,omitempty"` } const ( diff --git a/pkg/apis/machine/v1alpha1/machineset_types.go b/pkg/apis/machine/v1alpha1/machineset_types.go index 2e6eb1d6e..6e8cf1f03 100644 --- a/pkg/apis/machine/v1alpha1/machineset_types.go +++ b/pkg/apis/machine/v1alpha1/machineset_types.go @@ -68,6 +68,9 @@ type MachineSetSpec struct { // +optional MinReadySeconds int32 `json:"minReadySeconds,omitempty"` + + // +optional + AutoPreserveFailedMachineMax int32 `json:"autoPreserveFailedMachineMax,omitempty"` } // MachineSetConditionType is the condition on machineset object @@ -135,4 +138,8 @@ type MachineSetStatus struct { // FailedMachines has summary of machines on which lastOperation Failed // +optional FailedMachines *[]MachineSummary `json:"failedMachines,omitempty"` + + // AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved + // +optional + AutoPreservedFailedMachineCount int32 `json:"autoPreservedFailedMachineCount,omitempty"` } diff --git a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go index fcca5ee3f..8286caba6 100644 --- a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go @@ -660,6 +660,7 @@ func autoConvert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec out.Paused = in.Paused out.RollbackTo = (*machine.RollbackConfig)(unsafe.Pointer(in.RollbackTo)) out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) + out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -682,6 +683,7 @@ func autoConvert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec 
out.Paused = in.Paused out.RollbackTo = (*RollbackConfig)(unsafe.Pointer(in.RollbackTo)) out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) + out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -864,6 +866,7 @@ func autoConvert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in *MachineSe return err } out.MinReadySeconds = in.MinReadySeconds + out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -882,6 +885,7 @@ func autoConvert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in *machine.M return err } out.MinReadySeconds = in.MinReadySeconds + out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -901,6 +905,7 @@ func autoConvert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in *Machi return err } out.FailedMachines = (*[]machine.MachineSummary)(unsafe.Pointer(in.FailedMachines)) + out.AutoPreservedFailedMachineCount = in.AutoPreservedFailedMachineCount return nil } @@ -920,6 +925,7 @@ func autoConvert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in *machi return err } out.FailedMachines = (*[]MachineSummary)(unsafe.Pointer(in.FailedMachines)) + out.AutoPreservedFailedMachineCount = in.AutoPreservedFailedMachineCount return nil } diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index ee2250451..b41220916 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -99,6 +99,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al fullyLabeledReplicasCount := 0 readyReplicasCount := 0 availableReplicasCount := 0 + autoPreservedFailedMachineCount := 0 failedMachines := []v1alpha1.MachineSummary{} var machineSummary v1alpha1.MachineSummary @@ -124,6 +125,11 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al } failedMachines = append(failedMachines, 
machineSummary) } + if cond := getMachineCondition(machine, v1alpha1.NodePreserved); cond != nil { + if cond.Reason == v1alpha1.NodePreservedByMCM { + autoPreservedFailedMachineCount++ + } + } } // Update the FailedMachines field when we see new failures @@ -146,6 +152,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al SetCondition(&newStatus, cond) } else if manageReplicasErr == nil && failureCond != nil { RemoveCondition(&newStatus, v1alpha1.MachineSetReplicaFailure) + } else if manageReplicasErr != nil { } newStatus.Replicas = int32(len(filteredMachines)) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 @@ -153,6 +160,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al newStatus.ReadyReplicas = int32(readyReplicasCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 newStatus.AvailableReplicas = int32(availableReplicasCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 newStatus.LastOperation.LastUpdateTime = metav1.Now() + newStatus.AutoPreservedFailedMachineCount = int32(autoPreservedFailedMachineCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 return newStatus } diff --git a/pkg/controller/deployment_sync.go b/pkg/controller/deployment_sync.go index b0f13d440..e5079c393 100644 --- a/pkg/controller/deployment_sync.go +++ b/pkg/controller/deployment_sync.go @@ -311,10 +311,11 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD Labels: newISTemplate.Labels, }, Spec: v1alpha1.MachineSetSpec{ - Replicas: 0, - MinReadySeconds: d.Spec.MinReadySeconds, - Selector: newISSelector, - Template: newISTemplate, + Replicas: 0, + MinReadySeconds: d.Spec.MinReadySeconds, + Selector: newISSelector, + Template: newISTemplate, + AutoPreserveFailedMachineMax: d.Spec.AutoPreserveFailedMachineMax, }, } allISs := append(oldISs, &newIS) diff --git a/pkg/controller/machineset.go 
b/pkg/controller/machineset.go index 281ccdd75..083159bc9 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -923,3 +923,19 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. return machine, retryErr } + +func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) (*v1alpha1.Machine, error) { + updatedMachine, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { + if clone.Annotations == nil { + clone.Annotations = make(map[string]string) + } + clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM + return nil + }) + if err != nil { + return nil, fmt.Errorf("error in annotating machine %s for auto-preservation, error:%v", m.Name, err) + } + klog.V(2).Infof("Updated Machine %s/%s with auto-preserve annotation.", m.Namespace, m.Name) + return updatedMachine, nil + +} diff --git a/pkg/openapi/openapi_generated.go b/pkg/openapi/openapi_generated.go index 60a1d1fb0..257954fd1 100644 --- a/pkg/openapi/openapi_generated.go +++ b/pkg/openapi/openapi_generated.go @@ -955,6 +955,13 @@ func schema_pkg_apis_machine_v1alpha1_MachineDeploymentSpec(ref common.Reference Format: "int32", }, }, + "autoPreserveFailedMachineMax": { + SchemaProps: spec.SchemaProps{ + Description: "The maximum number of machines in the machine deployment that will be auto-preserved. 
In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, Required: []string{"template"}, }, @@ -1329,6 +1336,12 @@ func schema_pkg_apis_machine_v1alpha1_MachineSetSpec(ref common.ReferenceCallbac Format: "int32", }, }, + "autoPreserveFailedMachineMax": { + SchemaProps: spec.SchemaProps{ + Type: []string{"integer"}, + Format: "int32", + }, + }, }, }, }, @@ -1414,6 +1427,13 @@ func schema_pkg_apis_machine_v1alpha1_MachineSetStatus(ref common.ReferenceCallb }, }, }, + "autoPreservedFailedMachineCount": { + SchemaProps: spec.SchemaProps{ + Description: "AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved", + Type: []string{"integer"}, + Format: "int32", + }, + }, }, }, }, diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index c2cd0804a..2b3b46b75 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2358,7 +2358,7 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { // preserveMachine contains logic to start the preservation of a machine and node. It syncs node annotations to the machine if the backing node exists, // or has an annotation related to preservation. // it does not sync preserve annotation values from machine to node to prevent bi-directional syncing issues. 
-func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { +func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (machineutils.RetryPeriod, error) { klog.V(3).Infof("TEST: Entering preserve machine flow") if !machineutils.IsPreserveExpiryTimeSet(machine) { preservedCurrentStatus := v1alpha1.CurrentStatus{ @@ -2406,6 +2406,11 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach Status: v1.ConditionTrue, LastTransitionTime: metav1.Now(), } + if preserveValue != machineutils.PreserveMachineAnnotationValuePreservedByMCM { + newNodePreservedCondition.Reason = v1alpha1.NodePreservedByUser + } else { + newNodePreservedCondition.Reason = v1alpha1.NodePreservedByMCM + } // drain node only if machine has failed if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { existingNodePreservedCondition, err := nodeops.GetNodeCondition(ctx, c.targetCoreClient, getNodeName(machine), v1alpha1.NodePreserved) @@ -2413,7 +2418,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach klog.V(3).Infof("Error trying to get node preserved condition for machine %s: %v", machine.Name, err) return machineutils.ShortRetry, err } - if existingNodePreservedCondition == nil || existingNodePreservedCondition.Reason != v1alpha1.DrainSuccessful { + if existingNodePreservedCondition == nil || existingNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { err = c.drainPreservedNode(ctx, machine) if err != nil { klog.V(3).Infof("TEST: drain failed with error:%s", err) @@ -2428,7 +2433,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return machineutils.ShortRetry, err } klog.V(2).Infof("TEST: drainPreservedNode Successful %s", machine.Name) - newNodePreservedCondition.Reason = v1alpha1.DrainSuccessful + newNodePreservedCondition.Message = 
v1alpha1.PreservedNodeDrainSuccessful } else { klog.V(3).Infof("TEST: unnecessary entry into preserved machine %s", machine.Name) return machineutils.LongRetry, nil diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 317eba191..261fbc356 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -94,6 +94,10 @@ const ( // a Machine be preserved if and when in it enters Failed phase PreserveMachineAnnotationValueWhenFailed = "when-failed" + // PreserveMachineAnnotationValuePreservedByMCM is the annotation value used to explicitly request that + // a Machine be preserved if and when in it enters Failed phase + PreserveMachineAnnotationValuePreservedByMCM = "auto-preserved" + //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that // a Machine should not be preserved any longer, even if the expiry timeout has not been reached PreserveMachineAnnotationValueFalse = "false" From bf520e144db2813a9c73f7665ff680e07269804b Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 5 Dec 2025 15:47:47 +0530 Subject: [PATCH 19/79] Fix bugs that prevented MCS update, and auto-preservation of machines --- docs/documents/apis.md | 4 +- .../crds/machine.sapcloud.io_machinesets.yaml | 4 +- pkg/apis/machine/types.go | 4 +- pkg/apis/machine/v1alpha1/machine_types.go | 2 +- pkg/apis/machine/v1alpha1/machineset_types.go | 4 +- .../v1alpha1/zz_generated.conversion.go | 4 +- pkg/controller/deployment_machineset_util.go | 16 ++++--- pkg/controller/machineset.go | 43 ++++++++++++++++++- pkg/openapi/openapi_generated.go | 4 +- .../provider/machinecontroller/machine.go | 15 ++++--- .../machinecontroller/machine_util.go | 18 ++++---- 11 files changed, 83 insertions(+), 35 deletions(-) diff --git a/docs/documents/apis.md b/docs/documents/apis.md index c6f19d3e3..a4e2f7628 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -2087,7 +2087,7 @@ LastOperation 
-autoPreservedFailedMachineCount +autoPreserveFailedMachineCount @@ -2096,7 +2096,7 @@ int32 (Optional) -

AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved

+

AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved

diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml index b76c22cc3..970be4b20 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -319,8 +319,8 @@ spec: description: MachineSetStatus holds the most recently observed status of MachineSet. properties: - autoPreservedFailedMachineCount: - description: AutoPreservedFailedMachineCount has a count of the number + autoPreserveFailedMachineCount: + description: AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved format: int32 type: integer diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index 6c6487d78..10b3b507e 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -418,8 +418,8 @@ type MachineSetStatus struct { // FailedMachines has summary of machines on which lastOperation Failed FailedMachines *[]MachineSummary - // AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved - AutoPreservedFailedMachineCount int32 + // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved + AutoPreserveFailedMachineCount int32 } // MachineSummary store the summary of machine. 
diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 57a532949..0ce3eb483 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -255,7 +255,7 @@ const ( NodePreservedByUser string = "PreservedByUser" //PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful - PreservedNodeDrainSuccessful string = "PreservedNodeDrainSuccessful" + PreservedNodeDrainSuccessful string = "Preserved Node Drained Successfully" ) // CurrentStatus contains information about the current status of Machine. diff --git a/pkg/apis/machine/v1alpha1/machineset_types.go b/pkg/apis/machine/v1alpha1/machineset_types.go index 6e8cf1f03..8cd73d58e 100644 --- a/pkg/apis/machine/v1alpha1/machineset_types.go +++ b/pkg/apis/machine/v1alpha1/machineset_types.go @@ -139,7 +139,7 @@ type MachineSetStatus struct { // +optional FailedMachines *[]MachineSummary `json:"failedMachines,omitempty"` - // AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved + // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved // +optional - AutoPreservedFailedMachineCount int32 `json:"autoPreservedFailedMachineCount,omitempty"` + AutoPreserveFailedMachineCount int32 `json:"autoPreserveFailedMachineCount,omitempty"` } diff --git a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go index 8286caba6..1990c2f03 100644 --- a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go @@ -905,7 +905,7 @@ func autoConvert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in *Machi return err } out.FailedMachines = (*[]machine.MachineSummary)(unsafe.Pointer(in.FailedMachines)) - 
out.AutoPreservedFailedMachineCount = in.AutoPreservedFailedMachineCount + out.AutoPreserveFailedMachineCount = in.AutoPreserveFailedMachineCount return nil } @@ -925,7 +925,7 @@ func autoConvert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in *machi return err } out.FailedMachines = (*[]MachineSummary)(unsafe.Pointer(in.FailedMachines)) - out.AutoPreservedFailedMachineCount = in.AutoPreservedFailedMachineCount + out.AutoPreserveFailedMachineCount = in.AutoPreserveFailedMachineCount return nil } diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index b41220916..fc9fcb7b9 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -48,7 +48,8 @@ func updateMachineSetStatus(ctx context.Context, machineClient machineapi.Machin is.Status.AvailableReplicas == newStatus.AvailableReplicas && is.Generation == is.Status.ObservedGeneration && reflect.DeepEqual(is.Status.Conditions, newStatus.Conditions) && - reflect.DeepEqual(is.Status.FailedMachines, newStatus.FailedMachines) { + reflect.DeepEqual(is.Status.FailedMachines, newStatus.FailedMachines) && + is.Status.AutoPreserveFailedMachineCount == newStatus.AutoPreserveFailedMachineCount { return is, nil } @@ -66,7 +67,8 @@ func updateMachineSetStatus(ctx context.Context, machineClient machineapi.Machin fmt.Sprintf("fullyLabeledReplicas %d->%d, ", is.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas)+ fmt.Sprintf("readyReplicas %d->%d, ", is.Status.ReadyReplicas, newStatus.ReadyReplicas)+ fmt.Sprintf("availableReplicas %d->%d, ", is.Status.AvailableReplicas, newStatus.AvailableReplicas)+ - fmt.Sprintf("sequence No: %v->%v", is.Status.ObservedGeneration, newStatus.ObservedGeneration)) + fmt.Sprintf("sequence No: %v->%v", is.Status.ObservedGeneration, newStatus.ObservedGeneration)+ + fmt.Sprintf("autoPreserveFailedMachineCount %v->%v", is.Status.AutoPreserveFailedMachineCount, 
newStatus.AutoPreserveFailedMachineCount)) is.Status = newStatus updatedIS, updateErr = c.UpdateStatus(ctx, is, metav1.UpdateOptions{}) @@ -78,7 +80,7 @@ func updateMachineSetStatus(ctx context.Context, machineClient machineapi.Machin if i >= statusUpdateRetries { break } - // Update the MachineSet with the latest resource veision for the next poll + // Update the MachineSet with the latest resource version for the next poll if is, getErr = c.Get(ctx, is.Name, metav1.GetOptions{}); getErr != nil { // If the GET fails we can't trust status.Replicas anymore. This error // is bound to be more interesting than the update failure. @@ -99,7 +101,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al fullyLabeledReplicasCount := 0 readyReplicasCount := 0 availableReplicasCount := 0 - autoPreservedFailedMachineCount := 0 + autoPreserveFailedMachineCount := 0 failedMachines := []v1alpha1.MachineSummary{} var machineSummary v1alpha1.MachineSummary @@ -127,7 +129,8 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al } if cond := getMachineCondition(machine, v1alpha1.NodePreserved); cond != nil { if cond.Reason == v1alpha1.NodePreservedByMCM { - autoPreservedFailedMachineCount++ + autoPreserveFailedMachineCount++ + klog.V(3).Infof("TEST: incrementing autoPreserveFailedMachineCount to %v", autoPreserveFailedMachineCount) } } } @@ -152,7 +155,6 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al SetCondition(&newStatus, cond) } else if manageReplicasErr == nil && failureCond != nil { RemoveCondition(&newStatus, v1alpha1.MachineSetReplicaFailure) - } else if manageReplicasErr != nil { } newStatus.Replicas = int32(len(filteredMachines)) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 @@ -160,7 +162,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al newStatus.ReadyReplicas = int32(readyReplicasCount) // #nosec G115 (CWE-190) -- 
number of machines will not exceed MaxInt32 newStatus.AvailableReplicas = int32(availableReplicasCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 newStatus.LastOperation.LastUpdateTime = metav1.Now() - newStatus.AutoPreservedFailedMachineCount = int32(autoPreservedFailedMachineCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 + newStatus.AutoPreserveFailedMachineCount = int32(autoPreserveFailedMachineCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 return newStatus } diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 083159bc9..fe7c5a1f2 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -480,6 +480,35 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 return nil } +// isMachineCandidateForPreservation checks if the machine is already preserved, in the process of being preserved +// or if it is a candidate for auto-preservation +// TODO@thiyyakat: find more suitable name for function +func (c *controller) isMachineCandidateForPreservation(ctx context.Context, machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { + klog.V(2).Infof("TEST: machineutils.IsPreserveExpiryTimeSet(m): %v,machineutils.HasPreservationTimedOut(m):%v", machineutils.IsPreserveExpiryTimeSet(machine), machineutils.HasPreservationTimedOut(machine)) + if machineutils.IsPreserveExpiryTimeSet(machine) && !machineutils.HasPreservationTimedOut(machine) { + klog.V(3).Infof("Machine %s is preserved until %v, not adding to stale machines", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + return true, nil + } + val, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] + if exists { + switch val { + case machineutils.PreserveMachineAnnotationValueWhenFailed, machineutils.PreserveMachineAnnotationValueNow: // this is in case preservation process is not complete yet + return 
true, nil + case machineutils.PreserveMachineAnnotationValueFalse: + return false, nil + } + } + if machineSet.Status.AutoPreserveFailedMachineCount < machineSet.Spec.AutoPreserveFailedMachineMax { + klog.V(2).Infof("TEST:marking machine %s for autopreservation", machine.Name) + _, err := c.annotateMachineForAutoPreservation(ctx, machine) + if err != nil { + return true, err + } + return true, nil + } + return false, nil +} + // syncMachineSet will sync the MachineSet with the given key if it has had its expectations fulfilled, // meaning it did not expect to see any more of its machines created or deleted. This function is not meant to be // invoked concurrently with the same key. @@ -925,6 +954,16 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. } func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) (*v1alpha1.Machine, error) { + if m.Labels[v1alpha1.NodeLabelKey] != "" { + // check if backing node has preserve=false annotation, if yes, do not auto-preserve + node, err := dc.nodeLister.Get(m.Labels[v1alpha1.NodeLabelKey]) + if err != nil { + return nil, err + } + if val, exists := node.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueFalse { + return nil, nil + } + } updatedMachine, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { if clone.Annotations == nil { clone.Annotations = make(map[string]string) @@ -933,9 +972,9 @@ func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m return nil }) if err != nil { - return nil, fmt.Errorf("error in annotating machine %s for auto-preservation, error:%v", m.Name, err) + return nil, err } - klog.V(2).Infof("Updated Machine %s/%s with auto-preserve annotation.", m.Namespace, m.Name) + klog.V(2).Infof("Updated machine %s with auto-preserve 
annotation.", m.Name) return updatedMachine, nil } diff --git a/pkg/openapi/openapi_generated.go b/pkg/openapi/openapi_generated.go index 257954fd1..b0cd12043 100644 --- a/pkg/openapi/openapi_generated.go +++ b/pkg/openapi/openapi_generated.go @@ -1427,9 +1427,9 @@ func schema_pkg_apis_machine_v1alpha1_MachineSetStatus(ref common.ReferenceCallb }, }, }, - "autoPreservedFailedMachineCount": { + "autoPreserveFailedMachineCount": { SchemaProps: spec.SchemaProps{ - Description: "AutoPreservedFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved", + Description: "AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved", Type: []string{"integer"}, Format: "int32", }, diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index c428f1a63..1b6bd5279 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -274,6 +274,11 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp } } + retry, err = c.manageMachinePreservation(ctx, machine) + if err != nil { + return retry, err + } + if machine.Spec.ProviderID == "" || machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff { return c.triggerCreationFlow( ctx, @@ -796,7 +801,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } klog.V(3).Infof("TEST: preserve:%s", preserveValue) switch preserveValue { - case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed: + case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed, machineutils.PreserveMachineAnnotationValuePreservedByMCM: // if preserve annotation value has switched from now to when-failed, then stop preservation if 
preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && !machineutils.IsMachineFailed(updatedMachine) { if machineutils.IsPreserveExpiryTimeSet(updatedMachine) { @@ -804,12 +809,12 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } return machineutils.LongRetry, nil } - isComplete, err := c.isMachinePreservationComplete(ctx, machine) + isComplete, err := c.isMachinePreservationComplete(machine) if err != nil { return machineutils.ShortRetry, err } if !isComplete { - return c.preserveMachine(ctx, machine) + return c.preserveMachine(ctx, machine, preserveValue) } if hasMachinePreservationTimedOut(machine) { return c.stopMachinePreservation(ctx, updatedMachine) @@ -865,7 +870,7 @@ func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, m return machine, mAnnotationValue, mExists, nil } -func (c *controller) isMachinePreservationComplete(ctx context.Context, machine *v1alpha1.Machine) (bool, error) { +func (c *controller) isMachinePreservationComplete(machine *v1alpha1.Machine) (bool, error) { // if PreserveExpiryTime is set and machine has not failed, then yes, // if PreserveExpiryTime is set and machine has failed, the node condition must be there saying drain successful // if PreserveExpiryTime is not set, then no @@ -878,7 +883,7 @@ func (c *controller) isMachinePreservationComplete(ctx context.Context, machine return false, err } if cond := nodeops.GetCondition(node, v1alpha1.NodePreserved); cond != nil { - if cond.Reason == v1alpha1.DrainSuccessful { + if cond.Message == v1alpha1.PreservedNodeDrainSuccessful { return true, nil } } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 2b3b46b75..f267778ad 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2176,7 +2176,11 @@ func (c *controller) canMarkMachineFailed(machineDeployName, 
machineName, namesp for _, machine := range machineList { if machine.Status.CurrentStatus.Phase != v1alpha1.MachineUnknown && machine.Status.CurrentStatus.Phase != v1alpha1.MachineRunning { - inProgress++ + // since Preserved Failed machines are not replaced immediately, + // they need not be considered towards inProgress + if !machineutils.IsPreserveExpiryTimeSet(machine) { + inProgress++ + } switch machine.Status.CurrentStatus.Phase { case v1alpha1.MachineTerminating: terminating++ @@ -2391,7 +2395,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach CAScaleDownAnnotation := make(map[string]string) CAScaleDownAnnotation[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue updatedNode, _, _ := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) - updatedNode, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err @@ -2418,7 +2422,10 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach klog.V(3).Infof("Error trying to get node preserved condition for machine %s: %v", machine.Name, err) return machineutils.ShortRetry, err } - if existingNodePreservedCondition == nil || existingNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { + if existingNodePreservedCondition != nil && existingNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { + klog.V(2).Infof("TEST: drainPreservedNode Successful %s", machine.Name) + newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful + } else { err = c.drainPreservedNode(ctx, machine) if err != nil { klog.V(3).Infof("TEST: drain failed with error:%s", err) @@ -2432,11 +2439,6 @@ func (c *controller) 
preserveMachine(ctx context.Context, machine *v1alpha1.Mach } return machineutils.ShortRetry, err } - klog.V(2).Infof("TEST: drainPreservedNode Successful %s", machine.Name) - newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful - } else { - klog.V(3).Infof("TEST: unnecessary entry into preserved machine %s", machine.Name) - return machineutils.LongRetry, nil } } if err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newNodePreservedCondition); err != nil { From 37ef7fa2b952b5b662c47ef5eff192cf9fb5ca85 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 8 Dec 2025 16:44:47 +0530 Subject: [PATCH 20/79] Add support for uncordoning preserved node that is healthy --- .../provider/machinecontroller/machine.go | 21 ++++++++++++++++--- .../machinecontroller/machine_util.go | 15 +++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 1b6bd5279..0baa292cf 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -231,13 +231,28 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp return machineutils.ShortRetry, err } if cond := nodeops.GetCondition(node, corev1.NodeNetworkUnavailable); cond.Status != corev1.ConditionTrue { - cond := corev1.NodeCondition{Type: corev1.NodeNetworkUnavailable, Status: corev1.ConditionTrue} - err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), cond) + newCond := corev1.NodeCondition{Type: corev1.NodeNetworkUnavailable, Status: corev1.ConditionTrue} + err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newCond) if err != nil { klog.V(2).Infof("TEST:Machine %q: Failed to change node condition %q: %v", machine.Name, machine.Name, err) return machineutils.ShortRetry, err } - klog.V(2).Infof("TEST:Machine %q: 
updated node %q condition", machine.Name, machine.Name) + klog.V(2).Infof("TEST: marked nodenetwork as unavailable for machine %s", machine.Name) + } + } else if machine.Labels["test-failed"] == "false" { + node, err := c.nodeLister.Get(getNodeName(machine)) + if err != nil { + klog.V(3).Infof("TEST:Machine %q: Failed to get node %q: %v", machine.Name, machine.Name, err) + return machineutils.ShortRetry, err + } + if cond := nodeops.GetCondition(node, corev1.NodeNetworkUnavailable); cond.Status != corev1.ConditionFalse { + newCond := corev1.NodeCondition{Type: corev1.NodeNetworkUnavailable, Status: corev1.ConditionFalse} + err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newCond) + if err != nil { + klog.V(2).Infof("TEST:Machine %q: Failed to change node condition %q: %v", machine.Name, machine.Name, err) + return machineutils.ShortRetry, err + } + klog.V(2).Infof("TEST: marked nodenetwork as available %s", machine.Name) } } } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index f267778ad..dda97a607 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -983,6 +983,19 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph klog.Warning(err) } } else { + // if machine was auto-preserved (which means it is in Failed phase), stop preservation + if cond := nodeops.GetCondition(node, v1alpha1.NodePreserved); cond != nil && machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + if cond.Reason == v1alpha1.NodePreservedByMCM { + // need to uncordon node + nodeCopy := node.DeepCopy() + nodeCopy.Spec.Unschedulable = false + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + return machineutils.ShortRetry, err + } + klog.V(3).Infof("TEST: preserved node uncordoned successfully %s", machine.Name) + } 
+ } // Machine rejoined the cluster after a health-check description = fmt.Sprintf("Machine %s successfully re-joined the cluster", clone.Name) lastOperationType = v1alpha1.MachineOperationHealthCheck @@ -1027,6 +1040,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } } } + } if !cloneDirty && (machine.Status.CurrentStatus.Phase == v1alpha1.MachineInPlaceUpdating || @@ -2455,6 +2469,7 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp if !machineutils.IsPreserveExpiryTimeSet(machine) { return machineutils.LongRetry, nil } + klog.V(3).Infof("TEST: stopping preservation machine %q", machine.Name) // if backing node exists, remove annotations that would prevent scale down by autoscaler if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] From 37caeb71e087bef8d051a59844b0531ff33ef530 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 10 Dec 2025 12:19:17 +0530 Subject: [PATCH 21/79] Refactor code: * Split larger functions into smaller ones * Remove debug comments * Add comments where required --- pkg/apis/machine/v1alpha1/machine_types.go | 5 +- pkg/controller/deployment_machineset_util.go | 1 - pkg/controller/machineset.go | 2 - pkg/util/nodeops/conditions.go | 6 +- .../provider/machinecontroller/machine.go | 91 +++---- .../machinecontroller/machine_test.go | 8 +- .../machinecontroller/machine_util.go | 235 +++++++++++------- pkg/util/provider/machineutils/utils.go | 2 - 8 files changed, 199 insertions(+), 151 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 0ce3eb483..8fe3d75fa 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -255,7 +255,10 @@ const ( NodePreservedByUser string = "PreservedByUser" //PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is 
successful - PreservedNodeDrainSuccessful string = "Preserved Node Drained Successfully" + PreservedNodeDrainSuccessful string = "Preserved Node drained successfully" + + //PreservedNodeDrainUnsuccessful is a constant for the message in condition that indicates that the preserved node's drain was not successful + PreservedNodeDrainUnsuccessful string = "Preserved Node could not be drained" ) // CurrentStatus contains information about the current status of Machine. diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index fc9fcb7b9..82475f405 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -130,7 +130,6 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al if cond := getMachineCondition(machine, v1alpha1.NodePreserved); cond != nil { if cond.Reason == v1alpha1.NodePreservedByMCM { autoPreserveFailedMachineCount++ - klog.V(3).Infof("TEST: incrementing autoPreserveFailedMachineCount to %v", autoPreserveFailedMachineCount) } } } diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index fe7c5a1f2..163f7f894 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -484,7 +484,6 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 // or if it is a candidate for auto-preservation // TODO@thiyyakat: find more suitable name for function func (c *controller) isMachineCandidateForPreservation(ctx context.Context, machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { - klog.V(2).Infof("TEST: machineutils.IsPreserveExpiryTimeSet(m): %v,machineutils.HasPreservationTimedOut(m):%v", machineutils.IsPreserveExpiryTimeSet(machine), machineutils.HasPreservationTimedOut(machine)) if machineutils.IsPreserveExpiryTimeSet(machine) && !machineutils.HasPreservationTimedOut(machine) { klog.V(3).Infof("Machine %s is preserved until %v, not adding to 
stale machines", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return true, nil @@ -499,7 +498,6 @@ func (c *controller) isMachineCandidateForPreservation(ctx context.Context, mach } } if machineSet.Status.AutoPreserveFailedMachineCount < machineSet.Spec.AutoPreserveFailedMachineMax { - klog.V(2).Infof("TEST:marking machine %s for autopreservation", machine.Name) _, err := c.annotateMachineForAutoPreservation(ctx, machine) if err != nil { return true, err diff --git a/pkg/util/nodeops/conditions.go b/pkg/util/nodeops/conditions.go index b02257d73..81b4b38c3 100644 --- a/pkg/util/nodeops/conditions.go +++ b/pkg/util/nodeops/conditions.go @@ -92,15 +92,13 @@ func AddOrUpdateConditionsOnNode(ctx context.Context, c clientset.Interface, nod } // UpdateNodeConditions is for updating the node conditions from oldNode to the newNode -// using the nodes Update() method +// using the node's UpdateStatus() method func UpdateNodeConditions(ctx context.Context, c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error { newNodeClone := oldNode.DeepCopy() newNodeClone.Status.Conditions = newNode.Status.Conditions - _, err := c.CoreV1().Nodes().UpdateStatus(ctx, newNodeClone, metav1.UpdateOptions{}) if err != nil { - return fmt.Errorf("failed to create update conditions for node %q: %v", nodeName, err) + return fmt.Errorf("failed to create/update conditions on node %q: %v", nodeName, err) } - return nil } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 0baa292cf..9166ab80f 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -802,19 +802,16 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // - failed machine, autoPreserveMax not breached, must be preserved // - failed machine, already preserved, check for timeout // -// Auto-preserve case will have to be handled where machine moved 
from Unknown to Failed +// manageMachinePreservation checks if any preservation-related operations need to be performed on the machine and node objects func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { - klog.V(3).Infof("TEST: entering manageMachinePreservation ") - // check effective preservation value based on node's and machine's annotations. + // get effective preservation value based on node's and machine's annotations. updatedMachine, preserveValue, exists, err := c.syncEffectivePreserveAnnotationValue(ctx, machine) if err != nil { - klog.Errorf("Error getting preserve annotation value for machine %q: %s", machine.Name, err) return machineutils.ShortRetry, err } if !exists { return machineutils.LongRetry, nil } - klog.V(3).Infof("TEST: preserve:%s", preserveValue) switch preserveValue { case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed, machineutils.PreserveMachineAnnotationValuePreservedByMCM: // if preserve annotation value has switched from now to when-failed, then stop preservation @@ -845,44 +842,45 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return machineutils.LongRetry, nil } +// syncEffectivePreserveAnnotationValue finds the preservation annotation value by considering both node and machine objects +// if the backing node is annotated with preserve annotation, the preserve value will be synced to the machine +// if there is no backing node, or the node has no preserve annotation, then the machine's preserve value is honoured +// if both machine and node objects have conflicting preserve annotation values, the node's value will be honoured func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, string, bool, error) { - var effectivePreserveAnnotationValue string mAnnotationValue, mExists := 
machine.Annotations[machineutils.PreserveMachineAnnotationKey] - // node annotation value, if exists, will always override and overwrite machine annotation value for preserve - if machine.Labels[v1alpha1.NodeLabelKey] != "" { - nodeName := machine.Labels[v1alpha1.NodeLabelKey] - node, err := c.nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error trying to get node %q: %v", nodeName, err) - return machine, "", false, err - } - nAnnotationValue, nExists := node.Annotations[machineutils.PreserveMachineAnnotationKey] - switch { - case nExists && mExists: - if nAnnotationValue == mAnnotationValue { - return machine, nAnnotationValue, true, nil - } - effectivePreserveAnnotationValue = nAnnotationValue - case nExists && !mExists: - effectivePreserveAnnotationValue = nAnnotationValue - case mExists && !nExists: - return machine, mAnnotationValue, true, nil - case !nExists && !mExists: - return machine, "", false, nil - } - clone := machine.DeepCopy() - if clone.Annotations == nil { - clone.Annotations = make(map[string]string) - } - clone.Annotations[machineutils.PreserveMachineAnnotationKey] = effectivePreserveAnnotationValue - updatedMachine, err := c.controlMachineClient.Machines(c.namespace).Update(ctx, clone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error updating machine with preserve annotations %q: %v", machine.Name, err) - return machine, "", true, err - } - return updatedMachine, effectivePreserveAnnotationValue, true, nil + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + if nodeName == "" { + return machine, mAnnotationValue, mExists, nil + } + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error trying to get node %q: %v", nodeName, err) + return machine, "", false, err + } + nAnnotationValue, nExists := node.Annotations[machineutils.PreserveMachineAnnotationKey] + switch { + case nExists && mExists: + if nAnnotationValue == mAnnotationValue { + return machine, nAnnotationValue, nExists, nil + } // else 
falls through to update machine with node's value + case nExists && !mExists: + // falls through to update machine with node's value + case mExists && !nExists: + return machine, mAnnotationValue, mExists, nil + case !nExists && !mExists: + return machine, "", false, nil + } + clone := machine.DeepCopy() + if clone.Annotations == nil { + clone.Annotations = make(map[string]string) + } + clone.Annotations[machineutils.PreserveMachineAnnotationKey] = nAnnotationValue + updatedMachine, err := c.controlMachineClient.Machines(c.namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error updating machine %q with preserve annotation %q: %v", machine.Name, nAnnotationValue, err) + return machine, "", true, err } - return machine, mAnnotationValue, mExists, nil + return updatedMachine, nAnnotationValue, nExists, nil } func (c *controller) isMachinePreservationComplete(machine *v1alpha1.Machine) (bool, error) { @@ -892,6 +890,9 @@ func (c *controller) isMachinePreservationComplete(machine *v1alpha1.Machine) (b if !machineutils.IsPreserveExpiryTimeSet(machine) { return false, nil } else if machineutils.IsMachineFailed(machine) { + if machine.Labels[v1alpha1.NodeLabelKey] == "" { + return true, nil + } node, err := c.nodeLister.Get(getNodeName(machine)) if err != nil { klog.Errorf("error trying to get node %q: %v", getNodeName(machine), err) @@ -906,13 +907,3 @@ func (c *controller) isMachinePreservationComplete(machine *v1alpha1.Machine) (b } return true, nil } - -// getMachineDeploymentForMachine returns the machine deployment for a given machine -func (c *controller) getMachineDeploymentForMachine(machine *v1alpha1.Machine) (*v1alpha1.MachineDeployment, error) { - machineDeploymentName := getMachineDeploymentName(machine) - machineDeployment, err := c.controlMachineClient.MachineDeployments(c.namespace).Get(context.TODO(), machineDeploymentName, metav1.GetOptions{ - TypeMeta: metav1.TypeMeta{}, - ResourceVersion: "", - }) - return 
machineDeployment, err -} diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 76214eac7..d5c5b5925 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -2782,7 +2782,7 @@ var _ = Describe("machine", func() { }, }, expect: expect{ - err: fmt.Errorf("failed to create update conditions for node \"fakeID-0\": Failed to update node"), + err: fmt.Errorf("failed to create/update conditions on node \"fakeID-0\": Failed to update node"), retry: machineutils.ShortRetry, machine: newMachine( &v1alpha1.MachineTemplateSpec{ @@ -2801,7 +2801,7 @@ var _ = Describe("machine", func() { LastUpdateTime: metav1.Now(), }, LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Drain failed due to failure in update of node conditions - %s. Will retry in next sync. %s", "failed to create update conditions for node \"fakeID-0\": Failed to update node", machineutils.InitiateDrain), + Description: fmt.Sprintf("Drain failed due to failure in update of node conditions - %s. Will retry in next sync. %s", "failed to create/update conditions on node \"fakeID-0\": Failed to update node", machineutils.InitiateDrain), State: v1alpha1.MachineStateFailed, Type: v1alpha1.MachineOperationDelete, LastUpdateTime: metav1.Now(), @@ -2996,7 +2996,7 @@ var _ = Describe("machine", func() { }, }, expect: expect{ - err: fmt.Errorf("failed to create update conditions for node \"fakeNode-0\": Failed to update node"), + err: fmt.Errorf("failed to create/update conditions on node \"fakeNode-0\": Failed to update node"), retry: machineutils.ShortRetry, machine: newMachine( &v1alpha1.MachineTemplateSpec{ @@ -3015,7 +3015,7 @@ var _ = Describe("machine", func() { LastUpdateTime: metav1.Now(), }, LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Drain failed due to failure in update of node conditions - %s. Will retry in next sync. 
%s", "failed to create update conditions for node \"fakeNode-0\": Failed to update node", machineutils.InitiateDrain), + Description: fmt.Sprintf("Drain failed due to failure in update of node conditions - %s. Will retry in next sync. %s", "failed to create/update conditions on node \"fakeNode-0\": Failed to update node", machineutils.InitiateDrain), State: v1alpha1.MachineStateFailed, Type: v1alpha1.MachineOperationDelete, LastUpdateTime: metav1.Now(), diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index dda97a607..4460a553d 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -993,7 +993,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph if err != nil { return machineutils.ShortRetry, err } - klog.V(3).Infof("TEST: preserved node uncordoned successfully %s", machine.Name) } } // Machine rejoined the cluster after a health-check @@ -2373,153 +2372,216 @@ func (c *controller) fetchMatchingNodeName(machineName string) (string, error) { return "", fmt.Errorf("machine %q not found in node lister for machine %q", machineName, machineName) } -// preserveMachine contains logic to start the preservation of a machine and node. It syncs node annotations to the machine if the backing node exists, -// or has an annotation related to preservation. -// it does not sync preserve annotation values from machine to node to prevent bi-directional syncing issues. +/* +SECTION +Utility Functions for Machine Preservation +*/ + +// preserveMachine contains logic to start the preservation of a machine and node. 
func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (machineutils.RetryPeriod, error) { - klog.V(3).Infof("TEST: Entering preserve machine flow") + // Step 1: Set PreserveExpiryTime if !machineutils.IsPreserveExpiryTimeSet(machine) { - preservedCurrentStatus := v1alpha1.CurrentStatus{ - Phase: machine.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), - } - clone := machine.DeepCopy() - clone.Status.CurrentStatus = preservedCurrentStatus - _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + _, retry, err := c.setPreserveExpiryTime(ctx, machine) if err != nil { - klog.Warningf("Machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) - return machineutils.ConflictRetry, err + return retry, err } - klog.V(3).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) } - klog.V(3).Infof("TEST: preserveexpiry set for machine %q", machine.Name) - if machine.Labels[v1alpha1.NodeLabelKey] != "" { - nodeName := machine.Labels[v1alpha1.NodeLabelKey] - node, err := c.nodeLister.Get(nodeName) + if machine.Labels[v1alpha1.NodeLabelKey] == "" { + return machineutils.LongRetry, nil + } + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) + return machineutils.ShortRetry, err + } + // Step 2: Add annotations to prevent scale down of node by CA + _, retry, err := c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) + if err != nil { + return retry, err + } + existingNodePreservedCondition, err := nodeops.GetNodeCondition(ctx, c.targetCoreClient, getNodeName(machine), v1alpha1.NodePreserved) + if err != nil { + klog.Errorf("error trying to get existing node preserved condition for node %q of machine %q: %v", nodeName, machine.Name, err) + return machineutils.ShortRetry, err + } + drainSuccessful := false + // Step 3: If machine is in Failed Phase, drain the backing node + if c.shouldNodeBeDrained(machine, existingNodePreservedCondition) { + err = c.drainPreservedNode(ctx, machine) if err != nil { - klog.Errorf("error trying to get node %q: %v", nodeName, err) + retry, err = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) + if err != nil { + return retry, err + } return machineutils.ShortRetry, err } - nodeCopy := node.DeepCopy() - // not updating node's preserve annotations here in case operator is manipulating machine annotations only - // if node annotation is updated, machine annotation will be overwritten with this value even if operator wants it to change - // function never returns error, can be ignored - if nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { - CAScaleDownAnnotation := make(map[string]string) - CAScaleDownAnnotation[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue - updatedNode, _, _ := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) - if err != nil { - if apierrors.IsConflict(err) { - return 
machineutils.ConflictRetry, err - } - klog.Errorf("node UPDATE failed for machine %s with node %s. Retrying, error: %s", machine.Name, nodeName, err) - return machineutils.ShortRetry, err + drainSuccessful = true + } + // Step 4: Update NodePreserved Condition on Node + retry, err = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) + if err != nil { + return retry, err + } + klog.V(3).Infof("Machine %s preserved successfully.", machine.Name) + return machineutils.LongRetry, nil +} + +// setPreserveExpiryTime sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout +func (c *controller) setPreserveExpiryTime(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, machineutils.RetryPeriod, error) { + preservedCurrentStatus := v1alpha1.CurrentStatus{ + Phase: machine.Status.CurrentStatus.Phase, + TimeoutActive: machine.Status.CurrentStatus.TimeoutActive, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), + } + clone := machine.DeepCopy() + clone.Status.CurrentStatus = preservedCurrentStatus + _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("machine/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) + if apierrors.IsConflict(err) { + return nil, machineutils.ConflictRetry, err + } + return nil, machineutils.ShortRetry, err + } + klog.V(4).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) + return clone, machineutils.LongRetry, nil +} + +// addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node +func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, machineutils.RetryPeriod, error) { + nodeCopy := node.DeepCopy() + if nodeCopy.Annotations == nil || nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { + CAScaleDownAnnotation := map[string]string{ + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, + } + updatedNode, _, _ := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) + _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error trying to update CA annotation on node %q: %v", updatedNode.Name, err) + if apierrors.IsConflict(err) { + return nil, machineutils.ConflictRetry, err } - klog.V(2).Infof("Updated CA annotations for node %s, for machine %q, successfully", nodeCopy.Name, machine.Name) + return nil, machineutils.ShortRetry, err } - newNodePreservedCondition := v1.NodeCondition{ + return updatedNode, machineutils.LongRetry, nil + } + return node, machineutils.LongRetry, nil +} + +// getNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation +func (c *controller) updateNodePreservedCondition(ctx context.Context, machine *v1alpha1.Machine, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) 
(machineutils.RetryPeriod, error) { + var newNodePreservedCondition *v1.NodeCondition + var changed bool + if existingNodeCondition == nil { + newNodePreservedCondition = &v1.NodeCondition{ Type: v1alpha1.NodePreserved, - Status: v1.ConditionTrue, + Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), } + changed = true + } else { + newNodePreservedCondition = existingNodeCondition.DeepCopy() + } + if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + if drainSuccessful { + if newNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { + newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful + newNodePreservedCondition.Status = v1.ConditionTrue + changed = true + } + } else if newNodePreservedCondition.Status != v1.ConditionFalse { + newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful + newNodePreservedCondition.Status = v1.ConditionFalse + changed = true + } + } else if newNodePreservedCondition.Status != v1.ConditionTrue { + newNodePreservedCondition.Status = v1.ConditionTrue + changed = true + } + if changed { if preserveValue != machineutils.PreserveMachineAnnotationValuePreservedByMCM { newNodePreservedCondition.Reason = v1alpha1.NodePreservedByUser } else { newNodePreservedCondition.Reason = v1alpha1.NodePreservedByMCM } - // drain node only if machine has failed - if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - existingNodePreservedCondition, err := nodeops.GetNodeCondition(ctx, c.targetCoreClient, getNodeName(machine), v1alpha1.NodePreserved) - if err != nil { - klog.V(3).Infof("Error trying to get node preserved condition for machine %s: %v", machine.Name, err) - return machineutils.ShortRetry, err - } - if existingNodePreservedCondition != nil && existingNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { - klog.V(2).Infof("TEST: drainPreservedNode Successful %s", machine.Name) - newNodePreservedCondition.Message = 
v1alpha1.PreservedNodeDrainSuccessful - } else { - err = c.drainPreservedNode(ctx, machine) - if err != nil { - klog.V(3).Infof("TEST: drain failed with error:%s", err) - // drain not successful, retry - // if node condition of NodePreserved is not set, set it: - if existingNodePreservedCondition == nil { - if err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newNodePreservedCondition); err != nil { - klog.V(3).Infof("TEST: updating node with preserved condition failed: %s", err) - return machineutils.ShortRetry, err - } - } - return machineutils.ShortRetry, err - } + if err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), *newNodePreservedCondition); err != nil { + klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", getNodeName(machine), machine.Name, err) + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, err } - } - if err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newNodePreservedCondition); err != nil { - klog.V(3).Infof("TEST: updating node with preserved condition failed: %s", err) return machineutils.ShortRetry, err } - klog.V(3).Infof("TEST: updating machine %q with new node condition was successful", machine.Name) } return machineutils.LongRetry, nil } +// shouldNodeBeDrained returns true if the machine's backing node must be drained, else false +func (c *controller) shouldNodeBeDrained(machine *v1alpha1.Machine, existingCondition *v1.NodeCondition) bool { + if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + if existingCondition == nil { + return true + } + if existingCondition.Message == v1alpha1.PreservedNodeDrainSuccessful { + return false + } else { + return true + } + } + return false +} + +// stopMachinePreservation stops the preservation of the machine and node func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) 
(machineutils.RetryPeriod, error) { // check if preserveExpiryTime is set, if not, no need to do anything if !machineutils.IsPreserveExpiryTimeSet(machine) { return machineutils.LongRetry, nil } - klog.V(3).Infof("TEST: stopping preservation machine %q", machine.Name) - // if backing node exists, remove annotations that would prevent scale down by autoscaler if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] - node, err := c.nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error trying to get backing node %q for machine %s. error: %v", nodeName, machine.Name, err) - return machineutils.ShortRetry, err - } preservedCondition := v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), } - err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), preservedCondition) + // Step 1: if backing node exists, change node condition to reflect that preservation has stopped + err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), preservedCondition) if err != nil { - klog.Warningf("Node/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) return machineutils.ShortRetry, err } - klog.V(3).Infof("Updated Node Condition NodePreserved for node %s, for machine %q, successfully", node.Name, machine.Name) - // remove CA annotation from node, values do not matter here + // Step 2: remove CA's scale-down disabled annotations to allow CA to scale down node if needed CAAnnotations := make(map[string]string) CAAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" latestNode, err := c.targetCoreClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) if err != nil { - klog.Errorf("error trying to get backing node %q for machine %s. error: %v", nodeName, machine.Name, err) + klog.Errorf("error trying to get backing node %q for machine %s. 
Retrying, error: %v", nodeName, machine.Name, err) return machineutils.ShortRetry, err } latestNodeCopy := latestNode.DeepCopy() latestNodeCopy, _, _ = annotations.RemoveAnnotation(latestNodeCopy, CAAnnotations) // error can be ignored, always returns nil _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, latestNodeCopy, metav1.UpdateOptions{}) if err != nil { - klog.Warningf("Node UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + klog.Errorf("Node UPDATE failed for node %q of machine %q. Retrying, error: %s", nodeName, machine.Name, err) if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err } return machineutils.ShortRetry, err } } + // Step 3: update machine status to set preserve expiry time to metav1.Time{} clone := machine.DeepCopy() clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: machine.Status.CurrentStatus.Phase, + Phase: clone.Status.CurrentStatus.Phase, LastUpdateTime: metav1.Now(), PreserveExpiryTime: metav1.Time{}, } _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { - klog.Warningf("Machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + klog.Errorf("machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) return machineutils.ConflictRetry, err } - klog.V(3).Infof("Machine status updated to stop preservation for machine %q", clone.Name) + klog.V(3).Infof("Machine status updated to stop preservation for machine %q", machine.Name) return machineutils.LongRetry, nil } @@ -2568,11 +2630,9 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M printLogInitError(message, &err, &description, machine, true) } - // TODO@thiyyakat: how to calculate timeout? In the case of preserve=now, preserveexpirytime will not coincide with time of failure in which case pods will et force + // TODO@thiyyakat: how to calculate timeout? 
In the case of preserve=now, PreserveExpiryTime will not coincide with time of failure in which case pods will get force // drained. - // current solution: since we want to know when machine transitioned to Failed, using lastupdatetime. - // in the case of preserve=now, preserveExpiryTime is set from the time the annotation is added, and can't tell us when - // machine moved to Failed + // current solution: since we want to know when machine transitioned to Failed, the code uses LastUpdateTime. timeOutOccurred = utiltime.HasTimeOutOccurred(machine.Status.CurrentStatus.LastUpdateTime, timeOutDuration) if forceDrainLabelPresent || timeOutOccurred { forceDeletePods = true @@ -2637,6 +2697,7 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M return nil } +// hasMachinePreservationTimedOut returns true if preserve expiry time has lapsed func hasMachinePreservationTimedOut(machine *v1alpha1.Machine) bool { return machineutils.IsPreserveExpiryTimeSet(machine) && metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) } diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 261fbc356..56f9c13c7 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -6,7 +6,6 @@ package machineutils import ( - "k8s.io/klog/v2" "time" v1 "k8s.io/api/core/v1" @@ -147,7 +146,6 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { // IsPreserveExpiryTimeSet checks if machine is preserved by MCM func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { - klog.V(3).Infof("Preserve Expiry Time: %v, machine: %s", m.Status.CurrentStatus.PreserveExpiryTime, m.Name) return !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() } From e47153edbb47c1095c66080099a539fb2f97d2b6 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 10 Dec 2025 13:27:04 +0530 Subject: [PATCH 22/79] Fix bug so that recovered preserved nodes are uncordoned --- 
.../provider/machinecontroller/machine_util.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 4460a553d..652248f99 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -983,17 +983,15 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph klog.Warning(err) } } else { - // if machine was auto-preserved (which means it is in Failed phase), stop preservation + // if machine was preserved (which means it is in Failed phase), uncordon node so that pods can be scheduled on it again if cond := nodeops.GetCondition(node, v1alpha1.NodePreserved); cond != nil && machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - if cond.Reason == v1alpha1.NodePreservedByMCM { - // need to uncordon node - nodeCopy := node.DeepCopy() - nodeCopy.Spec.Unschedulable = false - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - return machineutils.ShortRetry, err - } + nodeCopy := node.DeepCopy() + nodeCopy.Spec.Unschedulable = false + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + return machineutils.ShortRetry, err } + } // Machine rejoined the cluster after a health-check description = fmt.Sprintf("Machine %s successfully re-joined the cluster", clone.Name) From de3f92f9be071d66cef78d7225f0552c9d7e1f8e Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 10 Dec 2025 15:01:40 +0530 Subject: [PATCH 23/79] Minor changes --- Makefile | 6 +++--- pkg/controller/machineset.go | 3 --- pkg/util/provider/machinecontroller/machine_util.go | 7 ++----- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 6b0f4f913..aba0236ac 100644 --- a/Makefile +++ b/Makefile @@ -172,9 +172,9 @@ test-clean: 
.PHONY: generate generate: $(VGOPATH) $(DEEPCOPY_GEN) $(DEFAULTER_GEN) $(CONVERSION_GEN) $(OPENAPI_GEN) $(CONTROLLER_GEN) $(GEN_CRD_API_REFERENCE_DOCS) - GOFLAGS="-buildvcs=false" $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout - @GOFLAGS="-buildvcs=false" ./hack/generate-code - @GOFLAGS="-buildvcs=false" ./hack/api-reference/generate-spec-doc.sh + $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout + @./hack/generate-code + @./hack/api-reference/generate-spec-doc.sh .PHONY: add-license-headers add-license-headers: $(GO_ADD_LICENSE) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 163f7f894..4c41eb560 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -711,9 +711,6 @@ func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1al // at all times, replica count will be upheld, even if it means deletion of a pending machine // TODO@thiyyakat: write unit test for this scenario filteredMachines = prioritisePreservedMachines(filteredMachines) - - fmt.Printf("len(filteredMachines)=%d, diff=%d\n", len(filteredMachines), diff) - } return filteredMachines[:diff] } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 652248f99..e5d88125d 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -212,7 +212,6 @@ func nodeConditionsHaveChanged(oldConditions []v1.NodeCondition, newConditions [ if !exists || (oldC.Status != c.Status) || (c.Type == v1alpha1.NodeInPlaceUpdate && oldC.Reason != c.Reason) { addedOrUpdatedConditions = append(addedOrUpdatedConditions, c) } - } // checking for any deleted condition @@ -991,7 +990,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph if err != nil { return machineutils.ShortRetry, err 
} - } // Machine rejoined the cluster after a health-check description = fmt.Sprintf("Machine %s successfully re-joined the cluster", clone.Name) @@ -1037,7 +1035,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } } } - } if !cloneDirty && (machine.Status.CurrentStatus.Phase == v1alpha1.MachineInPlaceUpdating || @@ -1172,7 +1169,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } if cloneDirty { - updatedMachine, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { // Keep retrying across reconciles until update goes through klog.Errorf("Update of Phase/Conditions failed for machine %q. Retrying, error: %q", machine.Name, err) @@ -1181,7 +1178,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } return machineutils.ShortRetry, err } else { - klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", updatedMachine.Name, getProviderID(updatedMachine), getNodeName(updatedMachine)) + klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", machine.Name, getProviderID(machine), getNodeName(machine)) err = errSuccessfulPhaseUpdate } return machineutils.ShortRetry, err From 2a51c1e6a7968046e145ad191fda857b10c9c8d8 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 10 Dec 2025 15:48:56 +0530 Subject: [PATCH 24/79] Change verb used in log statements for machine/node name --- pkg/controller/machineset.go | 4 ++-- pkg/util/provider/machinecontroller/machine.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 4c41eb560..45b9b5b73 100644 --- a/pkg/controller/machineset.go +++ 
b/pkg/controller/machineset.go @@ -485,7 +485,7 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 // TODO@thiyyakat: find more suitable name for function func (c *controller) isMachineCandidateForPreservation(ctx context.Context, machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { if machineutils.IsPreserveExpiryTimeSet(machine) && !machineutils.HasPreservationTimedOut(machine) { - klog.V(3).Infof("Machine %s is preserved until %v, not adding to stale machines", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + klog.V(3).Infof("Machine %q is preserved until %v, not adding to stale machines", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return true, nil } val, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] @@ -969,7 +969,7 @@ func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m if err != nil { return nil, err } - klog.V(2).Infof("Updated machine %s with auto-preserve annotation.", m.Name) + klog.V(2).Infof("Updated machine %q with auto-preserved annotation.", m.Name) return updatedMachine, nil } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 9166ab80f..d09b8f7e5 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -77,7 +77,7 @@ func (c *controller) updateMachine(oldObj, newObj any) { } if oldMachine.Generation == newMachine.Generation { - klog.V(3).Infof("Skipping other non-spec updates for machine %s", oldMachine.Name) + klog.V(3).Infof("Skipping other non-spec updates for machine %q", oldMachine.Name) return } From 95438c10ba87ba048297d8d9ec92e67f309a3427 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 10 Dec 2025 16:57:17 +0530 Subject: [PATCH 25/79] Fix mistake made during rebasing --- .../provider/machinecontroller/machine_util.go | 15 +++++---------- 1 file changed, 5 insertions(+), 10 
deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index e5d88125d..f99635a67 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -28,9 +28,9 @@ import ( "encoding/json" "errors" "fmt" - "maps" "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" "github.com/gardener/machine-controller-manager/pkg/util/annotations" + "maps" "math" "runtime" "strconv" @@ -982,7 +982,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph klog.Warning(err) } } else { - // if machine was preserved (which means it is in Failed phase), uncordon node so that pods can be scheduled on it again + // if machine was preserved and in Failed phase, uncordon node so that pods can be scheduled on it again if cond := nodeops.GetCondition(node, v1alpha1.NodePreserved); cond != nil && machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { nodeCopy := node.DeepCopy() nodeCopy.Spec.Unschedulable = false @@ -1110,11 +1110,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph machineDeployName := getMachineDeploymentName(machine) // creating lock for machineDeployment, if not allocated c.permitGiver.RegisterPermits(machineDeployName, 1) - retry, err := c.tryMarkingMachineFailed(ctx, machine, clone, machineDeployName, description, lockAcquireTimeout) - if err != nil { - return retry, err - } - return retry, err + return c.tryMarkingMachineFailed(ctx, machine, clone, machineDeployName, description, lockAcquireTimeout) } if isMachineInPlaceUpdating { @@ -1169,20 +1165,19 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } if cloneDirty { - _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + _, err = c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, 
metav1.UpdateOptions{}) if err != nil { // Keep retrying across reconciles until update goes through klog.Errorf("Update of Phase/Conditions failed for machine %q. Retrying, error: %q", machine.Name, err) if apierrors.IsConflict(err) { return machineutils.ConflictRetry, err } - return machineutils.ShortRetry, err } else { klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", machine.Name, getProviderID(machine), getNodeName(machine)) + // Return error to end the reconcile err = errSuccessfulPhaseUpdate } return machineutils.ShortRetry, err - // Return error to end the reconcile } return machineutils.LongRetry, nil } From 6fc631772d7563f8c267e23a10108ab5d242c365 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 11 Dec 2025 13:03:54 +0530 Subject: [PATCH 26/79] Change return types of preservation util functions such that only caller returns retry period --- pkg/controller/machineset.go | 2 +- .../provider/machinecontroller/machine.go | 32 +++++++- .../machinecontroller/machine_util.go | 75 ++++++++----------- 3 files changed, 59 insertions(+), 50 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 45b9b5b73..b116e050e 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -631,7 +631,7 @@ func (c *controller) reconcileClusterMachineSet(key string) error { // Multiple things could lead to this update failing. 
Requeuing the machine set ensures // Returning an error causes a requeue without forcing a hotloop if !apierrors.IsNotFound(err) { - klog.Errorf("Update machineSet %s failed with: %s", machineSet.Name, err) + klog.Errorf("update machineSet %s failed with: %s", machineSet.Name, err) } return err } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index d09b8f7e5..a9771c65c 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -817,7 +817,13 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a // if preserve annotation value has switched from now to when-failed, then stop preservation if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && !machineutils.IsMachineFailed(updatedMachine) { if machineutils.IsPreserveExpiryTimeSet(updatedMachine) { - return c.stopMachinePreservation(ctx, updatedMachine) + err = c.stopMachinePreservation(ctx, updatedMachine) + if err != nil { + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, nil + } + return machineutils.ShortRetry, err + } } return machineutils.LongRetry, nil } @@ -826,13 +832,31 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return machineutils.ShortRetry, err } if !isComplete { - return c.preserveMachine(ctx, machine, preserveValue) + err = c.preserveMachine(ctx, machine, preserveValue) + if err != nil { + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, nil + } + return machineutils.ShortRetry, err + } } if hasMachinePreservationTimedOut(machine) { - return c.stopMachinePreservation(ctx, updatedMachine) + err = c.stopMachinePreservation(ctx, updatedMachine) + if err != nil { + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, nil + } + return machineutils.ShortRetry, err + } } case machineutils.PreserveMachineAnnotationValueFalse: - return 
c.stopMachinePreservation(ctx, updatedMachine) + err = c.stopMachinePreservation(ctx, updatedMachine) + if err != nil { + if apierrors.IsConflict(err) { + return machineutils.ConflictRetry, nil + } + return machineutils.ShortRetry, err + } case "": return machineutils.LongRetry, nil default: diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index f99635a67..0271c1b2a 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2368,57 +2368,54 @@ Utility Functions for Machine Preservation */ // preserveMachine contains logic to start the preservation of a machine and node. -func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (machineutils.RetryPeriod, error) { +func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { // Step 1: Set PreserveExpiryTime if !machineutils.IsPreserveExpiryTimeSet(machine) { - _, retry, err := c.setPreserveExpiryTime(ctx, machine) + _, err := c.setPreserveExpiryTime(ctx, machine) if err != nil { - return retry, err + return err } } if machine.Labels[v1alpha1.NodeLabelKey] == "" { - return machineutils.LongRetry, nil + return nil } nodeName := machine.Labels[v1alpha1.NodeLabelKey] node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) - return machineutils.ShortRetry, err + return err } // Step 2: Add annotations to prevent scale down of node by CA - _, retry, err := c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) + _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { - return retry, err + return err } existingNodePreservedCondition, err := nodeops.GetNodeCondition(ctx, c.targetCoreClient, getNodeName(machine), v1alpha1.NodePreserved) if err != nil { klog.Errorf("error trying to get existing node preserved condition for node %q of machine %q: %v", nodeName, machine.Name, err) - return machineutils.ShortRetry, err + return err } drainSuccessful := false // Step 3: If machine is in Failed Phase, drain the backing node if c.shouldNodeBeDrained(machine, existingNodePreservedCondition) { err = c.drainPreservedNode(ctx, machine) if err != nil { - retry, err = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) - if err != nil { - return retry, err - } - return machineutils.ShortRetry, err + _ = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) + return err } drainSuccessful = true } // Step 4: Update NodePreserved Condition on Node - retry, err = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) + err = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) if err != nil { - return retry, err + return err } klog.V(3).Infof("Machine %s preserved successfully.", machine.Name) - return machineutils.LongRetry, nil + return nil } // setPreserveExpiryTime sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout -func (c *controller) setPreserveExpiryTime(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, machineutils.RetryPeriod, error) { +func (c 
*controller) setPreserveExpiryTime(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { preservedCurrentStatus := v1alpha1.CurrentStatus{ Phase: machine.Status.CurrentStatus.Phase, TimeoutActive: machine.Status.CurrentStatus.TimeoutActive, @@ -2430,17 +2427,14 @@ func (c *controller) setPreserveExpiryTime(ctx context.Context, machine *v1alpha _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) - if apierrors.IsConflict(err) { - return nil, machineutils.ConflictRetry, err - } - return nil, machineutils.ShortRetry, err + return nil, err } klog.V(4).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) - return clone, machineutils.LongRetry, nil + return clone, nil } // addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node -func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, machineutils.RetryPeriod, error) { +func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, error) { nodeCopy := node.DeepCopy() if nodeCopy.Annotations == nil || nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { CAScaleDownAnnotation := map[string]string{ @@ -2450,18 +2444,15 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { klog.Errorf("error trying to update CA annotation on node %q: %v", updatedNode.Name, err) - if apierrors.IsConflict(err) { - return nil, machineutils.ConflictRetry, err - } - return nil, machineutils.ShortRetry, err + return nil, err } - 
return updatedNode, machineutils.LongRetry, nil + return updatedNode, nil } - return node, machineutils.LongRetry, nil + return node, nil } // getNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation -func (c *controller) updateNodePreservedCondition(ctx context.Context, machine *v1alpha1.Machine, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) (machineutils.RetryPeriod, error) { +func (c *controller) updateNodePreservedCondition(ctx context.Context, machine *v1alpha1.Machine, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) error { var newNodePreservedCondition *v1.NodeCondition var changed bool if existingNodeCondition == nil { @@ -2498,13 +2489,10 @@ func (c *controller) updateNodePreservedCondition(ctx context.Context, machine * } if err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), *newNodePreservedCondition); err != nil { klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", getNodeName(machine), machine.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err + return err } } - return machineutils.LongRetry, nil + return nil } // shouldNodeBeDrained returns true if the machine's backing node must be drained, else false @@ -2523,10 +2511,10 @@ func (c *controller) shouldNodeBeDrained(machine *v1alpha1.Machine, existingCond } // stopMachinePreservation stops the preservation of the machine and node -func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) { +func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) error { // check if preserveExpiryTime is set, if not, no need to do anything if !machineutils.IsPreserveExpiryTimeSet(machine) { - return 
machineutils.LongRetry, nil + return nil } if machine.Labels[v1alpha1.NodeLabelKey] != "" { nodeName := machine.Labels[v1alpha1.NodeLabelKey] @@ -2538,7 +2526,7 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp // Step 1: if backing node exists, change node condition to reflect that preservation has stopped err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), preservedCondition) if err != nil { - return machineutils.ShortRetry, err + return err } // Step 2: remove CA's scale-down disabled annotations to allow CA to scale down node if needed CAAnnotations := make(map[string]string) @@ -2546,17 +2534,14 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp latestNode, err := c.targetCoreClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) if err != nil { klog.Errorf("error trying to get backing node %q for machine %s. Retrying, error: %v", nodeName, machine.Name, err) - return machineutils.ShortRetry, err + return err } latestNodeCopy := latestNode.DeepCopy() latestNodeCopy, _, _ = annotations.RemoveAnnotation(latestNodeCopy, CAAnnotations) // error can be ignored, always returns nil _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, latestNodeCopy, metav1.UpdateOptions{}) if err != nil { klog.Errorf("Node UPDATE failed for node %q of machine %q. Retrying, error: %s", nodeName, machine.Name, err) - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, err - } - return machineutils.ShortRetry, err + return err } } // Step 3: update machine status to set preserve expiry time to metav1.Time{} @@ -2569,10 +2554,10 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("machine/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) - return machineutils.ConflictRetry, err + return err } klog.V(3).Infof("Machine status updated to stop preservation for machine %q", machine.Name) - return machineutils.LongRetry, nil + return nil } // drainPreservedNode attempts to drain the node backing a preserved machine From 2c74ef8d557364a9295d6f558533fe0dd58669ca Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 12 Dec 2025 10:12:30 +0530 Subject: [PATCH 27/79] Address review comments --- pkg/controller/deployment_machineset_util.go | 3 +- .../provider/machinecontroller/machine.go | 217 +++++++++--------- .../machinecontroller/machine_util.go | 68 +++--- 3 files changed, 156 insertions(+), 132 deletions(-) diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index 82475f405..f09c640a0 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -127,7 +127,8 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al } failedMachines = append(failedMachines, machineSummary) } - if cond := getMachineCondition(machine, v1alpha1.NodePreserved); cond != nil { + cond := getMachineCondition(machine, v1alpha1.NodePreserved) + if cond != nil { if cond.Reason == v1alpha1.NodePreservedByMCM { autoPreserveFailedMachineCount++ } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index a9771c65c..bec8e9b4f 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -803,131 +803,142 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // - failed machine, already preserved, check for timeout // // manageMachinePreservation checks if any preservation-related operations need to be performed on the machine and node objects -func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) 
(machineutils.RetryPeriod, error) { - // get effective preservation value based on node's and machine's annotations. - updatedMachine, preserveValue, exists, err := c.syncEffectivePreserveAnnotationValue(ctx, machine) - if err != nil { - return machineutils.ShortRetry, err - } - if !exists { - return machineutils.LongRetry, nil - } - switch preserveValue { - case machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValueWhenFailed, machineutils.PreserveMachineAnnotationValuePreservedByMCM: - // if preserve annotation value has switched from now to when-failed, then stop preservation - if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && !machineutils.IsMachineFailed(updatedMachine) { - if machineutils.IsPreserveExpiryTimeSet(updatedMachine) { - err = c.stopMachinePreservation(ctx, updatedMachine) - if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, nil - } - return machineutils.ShortRetry, err - } - } - return machineutils.LongRetry, nil - } - isComplete, err := c.isMachinePreservationComplete(machine) +func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { + defer func() { if err != nil { - return machineutils.ShortRetry, err - } - if !isComplete { - err = c.preserveMachine(ctx, machine, preserveValue) - if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, nil - } - return machineutils.ShortRetry, err - } - } - if hasMachinePreservationTimedOut(machine) { - err = c.stopMachinePreservation(ctx, updatedMachine) - if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, nil - } - return machineutils.ShortRetry, err + if apierrors.IsConflict(err) { + retry = machineutils.ConflictRetry } + retry = machineutils.ShortRetry } - case machineutils.PreserveMachineAnnotationValueFalse: - err = c.stopMachinePreservation(ctx, updatedMachine) 
+ retry = machineutils.LongRetry + }() + + preserveValue, exists, err := c.computeEffectivePreserveAnnotationValue(machine) + + if err != nil { + return + } + if !exists { + return + } + // if preserve value differs from machine's preserve value, overwrite the value in the machine + clone := machine.DeepCopy() + if machine.Annotations == nil || machine.Annotations[machineutils.PreserveMachineAnnotationKey] != preserveValue { + clone, err = c.writePreserveAnnotationValueOnMachine(ctx, clone, preserveValue) if err != nil { - if apierrors.IsConflict(err) { - return machineutils.ConflictRetry, nil - } - return machineutils.ShortRetry, err + return } - case "": - return machineutils.LongRetry, nil - default: + } + if !c.isPreserveAnnotationValueValid(preserveValue) { klog.Warningf("Preserve annotation value %s on machine %s is invalid", preserveValue, machine.Name) - return machineutils.LongRetry, nil + return + } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || hasMachinePreservationTimedOut(clone) { + err = c.stopMachinePreservation(ctx, clone) + return + } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { + // if machine is preserved, stop preservation. Else, do nothing. + // this check is done in case the annotation value has changed from preserve=now to preserve=when-failed, in which case preservation needs to be stopped + preserveExpirySet := machineutils.IsPreserveExpiryTimeSet(clone) + machineFailed := machineutils.IsMachineFailed(clone) + if !preserveExpirySet && !machineFailed { + return + } else if !preserveExpirySet { + err = c.preserveMachine(ctx, clone, preserveValue) + return + } + // Here, we do not stop preservation even when preserve expiry time is set but the machine is in Running. + // This is to accommodate the case where the annotation is when-failed and the machine has recovered from Failed to Running. 
+ // In this case, we want the preservation to continue so that CA does not scale down the node before pods are assigned to it + return + } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + err = c.preserveMachine(ctx, clone, preserveValue) + return } - return machineutils.LongRetry, nil + return } -// syncEffectivePreserveAnnotationValue finds the preservation annotation value by considering both node and machine objects -// if the backing node is annotated with preserve annotation, the preserve value will be synced to the machine -// if there is no backing node, or the node has no preserve annotation, then the machine's preserve value is honoured -// if both machine and node objects have conflicting preserve annotation values, the node's value will be honoured -func (c *controller) syncEffectivePreserveAnnotationValue(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, string, bool, error) { - mAnnotationValue, mExists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] +func (c *controller) getNodePreserveAnnotationValue(machine *v1alpha1.Machine) (nAnnotationValue string, nExists bool, err error) { nodeName := machine.Labels[v1alpha1.NodeLabelKey] if nodeName == "" { - return machine, mAnnotationValue, mExists, nil + return } node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error trying to get node %q: %v", nodeName, err) - return machine, "", false, err + return } - nAnnotationValue, nExists := node.Annotations[machineutils.PreserveMachineAnnotationKey] - switch { - case nExists && mExists: - if nAnnotationValue == mAnnotationValue { - return machine, nAnnotationValue, nExists, nil - } // else falls through to update machine with node's value - case nExists && !mExists: - // falls through to update machine with node's value - case mExists && !nExists: - return machine, mAnnotationValue, mExists, nil - case 
!nExists && !mExists: - return machine, "", false, nil + if node.Annotations != nil { + nAnnotationValue, nExists = node.Annotations[machineutils.PreserveMachineAnnotationKey] } - clone := machine.DeepCopy() - if clone.Annotations == nil { - clone.Annotations = make(map[string]string) + return +} + +// computeEffectivePreserveAnnotationValue returns the effective preservation value based on node's and machine's annotations. +// if the backing node is annotated with preserve annotation, the node's preserve value will be honoured +// if there is no backing node, or the node has no preserve annotation, then the machine's preserve value is honoured +// if both machine and node objects have conflicting preserve annotation values, the node's value will be honoured +func (c *controller) computeEffectivePreserveAnnotationValue(machine *v1alpha1.Machine) (preserveValue string, exists bool, err error) { + mAnnotationValue, mExists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] + nAnnotationValue, nExists, err := c.getNodePreserveAnnotationValue(machine) + if err != nil { + return + } + exists = mExists || nExists + if !exists { + return } - clone.Annotations[machineutils.PreserveMachineAnnotationKey] = nAnnotationValue - updatedMachine, err := c.controlMachineClient.Machines(c.namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if nExists { + preserveValue = nAnnotationValue + } else { + preserveValue = mAnnotationValue + } + return +} + +// writePreserveAnnotationValueOnMachine syncs the effective preserve value on the machine objects +func (c *controller) writePreserveAnnotationValueOnMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (*v1alpha1.Machine, error) { + if machine.Annotations == nil { + machine.Annotations = make(map[string]string) + } + machine.Annotations[machineutils.PreserveMachineAnnotationKey] = preserveValue + updatedMachine, err := c.controlMachineClient.Machines(c.namespace).Update(ctx, machine, 
metav1.UpdateOptions{}) if err != nil { - klog.Errorf("error updating machine %q with preserve annotation %q: %v", machine.Name, nAnnotationValue, err) - return machine, "", true, err + klog.Errorf("error updating machine %q with preserve annotation %q: %v", machine.Name, preserveValue, err) + return machine, err } - return updatedMachine, nAnnotationValue, nExists, nil + return updatedMachine, nil } -func (c *controller) isMachinePreservationComplete(machine *v1alpha1.Machine) (bool, error) { - // if PreserveExpiryTime is set and machine has not failed, then yes, - // if PreserveExpiryTime is set and machine has failed, the node condition must be there saying drain successful - // if PreserveExpiryTime is not set, then no - if !machineutils.IsPreserveExpiryTimeSet(machine) { - return false, nil - } else if machineutils.IsMachineFailed(machine) { - if machine.Labels[v1alpha1.NodeLabelKey] == "" { - return true, nil - } - node, err := c.nodeLister.Get(getNodeName(machine)) - if err != nil { - klog.Errorf("error trying to get node %q: %v", getNodeName(machine), err) - return false, err - } - if cond := nodeops.GetCondition(node, v1alpha1.NodePreserved); cond != nil { - if cond.Message == v1alpha1.PreservedNodeDrainSuccessful { - return true, nil - } - } +// isPreserveAnnotationValueValid checks if the preserve annotation value is valid +func (c *controller) isPreserveAnnotationValueValid(preserveValue string) bool { + allowedValues := map[string]bool{ + machineutils.PreserveMachineAnnotationValueNow: true, + machineutils.PreserveMachineAnnotationValueWhenFailed: true, + machineutils.PreserveMachineAnnotationValuePreservedByMCM: true, + machineutils.PreserveMachineAnnotationValueFalse: true, + } + _, exists := allowedValues[preserveValue] + return exists +} + +// isMachinePreservationComplete check if all the steps in the preservation logic have been completed for the machine +func (c *controller) isMachinePreservationComplete(machine *v1alpha1.Machine, isExpirySet 
bool) (bool, error) { + nodeName := getNodeName(machine) + if nodeName == "" && isExpirySet { + return true, nil + } + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error trying to get node %q: %v", nodeName, err) + return false, err + } + cond := nodeops.GetCondition(node, v1alpha1.NodePreserved) + if cond == nil { return false, nil } - return true, nil + if cond.Status == corev1.ConditionTrue { + return true, nil + } + return false, nil } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 0271c1b2a..c7b0587ce 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2369,8 +2369,17 @@ Utility Functions for Machine Preservation // preserveMachine contains logic to start the preservation of a machine and node. func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { + isExpirySet := machineutils.IsPreserveExpiryTimeSet(machine) + // check if preservation is complete + isComplete, err := c.isMachinePreservationComplete(machine, isExpirySet) + if err != nil { + return err + } + if isComplete { + return nil + } // Step 1: Set PreserveExpiryTime - if !machineutils.IsPreserveExpiryTimeSet(machine) { + if !isExpirySet { _, err := c.setPreserveExpiryTime(ctx, machine) if err != nil { return err @@ -2400,15 +2409,23 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach if c.shouldNodeBeDrained(machine, existingNodePreservedCondition) { err = c.drainPreservedNode(ctx, machine) if err != nil { - _ = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) + newCond, needsUpdate := c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) + if needsUpdate { + _ = 
nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, *newCond) + return err + } return err } drainSuccessful = true } // Step 4: Update NodePreserved Condition on Node - err = c.updateNodePreservedCondition(ctx, machine, preserveValue, drainSuccessful, existingNodePreservedCondition) - if err != nil { - return err + newCond, needsUpdate := c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) + if needsUpdate { + err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, *newCond) + if err != nil { + klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", getNodeName(machine), machine.Name, err) + return err + } } klog.V(3).Infof("Machine %s preserved successfully.", machine.Name) return nil @@ -2452,47 +2469,41 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, } // getNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation -func (c *controller) updateNodePreservedCondition(ctx context.Context, machine *v1alpha1.Machine, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) error { +func (c *controller) computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { var newNodePreservedCondition *v1.NodeCondition - var changed bool + var needsUpdate bool if existingNodeCondition == nil { newNodePreservedCondition = &v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), } - changed = true + if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + newNodePreservedCondition.Reason = v1alpha1.NodePreservedByMCM + } else { + newNodePreservedCondition.Reason = 
v1alpha1.NodePreservedByUser + } + needsUpdate = true } else { newNodePreservedCondition = existingNodeCondition.DeepCopy() } - if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { + if machinePhase == v1alpha1.MachineFailed { if drainSuccessful { if newNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful newNodePreservedCondition.Status = v1.ConditionTrue - changed = true + needsUpdate = true } } else if newNodePreservedCondition.Status != v1.ConditionFalse { newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful newNodePreservedCondition.Status = v1.ConditionFalse - changed = true + needsUpdate = true } } else if newNodePreservedCondition.Status != v1.ConditionTrue { newNodePreservedCondition.Status = v1.ConditionTrue - changed = true + needsUpdate = true } - if changed { - if preserveValue != machineutils.PreserveMachineAnnotationValuePreservedByMCM { - newNodePreservedCondition.Reason = v1alpha1.NodePreservedByUser - } else { - newNodePreservedCondition.Reason = v1alpha1.NodePreservedByMCM - } - if err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), *newNodePreservedCondition); err != nil { - klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", getNodeName(machine), machine.Name, err) - return err - } - } - return nil + return newNodePreservedCondition, needsUpdate } // shouldNodeBeDrained returns true if the machine's backing node must be drained, else false @@ -2516,15 +2527,15 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp if !machineutils.IsPreserveExpiryTimeSet(machine) { return nil } - if machine.Labels[v1alpha1.NodeLabelKey] != "" { - nodeName := machine.Labels[v1alpha1.NodeLabelKey] + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + if nodeName != "" { preservedCondition := v1.NodeCondition{ Type: 
v1alpha1.NodePreserved, Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), } // Step 1: if backing node exists, change node condition to reflect that preservation has stopped - err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), preservedCondition) + err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, preservedCondition) if err != nil { return err } @@ -2589,7 +2600,8 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M } // verify and log node object's existence - if _, err := c.nodeLister.Get(nodeName); err == nil { + _, err = c.nodeLister.Get(nodeName) + if err == nil { klog.V(3).Infof("(drainNode) For node %q, machine %q", nodeName, machine.Name) } else if apierrors.IsNotFound(err) { klog.Warningf("(drainNode) Node %q for machine %q doesn't exist, so drain will finish instantly", nodeName, machine.Name) From 1c73121859cbf69d7513c2fe98222ae49f6877bd Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 18 Dec 2025 10:07:22 +0530 Subject: [PATCH 28/79] Remove incorrect json tag and regenerate CRDs. --- .../crds/machine.sapcloud.io_machinedeployments.yaml | 8 ++++---- kubernetes/crds/machine.sapcloud.io_machines.yaml | 8 ++++---- kubernetes/crds/machine.sapcloud.io_machinesets.yaml | 8 ++++---- pkg/apis/machine/types.go | 3 +-- pkg/apis/machine/v1alpha1/shared_types.go | 2 +- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml index 555d898c4..abb36d1c4 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml @@ -302,6 +302,10 @@ spec: description: MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed. 
type: string + machinePreserveTimeout: + description: MachinePreserveTimeout is the timeout after which + the machine preservation is stopped + type: string maxEvictRetries: description: MaxEvictRetries is the number of retries that will be attempted while draining the node. @@ -425,10 +429,6 @@ spec: type: boolean type: object type: object - preserveTimeout: - description: MachinePreserveTimeout is the timeout after which - the machine preservation is stopped - type: string providerID: description: ProviderID represents the provider's unique ID given to a machine diff --git a/kubernetes/crds/machine.sapcloud.io_machines.yaml b/kubernetes/crds/machine.sapcloud.io_machines.yaml index 6e75f1441..fcea16750 100644 --- a/kubernetes/crds/machine.sapcloud.io_machines.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machines.yaml @@ -95,6 +95,10 @@ spec: description: MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed. type: string + machinePreserveTimeout: + description: MachinePreserveTimeout is the timeout after which the + machine preservation is stopped + type: string maxEvictRetries: description: MaxEvictRetries is the number of retries that will be attempted while draining the node. @@ -216,10 +220,6 @@ spec: type: boolean type: object type: object - preserveTimeout: - description: MachinePreserveTimeout is the timeout after which the - machine preservation is stopped - type: string providerID: description: ProviderID represents the provider's unique ID given to a machine diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml index 970be4b20..46445131f 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -181,6 +181,10 @@ spec: description: MachineInPlaceUpdateTimeout is the timeout after which in-place update is declared failed. 
type: string + machinePreserveTimeout: + description: MachinePreserveTimeout is the timeout after which + the machine preservation is stopped + type: string maxEvictRetries: description: MaxEvictRetries is the number of retries that will be attempted while draining the node. @@ -304,10 +308,6 @@ spec: type: boolean type: object type: object - preserveTimeout: - description: MachinePreserveTimeout is the timeout after which - the machine preservation is stopped - type: string providerID: description: ProviderID represents the provider's unique ID given to a machine diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index 10b3b507e..c2e535bf7 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -98,8 +98,7 @@ type MachineConfiguration struct { MachineInPlaceUpdateTimeout *metav1.Duration // MachinePreserveTimeout is the timeout after which the machine preservation is stopped - // +optional - MachinePreserveTimeout *metav1.Duration `json:"preserveTimeout,omitempty"` + MachinePreserveTimeout *metav1.Duration // DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. // This is intended to be used only for in-place updates. DisableHealthTimeout *bool diff --git a/pkg/apis/machine/v1alpha1/shared_types.go b/pkg/apis/machine/v1alpha1/shared_types.go index 1a673b79f..d9a7d0c7b 100644 --- a/pkg/apis/machine/v1alpha1/shared_types.go +++ b/pkg/apis/machine/v1alpha1/shared_types.go @@ -46,7 +46,7 @@ type MachineConfiguration struct { // MachinePreserveTimeout is the timeout after which the machine preservation is stopped // +optional - MachinePreserveTimeout *metav1.Duration `json:"preserveTimeout,omitempty"` + MachinePreserveTimeout *metav1.Duration `json:"machinePreserveTimeout,omitempty"` // DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. // This is intended to be used only for in-place updates. 
From 6dc35fe106dd244b176136dc3aa7134cfb95325c Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 19 Dec 2025 11:07:09 +0530 Subject: [PATCH 29/79] Apply suggestions from code review - part 1 Co-authored-by: Prashant Tak --- pkg/apis/machine/v1alpha1/machine_types.go | 6 +++--- pkg/controller/deployment_machineset_util.go | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 8fe3d75fa..dc69e2e49 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -251,13 +251,13 @@ const ( // NodePreservedByMCM is a node condition reason for preservation of machines to indicate that the node is auto-preserved by MCM NodePreservedByMCM string = "PreservedByMCM" - //NodePreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user + // NodePreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user NodePreservedByUser string = "PreservedByUser" - //PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful + // PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful PreservedNodeDrainSuccessful string = "Preserved Node drained successfully" - //PreservedNodeDrainUnsuccessful is a constant for the message in condition that indicates that the preserved node's drain was not successful + // PreservedNodeDrainUnsuccessful is a constant for the message in condition that indicates that the preserved node's drain was not successful PreservedNodeDrainUnsuccessful string = "Preserved Node could not be drained" ) diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index f09c640a0..016422590 100644 
--- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -128,8 +128,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al failedMachines = append(failedMachines, machineSummary) } cond := getMachineCondition(machine, v1alpha1.NodePreserved) - if cond != nil { - if cond.Reason == v1alpha1.NodePreservedByMCM { + if cond != nil && cond.Reason == v1alpha1.NodePreservedByMCM { autoPreserveFailedMachineCount++ } } From a38223ced6ee923d754f682da01e2ea02db849b0 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 19 Dec 2025 14:00:28 +0530 Subject: [PATCH 30/79] Delete invalid gitlink --- machine-controller-manager | 1 - 1 file changed, 1 deletion(-) delete mode 160000 machine-controller-manager diff --git a/machine-controller-manager b/machine-controller-manager deleted file mode 160000 index f2cbb0378..000000000 --- a/machine-controller-manager +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f2cbb037802eb399e7b655388ef6e182c90cb70f From 165af13d7ad7c266427467682fdced39e1213942 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 22 Dec 2025 16:23:19 +0530 Subject: [PATCH 31/79] Address review comments- part 2: * fix edge case of handling switch from preserve=now to when-failed * Create map in package with valid preserve annotation values * Fix bug where node condition's reason wouldn't get updated after toggling of preservation --- pkg/apis/machine/v1alpha1/machine_types.go | 3 + pkg/controller/deployment_machineset_util.go | 3 +- .../provider/machinecontroller/machine.go | 97 +++++------ .../machinecontroller/machine_util.go | 150 ++++++++++-------- pkg/util/provider/machineutils/utils.go | 10 ++ 5 files changed, 142 insertions(+), 121 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index dc69e2e49..654256d32 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -254,6 +254,9
@@ const ( // NodePreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user NodePreservedByUser string = "PreservedByUser" + // NodePreservationStopped is a node condition reason to indicate that a machine/node preservation has been stopped due to annotation update or timeout + NodePreservationStopped string = "PreservationStopped" + // PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful PreservedNodeDrainSuccessful string = "Preserved Node drained successfully" diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index 016422590..e44505393 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -129,8 +129,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al } cond := getMachineCondition(machine, v1alpha1.NodePreserved) if cond != nil && cond.Reason == v1alpha1.NodePreservedByMCM { - autoPreserveFailedMachineCount++ - } + autoPreserveFailedMachineCount++ } } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index bec8e9b4f..bb5ebac6f 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "github.com/gardener/machine-controller-manager/pkg/util/nodeops" + clientretry "k8s.io/client-go/util/retry" "maps" "slices" "strings" @@ -67,7 +68,7 @@ func (c *controller) updateMachine(oldObj, newObj any) { c.enqueueMachine(newObj, "TEST: handling machine failure simulation UPDATE event") } } - if preserveAnnotationsChanged(oldMachine.Annotations, newMachine.Annotations) { + if c.handlePreserveAnnotationsChange(oldMachine.Annotations, newMachine.Annotations, newMachine) { c.enqueueMachine(newObj, "handling machine object preservation 
related UPDATE event") return } @@ -222,8 +223,8 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp klog.Errorf("cannot reconcile machine %s: %s", machine.Name, err) return retry, err } - { //TODO@thiyyakat: remove after drain - //insert condition changing code here + + { //TODO@thiyyakat: remove after testing if machine.Labels["test-failed"] == "true" { node, err := c.nodeLister.Get(getNodeName(machine)) if err != nil { @@ -358,10 +359,30 @@ func (c *controller) reconcileClusterMachineTermination(key string) error { } return nil } -func preserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string) bool { + +// handlePreserveAnnotationsChange returns true if there is a change in preserve annotations +// it also handles the special case where the annotation is changed from 'now' to 'when-failed' +// in which case it stops the preservation if expiry time is already set +func (c *controller) handlePreserveAnnotationsChange(oldAnnotations, newAnnotations map[string]string, machine *v1alpha1.Machine) bool { valueNew, existsInNew := newAnnotations[machineutils.PreserveMachineAnnotationKey] valueOld, existsInOld := oldAnnotations[machineutils.PreserveMachineAnnotationKey] - return existsInOld != existsInNew || valueOld != valueNew + if valueNew != machineutils.PreserveMachineAnnotationValueWhenFailed || valueOld != machineutils.PreserveMachineAnnotationValueNow { + return existsInOld != existsInNew || valueOld != valueNew + } + // Special case: annotation changed from 'now' to 'when-failed' + isPreserved := machineutils.IsPreserveExpiryTimeSet(machine) + if !isPreserved { + return true + } + ctx := context.Background() + err := clientretry.RetryOnConflict(nodeops.Backoff, func() error { + klog.V(3).Infof("Stopping preservation for machine %q as preserve annotation changed from 'now' to 'when-failed'.", machine.Name) + return c.stopMachinePreservation(ctx, machine) + }) + if err != nil { + klog.Errorf("error while stopping 
preservation for machine %q: %v. Use preserve=false to stop preservation.", machine.Name, err) + } + return true } /* @@ -790,18 +811,6 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { return isMachineInCreationFlow } -// TODO@thiyyakat: check case where, preserved and annotated but times out. Not handled currently -// possible cases: -// 1. Annotated -// - already preserved, check for timeout -// - already preserved, check for explicit stop preservation -// - needs to be preserved on failure -// - needs to be preserved now -// -// 2. Unannotated -// - failed machine, autoPreserveMax not breached, must be preserved -// - failed machine, already preserved, check for timeout -// // manageMachinePreservation checks if any preservation-related operations need to be performed on the machine and node objects func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { defer func() { @@ -815,7 +824,6 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a }() preserveValue, exists, err := c.computeEffectivePreserveAnnotationValue(machine) - if err != nil { return } @@ -830,26 +838,23 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } } - if !c.isPreserveAnnotationValueValid(preserveValue) { - klog.Warningf("Preserve annotation value %s on machine %s is invalid", preserveValue, machine.Name) + if !isPreserveAnnotationValueValid(preserveValue) { + klog.Warningf("Preserve annotation value %q on machine %s is invalid", preserveValue, machine.Name) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || hasMachinePreservationTimedOut(clone) { err = c.stopMachinePreservation(ctx, clone) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { - // if machine is preserved, stop preservation. Else, do nothing. 
- // this check is done in case the annotation value has changed from preserve=now to preserve=when-failed, in which case preservation needs to be stopped - preserveExpirySet := machineutils.IsPreserveExpiryTimeSet(clone) machineFailed := machineutils.IsMachineFailed(clone) - if !preserveExpirySet && !machineFailed { - return - } else if !preserveExpirySet { + if machineFailed { err = c.preserveMachine(ctx, clone, preserveValue) - return } - // Here, we do not stop preservation even when preserve expiry time is set but the machine is in Running. - // This is to accommodate the case where the annotation is when-failed and the machine has recovered from Failed to Running. - // In this case, we want the preservation to continue so that CA does not scale down the node before pods are assigned to it + // Here, if the preserve value is when-failed, but the machine is in running, there could be 2 possibilities: + // 1. The machine was initially annotated with preserve=now and has been preserved, but later the annotation was changed to when-failed. In this case, + // we want to stop preservation. This case is already being handled in updateMachine and updateNodeToMachine functions. + // 2. The machine was initially annotated with preserve=when-failed and has recovered from Failed to Running. In this case, + // we want to continue preservation until the annotation is changed to false or the preservation times out, so that CA does not + // scale down the node before pods are assigned to it. 
return } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { err = c.preserveMachine(ctx, clone, preserveValue) @@ -911,34 +916,20 @@ func (c *controller) writePreserveAnnotationValueOnMachine(ctx context.Context, } // isPreserveAnnotationValueValid checks if the preserve annotation value is valid -func (c *controller) isPreserveAnnotationValueValid(preserveValue string) bool { - allowedValues := map[string]bool{ - machineutils.PreserveMachineAnnotationValueNow: true, - machineutils.PreserveMachineAnnotationValueWhenFailed: true, - machineutils.PreserveMachineAnnotationValuePreservedByMCM: true, - machineutils.PreserveMachineAnnotationValueFalse: true, - } - _, exists := allowedValues[preserveValue] +func isPreserveAnnotationValueValid(preserveValue string) bool { + _, exists := machineutils.AllowedPreserveAnnotationValues[preserveValue] return exists } -// isMachinePreservationComplete check if all the steps in the preservation logic have been completed for the machine -func (c *controller) isMachinePreservationComplete(machine *v1alpha1.Machine, isExpirySet bool) (bool, error) { - nodeName := getNodeName(machine) - if nodeName == "" && isExpirySet { - return true, nil - } - node, err := c.nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error trying to get node %q: %v", nodeName, err) - return false, err - } - cond := nodeops.GetCondition(node, v1alpha1.NodePreserved) +// isPreservedNodeConditionStatusTrue check if all the steps in the preservation logic have been completed for the machine +// if the machine has no backing node, only PreserveExpiryTime needs to be set +// if the machine has a backing node, the NodePreserved condition on the node needs to be true +func (c *controller) isPreservedNodeConditionStatusTrue(cond *corev1.NodeCondition) bool { if cond == nil { - return false, nil + return false } if cond.Status == corev1.ConditionTrue { - return 
true, nil + return true } - return false, nil + return false } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index c7b0587ce..9e4008fcd 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2369,48 +2369,57 @@ Utility Functions for Machine Preservation // preserveMachine contains logic to start the preservation of a machine and node. func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { + nodeName := machine.Labels[v1alpha1.NodeLabelKey] isExpirySet := machineutils.IsPreserveExpiryTimeSet(machine) - // check if preservation is complete - isComplete, err := c.isMachinePreservationComplete(machine, isExpirySet) - if err != nil { - return err - } - if isComplete { - return nil - } - // Step 1: Set PreserveExpiryTime - if !isExpirySet { - _, err := c.setPreserveExpiryTime(ctx, machine) + + // If machine has no backing node + if nodeName == "" { + if isExpirySet { + return nil + } + // Step 1: Add preserveExpiryTime to machine status + updatedMachine, err := c.setPreserveExpiryTimeOnMachine(ctx, machine) if err != nil { return err } - } - if machine.Labels[v1alpha1.NodeLabelKey] == "" { + klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) return nil + } - nodeName := machine.Labels[v1alpha1.NodeLabelKey] + // Machine has a backing node node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) return err } + existingNodePreservedCondition := nodeops.GetCondition(node, v1alpha1.NodePreserved) + // check if preservation is already complete + if isExpirySet && c.isPreservedNodeConditionStatusTrue(existingNodePreservedCondition) { + return nil + } + // Preservation incomplete - either the flow is just starting or in progress + updatedMachine := machine + if !isExpirySet { + // Step 1: Add preserveExpiryTime to machine status + updatedMachine, err = c.setPreserveExpiryTimeOnMachine(ctx, machine) + if err != nil { + return err + } + } // Step 2: Add annotations to prevent scale down of node by CA _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { return err } - existingNodePreservedCondition, err := nodeops.GetNodeCondition(ctx, c.targetCoreClient, getNodeName(machine), v1alpha1.NodePreserved) - if err != nil { - klog.Errorf("error trying to get existing node preserved condition for node %q of machine %q: %v", nodeName, machine.Name, err) - return err - } + drainSuccessful := false - // Step 3: If machine is in Failed Phase, drain the backing node if c.shouldNodeBeDrained(machine, existingNodePreservedCondition) { + // Step 3: If machine is in Failed Phase, drain the backing node err = c.drainPreservedNode(ctx, machine) if err != nil { newCond, needsUpdate := c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) if needsUpdate { + // Step 4a: Update NodePreserved Condition on Node, with drain unsuccessful status _ = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, *newCond) return err } @@ -2418,54 +2427,59 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } drainSuccessful = true } - // Step 4: Update NodePreserved Condition on Node + // Step 4b: Update NodePreserved Condition on Node with drain successful status newCond, needsUpdate := 
c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) if needsUpdate { err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, *newCond) if err != nil { - klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", getNodeName(machine), machine.Name, err) + klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", nodeName, machine.Name, err) return err } } - klog.V(3).Infof("Machine %s preserved successfully.", machine.Name) + klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) return nil } -// setPreserveExpiryTime sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout -func (c *controller) setPreserveExpiryTime(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { +// setPreserveExpiryTimeOnMachine sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout +func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { + clone := machine.DeepCopy() preservedCurrentStatus := v1alpha1.CurrentStatus{ Phase: machine.Status.CurrentStatus.Phase, TimeoutActive: machine.Status.CurrentStatus.TimeoutActive, LastUpdateTime: metav1.Now(), PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), } - clone := machine.DeepCopy() + clone.Status.CurrentStatus = preservedCurrentStatus - _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + updatedMachine, err := c.controlMachineClient.Machines(machine.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("machine/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) return nil, err } klog.V(4).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) - return clone, nil + return updatedMachine, nil } // addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, error) { + + // Check if annotation already exists with correct value + if node.Annotations != nil && + node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { + return node, nil + } + + CAScaleDownAnnotation := map[string]string{ + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, + } nodeCopy := node.DeepCopy() - if nodeCopy.Annotations == nil || nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { - CAScaleDownAnnotation := map[string]string{ - autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, - } - updatedNode, _, _ := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) - _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error trying to update CA annotation on node %q: %v", updatedNode.Name, err) - return nil, err - } - return updatedNode, nil + updatedNode, _, _ := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) + updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error trying to update CA annotation on node %q: %v", node.Name, err) + return nil, err } - return node, nil + return updatedNode, nil } // 
getNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation @@ -2478,11 +2492,6 @@ func (c *controller) computeNewNodePreservedCondition(machinePhase v1alpha1.Mach Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), } - if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - newNodePreservedCondition.Reason = v1alpha1.NodePreservedByMCM - } else { - newNodePreservedCondition.Reason = v1alpha1.NodePreservedByUser - } needsUpdate = true } else { newNodePreservedCondition = existingNodeCondition.DeepCopy() @@ -2503,6 +2512,11 @@ func (c *controller) computeNewNodePreservedCondition(machinePhase v1alpha1.Mach newNodePreservedCondition.Status = v1.ConditionTrue needsUpdate = true } + if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + newNodePreservedCondition.Reason = v1alpha1.NodePreservedByMCM + } else { + newNodePreservedCondition.Reason = v1alpha1.NodePreservedByUser + } return newNodePreservedCondition, needsUpdate } @@ -2512,62 +2526,66 @@ func (c *controller) shouldNodeBeDrained(machine *v1alpha1.Machine, existingCond if existingCondition == nil { return true } - if existingCondition.Message == v1alpha1.PreservedNodeDrainSuccessful { - return false - } else { - return true - } + return existingCondition.Message != v1alpha1.PreservedNodeDrainSuccessful } return false } // stopMachinePreservation stops the preservation of the machine and node func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) error { - // check if preserveExpiryTime is set, if not, no need to do anything + // removal of preserveExpiryTime is the last step of stopping preservation + // if preserveExpiryTime is not set, preservation is already stopped if !machineutils.IsPreserveExpiryTimeSet(machine) { return nil } nodeName := machine.Labels[v1alpha1.NodeLabelKey] if nodeName != "" { + // Machine has a backing node 
preservedCondition := v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), + Reason: v1alpha1.NodePreservationStopped, } + // Step 1: if backing node exists, change node condition to reflect that preservation has stopped err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, preservedCondition) if err != nil { return err } // Step 2: remove CA's scale-down disabled annotations to allow CA to scale down node if needed - CAAnnotations := make(map[string]string) - CAAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" + // fetch latest node object since cache may be not be up-to-date with node updated earlier latestNode, err := c.targetCoreClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) if err != nil { - klog.Errorf("error trying to get backing node %q for machine %s. Retrying, error: %v", nodeName, machine.Name, err) + klog.Errorf("error trying to get backing node %q for machine %q. Retrying, error: %s", nodeName, machine.Name, err) return err } - latestNodeCopy := latestNode.DeepCopy() - latestNodeCopy, _, _ = annotations.RemoveAnnotation(latestNodeCopy, CAAnnotations) // error can be ignored, always returns nil - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, latestNodeCopy, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("Node UPDATE failed for node %q of machine %q. 
Retrying, error: %s", nodeName, machine.Name, err) - return err + if latestNode.Annotations != nil && latestNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != "" { + annotationsToRemove := make(map[string]string) + annotationsToRemove[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" + nodeCopy := latestNode.DeepCopy() + nodeCopy, _, err = annotations.RemoveAnnotation(nodeCopy, annotationsToRemove) + if err != nil { + klog.Errorf("error trying to remove CA annotation from node %q of machine %q : %s", nodeName, machine.Name, err) + return err + } + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("Node UPDATE failed for node %q of machine %q. Retrying, error: %s", nodeName, machine.Name, err) + return err + } } } // Step 3: update machine status to set preserve expiry time to metav1.Time{} clone := machine.DeepCopy() - clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: clone.Status.CurrentStatus.Phase, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{}, - } + clone.Status.CurrentStatus.PreserveExpiryTime = metav1.Time{} + clone.Status.CurrentStatus.LastUpdateTime = metav1.Now() _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("machine/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) return err } - klog.V(3).Infof("Machine status updated to stop preservation for machine %q", machine.Name) + klog.V(3).Infof("Preservation stopped for machine %q", machine.Name) return nil } diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 56f9c13c7..85cd5c073 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -100,8 +100,18 @@ const ( //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that // a Machine should not be preserved any longer, even if the expiry timeout has not been reached PreserveMachineAnnotationValueFalse = "false" + + PreserveMac ) +// AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation +var AllowedPreserveAnnotationValues = map[string]bool{ + PreserveMachineAnnotationValueNow: true, + PreserveMachineAnnotationValueWhenFailed: true, + PreserveMachineAnnotationValuePreservedByMCM: true, + PreserveMachineAnnotationValueFalse: true, +} + // RetryPeriod is an alias for specifying the retry period type RetryPeriod time.Duration From 6720e4df0633af23327ac6619d8116742ad997fe Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 23 Dec 2025 11:05:14 +0530 Subject: [PATCH 32/79] Address review comments- part 3: * remove duplicate function to check preservation timeout * rename variables --- pkg/controller/machineset.go | 20 +++++++++---------- .../provider/machinecontroller/machine.go | 2 +- .../machinecontroller/machine_util.go | 9 ++------- pkg/util/provider/machineutils/utils.go | 9 +-------- 4 files changed, 14 insertions(+), 26 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index b116e050e..66a9492fd 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -498,7 +498,7 @@ func (c *controller) isMachineCandidateForPreservation(ctx context.Context, mach } } if 
machineSet.Status.AutoPreserveFailedMachineCount < machineSet.Spec.AutoPreserveFailedMachineMax { - _, err := c.annotateMachineForAutoPreservation(ctx, machine) + err := c.annotateMachineForAutoPreservation(ctx, machine) if err != nil { return true, err } @@ -716,16 +716,16 @@ func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1al } func prioritisePreservedMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machine { - pendingMachines := make([]*v1alpha1.Machine, 0, len(machines)) + preservedMachines := make([]*v1alpha1.Machine, 0, len(machines)) otherMachines := make([]*v1alpha1.Machine, 0, len(machines)) for _, mc := range machines { if machineutils.IsPreserveExpiryTimeSet(mc) { - pendingMachines = append(pendingMachines, mc) + preservedMachines = append(preservedMachines, mc) } else { otherMachines = append(otherMachines, mc) } } - return slices.Concat(otherMachines, pendingMachines) + return slices.Concat(otherMachines, preservedMachines) } func getMachineKeys(machines []*v1alpha1.Machine) []string { @@ -948,18 +948,18 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. 
return machine, retryErr } -func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) (*v1alpha1.Machine, error) { +func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { if m.Labels[v1alpha1.NodeLabelKey] != "" { // check if backing node has preserve=false annotation, if yes, do not auto-preserve node, err := dc.nodeLister.Get(m.Labels[v1alpha1.NodeLabelKey]) if err != nil { - return nil, err + return err } if val, exists := node.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueFalse { - return nil, nil + return nil } } - updatedMachine, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { + _, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { if clone.Annotations == nil { clone.Annotations = make(map[string]string) } @@ -967,9 +967,9 @@ func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m return nil }) if err != nil { - return nil, err + return err } klog.V(2).Infof("Updated machine %q with auto-preserved annotation.", m.Name) - return updatedMachine, nil + return nil } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index bb5ebac6f..6a663a466 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -841,7 +841,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if !isPreserveAnnotationValueValid(preserveValue) { klog.Warningf("Preserve annotation value %q on machine %s is invalid", preserveValue, machine.Name) return - } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || 
hasMachinePreservationTimedOut(clone) { + } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || machineutils.HasPreservationTimedOut(clone) { err = c.stopMachinePreservation(ctx, clone) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 9e4008fcd..a9acd1aa4 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2541,7 +2541,7 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp nodeName := machine.Labels[v1alpha1.NodeLabelKey] if nodeName != "" { // Machine has a backing node - preservedCondition := v1.NodeCondition{ + preservedConditionFalse := v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), @@ -2549,7 +2549,7 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp } // Step 1: if backing node exists, change node condition to reflect that preservation has stopped - err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, preservedCondition) + err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, preservedConditionFalse) if err != nil { return err } @@ -2701,8 +2701,3 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M } return nil } - -// hasMachinePreservationTimedOut returns true if preserve expiry time has lapsed -func hasMachinePreservationTimedOut(machine *v1alpha1.Machine) bool { - return machineutils.IsPreserveExpiryTimeSet(machine) && metav1.Now().After(machine.Status.CurrentStatus.PreserveExpiryTime.Time) -} diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 85cd5c073..3ea3076df 100644 --- a/pkg/util/provider/machineutils/utils.go +++ 
b/pkg/util/provider/machineutils/utils.go @@ -100,8 +100,6 @@ const ( //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that // a Machine should not be preserved any longer, even if the expiry timeout has not been reached PreserveMachineAnnotationValueFalse = "false" - - PreserveMac ) // AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation @@ -161,10 +159,5 @@ func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { // HasPreservationTimedOut checks if the Status.CurrentStatus.PreserveExpiryTime has not yet passed func HasPreservationTimedOut(m *v1alpha1.Machine) bool { - if m.Status.CurrentStatus.PreserveExpiryTime.IsZero() { - return true - } else if m.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { - return false - } - return true + return IsPreserveExpiryTimeSet(m) && m.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) } From 14e4af37db9d269320835f6079c4cbbb4ce28347 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 23 Dec 2025 13:43:06 +0530 Subject: [PATCH 33/79] Address review comments- part 4: * reduce get calls * remove usage of RemoveAnnotations() --- .../provider/machinecontroller/machine.go | 2 +- .../machinecontroller/machine_util.go | 32 ++++++++----------- pkg/util/provider/machineutils/utils.go | 2 +- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 6a663a466..0c3161a9c 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -841,7 +841,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if !isPreserveAnnotationValueValid(preserveValue) { klog.Warningf("Preserve annotation value %q on machine %s is invalid", preserveValue, machine.Name) return - } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || 
machineutils.HasPreservationTimedOut(clone) { + } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (machineutils.IsPreserveExpiryTimeSet(clone) && machineutils.HasPreservationTimedOut(clone)) { err = c.stopMachinePreservation(ctx, clone) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index a9acd1aa4..da6c557c6 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2541,37 +2541,33 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp nodeName := machine.Labels[v1alpha1.NodeLabelKey] if nodeName != "" { // Machine has a backing node + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error trying to get node %q of machine %q: %v. Retrying.", nodeName, machine.Name, err) + return err + } + // prepare NodeCondition to set preservation as stopped preservedConditionFalse := v1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: v1.ConditionFalse, LastTransitionTime: metav1.Now(), Reason: v1alpha1.NodePreservationStopped, } - - // Step 1: if backing node exists, change node condition to reflect that preservation has stopped - err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, preservedConditionFalse) + // Step 1: change node condition to reflect that preservation has stopped + updatedNode := nodeops.AddOrUpdateCondition(node, preservedConditionFalse) + updatedNode, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { + klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %s", nodeName, machine.Name, err) return err } // Step 2: remove CA's scale-down disabled annotations to allow CA to scale down node if needed // fetch latest node object 
since cache may be not be up-to-date with node updated earlier - latestNode, err := c.targetCoreClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) - if err != nil { - klog.Errorf("error trying to get backing node %q for machine %q. Retrying, error: %s", nodeName, machine.Name, err) - return err - } - if latestNode.Annotations != nil && latestNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != "" { - annotationsToRemove := make(map[string]string) - annotationsToRemove[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "" - nodeCopy := latestNode.DeepCopy() - nodeCopy, _, err = annotations.RemoveAnnotation(nodeCopy, annotationsToRemove) - if err != nil { - klog.Errorf("error trying to remove CA annotation from node %q of machine %q : %s", nodeName, machine.Name, err) - return err - } + if updatedNode.Annotations != nil && updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != "" { + nodeCopy := updatedNode.DeepCopy() + delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("Node UPDATE failed for node %q of machine %q. Retrying, error: %s", nodeName, machine.Name, err) + klog.Errorf("node UPDATE failed for node %q of machine %q. 
Retrying, error: %s", nodeName, machine.Name, err) return err } } diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 3ea3076df..0e0ec144d 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -159,5 +159,5 @@ func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { // HasPreservationTimedOut checks if the Status.CurrentStatus.PreserveExpiryTime has not yet passed func HasPreservationTimedOut(m *v1alpha1.Machine) bool { - return IsPreserveExpiryTimeSet(m) && m.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) + return !m.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) } From c94e391191430c0d6458cff8af64d5ea096cc3b5 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 24 Dec 2025 16:27:25 +0530 Subject: [PATCH 34/79] Add unit tests for preservation logic in machine.go --- .../provider/machinecontroller/machine.go | 58 +- .../machinecontroller/machine_test.go | 1136 +++++++++++------ 2 files changed, 766 insertions(+), 428 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 0c3161a9c..4bfd2970b 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -360,31 +360,6 @@ func (c *controller) reconcileClusterMachineTermination(key string) error { return nil } -// handlePreserveAnnotationsChange returns true if there is a change in preserve annotations -// it also handles the special case where the annotation is changed from 'now' to 'when-failed' -// in which case it stops the preservation if expiry time is already set -func (c *controller) handlePreserveAnnotationsChange(oldAnnotations, newAnnotations map[string]string, machine *v1alpha1.Machine) bool { - valueNew, existsInNew := newAnnotations[machineutils.PreserveMachineAnnotationKey] - valueOld, existsInOld := oldAnnotations[machineutils.PreserveMachineAnnotationKey] - if 
valueNew != machineutils.PreserveMachineAnnotationValueWhenFailed || valueOld != machineutils.PreserveMachineAnnotationValueNow { - return existsInOld != existsInNew || valueOld != valueNew - } - // Special case: annotation changed from 'now' to 'when-failed' - isPreserved := machineutils.IsPreserveExpiryTimeSet(machine) - if !isPreserved { - return true - } - ctx := context.Background() - err := clientretry.RetryOnConflict(nodeops.Backoff, func() error { - klog.V(3).Infof("Stopping preservation for machine %q as preserve annotation changed from 'now' to 'when-failed'.", machine.Name) - return c.stopMachinePreservation(ctx, machine) - }) - if err != nil { - klog.Errorf("error while stopping preservation for machine %q: %v. Use preserve=false to stop preservation.", machine.Name, err) - } - return true -} - /* SECTION Machine operations - Create, Delete @@ -811,6 +786,36 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { return isMachineInCreationFlow } +/* + SECTION + Machine Preservation operations +*/ + +// handlePreserveAnnotationsChange returns true if there is a change in preserve annotations +// it also handles the special case where the annotation is changed from 'now' to 'when-failed' +// in which case it stops the preservation if expiry time is already set +func (c *controller) handlePreserveAnnotationsChange(oldAnnotations, newAnnotations map[string]string, machine *v1alpha1.Machine) bool { + valueNew, existsInNew := newAnnotations[machineutils.PreserveMachineAnnotationKey] + valueOld, existsInOld := oldAnnotations[machineutils.PreserveMachineAnnotationKey] + if valueNew != machineutils.PreserveMachineAnnotationValueWhenFailed || valueOld != machineutils.PreserveMachineAnnotationValueNow { + return existsInOld != existsInNew || valueOld != valueNew + } + // Special case: annotation changed from 'now' to 'when-failed' + isPreserved := machineutils.IsPreserveExpiryTimeSet(machine) + if !isPreserved { + return true + } + ctx := 
context.Background() + err := clientretry.RetryOnConflict(nodeops.Backoff, func() error { + klog.V(3).Infof("Stopping preservation for machine %q as preserve annotation changed from 'now' to 'when-failed'.", machine.Name) + return c.stopMachinePreservation(ctx, machine) + }) + if err != nil { + klog.Errorf("error while stopping preservation for machine %q: %v. Use preserve=false to stop preservation.", machine.Name, err) + } + return true +} + // manageMachinePreservation checks if any preservation-related operations need to be performed on the machine and node objects func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { defer func() { @@ -819,8 +824,9 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a retry = machineutils.ConflictRetry } retry = machineutils.ShortRetry + } else { + retry = machineutils.LongRetry } - retry = machineutils.LongRetry }() preserveValue, exists, err := c.computeEffectivePreserveAnnotationValue(machine) diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index d5c5b5925..a00425c7f 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -7,6 +7,7 @@ package controller import ( "context" "fmt" + k8stesting "k8s.io/client-go/testing" "math" "time" @@ -16,7 +17,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/validation/field" - k8stesting "k8s.io/client-go/testing" machineapi "github.com/gardener/machine-controller-manager/pkg/apis/machine" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" @@ -4074,470 +4074,802 @@ var _ = Describe("machine", func() { }), ) }) - /* - Describe("#checkMachineTimeout", func() { - type setup struct { - machines []*v1alpha1.Machine - } - type action struct { - 
machine string - } - type expect struct { - machine *v1alpha1.Machine - err bool - } - type data struct { - setup setup - action action - expect expect + + Describe("#handlePreserveAnnotationsChange", func() { + type setup struct { + oldAnnotations map[string]string + newAnnotations map[string]string + machine *v1alpha1.Machine + node *corev1.Node + } + + type expect struct { + change bool + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("##handlePreserveAnnotationsChange scenarios", func(tc testCase) { + stop := make(chan struct{}) + defer close(stop) + + var controlMachineObjects []runtime.Object + + controlMachineObjects = append(controlMachineObjects, tc.setup.machine) + var targetCoreObjects []runtime.Object + if tc.setup.node != nil { + targetCoreObjects = append(targetCoreObjects, tc.setup.node) } - objMeta := &metav1.ObjectMeta{ - GenerateName: "machine", - Namespace: "test", + + c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) + defer trackers.Stop() + + waitForCacheSync(stop, c) + result := c.handlePreserveAnnotationsChange(tc.setup.oldAnnotations, tc.setup.newAnnotations, tc.setup.machine) + Expect(result).To(Equal(tc.expect.change)) + if tc.setup.newAnnotations != nil && tc.setup.oldAnnotations != nil && tc.setup.newAnnotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueWhenFailed && tc.setup.oldAnnotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueNow { + updatedMachine, err := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) // machine preservation should have been stopped } - machineName := "machine-0" - timeOutOccurred := -21 * time.Minute - timeOutNotOccurred := -5 * time.Minute - 
creationTimeOut := 20 * time.Minute - healthTimeOut := 10 * time.Minute - DescribeTable("##Machine Timeout Scenarios", - func(data *data) { - stop := make(chan struct{}) - defer close(stop) - machineObjects := []runtime.Object{} - for _, o := range data.setup.machines { - machineObjects = append(machineObjects, o) - } - coreObjects := []runtime.Object{} - controller, trackers := createController(stop, objMeta.Namespace, machineObjects, nil, coreObjects) - defer trackers.Stop() - waitForCacheSync(stop, controller) - action := data.action - machine, err := controller.controlMachineClient.Machines(objMeta.Namespace).Get(action.machine, metav1.GetOptions{}) - //Expect(err).ToNot(HaveOccurred()) - controller.checkMachineTimeout(machine) - actual, err := controller.controlMachineClient.Machines(machine.Namespace).Get(machine.Name, metav1.GetOptions{}) - Expect(err).To(BeNil()) - Expect(actual.Status.CurrentStatus.Phase).To(Equal(data.expect.machine.Status.CurrentStatus.Phase)) - Expect(actual.Status.CurrentStatus.//TimeoutActive).To(Equal(data.expect.machine.Status.CurrentStatus.//TimeoutActive)) - Expect(actual.Status.LastOperation.Description).To(Equal(data.expect.machine.Status.LastOperation.Description)) - Expect(actual.Status.LastOperation.State).To(Equal(data.expect.machine.Status.LastOperation.State)) - Expect(actual.Status.LastOperation.Type).To(Equal(data.expect.machine.Status.LastOperation.Type)) - }, - Entry("Machine is still running", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - //TimeoutActive: false, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutNotOccurred)), - }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Machine % successfully joined the cluster", machineName), - State: v1alpha1.MachineStateSuccessful, - Type: 
v1alpha1.MachineOperationCreate, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutNotOccurred)), + }, + Entry("no change in preserve annotations", testCase{ + setup: setup{ + oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, + newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - }, nil, nil, nil), - }, - action: action{ - machine: machineName, - }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ + }, Status: v1alpha1.MachineStatus{ CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{Time: time.Now().Add(1 * time.Hour)}, }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Machine % successfully joined the cluster", machineName), - State: v1alpha1.MachineStateSuccessful, - Type: v1alpha1.MachineOperationCreate, - }, - }, nil, nil, nil), + }, }, - }), - Entry("Machine creation has still not timed out", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineUnknown, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutNotOccurred)), - }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Machine %s is unhealthy - changing MachineState to Unknown", machineName), - State: v1alpha1.MachineStateProcessing, - Type: v1alpha1.MachineOperationCreate, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutNotOccurred)), + 
}, + expect: expect{ + change: false, + }, + }), + Entry("preserve annotation added on machine", testCase{ + setup: setup{ + oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: ""}, + newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - }, nil, nil, nil), - }, - action: action{ - machine: machineName, - }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ + }, Status: v1alpha1.MachineStatus{ CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineUnknown, - }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Machine %s is unhealthy - changing MachineState to Unknown", machineName), - State: v1alpha1.MachineStateProcessing, - Type: v1alpha1.MachineOperationCreate, + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), }, - }, nil, nil, nil), + }, }, - }), - Entry("Machine creation has timed out", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachinePending, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutOccurred)), + }, + expect: expect{ + change: true, + }, + }), + Entry("preserve annotation removed", testCase{ + setup: setup{ + oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, + newAnnotations: map[string]string{"someOtherKey": "someValue"}, + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{"someOtherKey": 
"someValue"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - LastOperation: v1alpha1.LastOperation{ - Description: "Creating machine on cloud provider", - State: v1alpha1.MachineStateProcessing, - Type: v1alpha1.MachineOperationCreate, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutOccurred)), + }}}, + expect: expect{ + change: true, + }, + }), + Entry("preserve annotation value changed", testCase{ + setup: setup{ + oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "oldValue"}, + newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - }, nil, nil, nil), - }, - action: action{ - machine: machineName, - }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineFailed, + }}}, + expect: expect{ + change: true, + }, + }), + Entry("both annotations are nil", testCase{ + setup: setup{ + oldAnnotations: nil, + newAnnotations: nil, + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf( - "Machine %s failed to join the cluster in %s minutes.", - machineName, - creationTimeOut, - ), - State: v1alpha1.MachineStateFailed, - Type: v1alpha1.MachineOperationCreate, + }}}, + expect: expect{ + change: false, + }, + }), + Entry("preserve annotation changed from now to when-failed", testCase{ + setup: setup{ + oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: 
machineutils.PreserveMachineAnnotationValueNow}, + newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - }, nil, nil, nil), - }, - }), - Entry("Machine health has timed out", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ + }, + Status: v1alpha1.MachineStatus{ CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineUnknown, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutOccurred)), - }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Machine %s is unhealthy - changing MachineState to Unknown", machineName), - State: v1alpha1.MachineStateProcessing, - Type: v1alpha1.MachineOperationHealthCheck, - LastUpdateTime: metav1.NewTime(time.Now().Add(timeOutOccurred)), + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{Time: time.Now().Add(1 * time.Hour)}, }, - }, nil, nil, nil), + }, }, - action: action{ - machine: machineName, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineFailed, - }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf( - "Machine %s is not healthy since %s minutes. Changing status to failed. 
Node Conditions: %+v", - machineName, - healthTimeOut, - []corev1.NodeCondition{}, - ), - State: v1alpha1.MachineStateFailed, - Type: v1alpha1.MachineOperationHealthCheck, + }, + expect: expect{ + change: true, + }, + }), + ) + }) + + Describe("#computeEffectivePreserveAnnotationValue", func() { + type setup struct { + machine *v1alpha1.Machine + node *corev1.Node + } + type expect struct { + preserveValue string + exists bool + err error + } + type testCase struct { + setup setup + expect expect + } + + DescribeTable("computeEffectivePreserveAnnotationValue behavior", + func(tc testCase) { + + stop := make(chan struct{}) + defer close(stop) + + var controlMachineObjects []runtime.Object + var targetCoreObjects []runtime.Object + + // Build machine + controlMachineObjects = append(controlMachineObjects, tc.setup.machine) + if tc.setup.node != nil { + targetCoreObjects = append(targetCoreObjects, tc.setup.node) + } + + c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) + defer trackers.Stop() + + waitForCacheSync(stop, c) + value, exists, err := c.computeEffectivePreserveAnnotationValue(tc.setup.machine) + + if tc.expect.err != nil { + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal(tc.expect.err.Error())) + return + } + Expect(err).ToNot(HaveOccurred()) + Expect(exists).To(Equal(tc.expect.exists)) + Expect(value).To(Equal(tc.expect.preserveValue)) + }, + Entry("neither machine nor node has preserve annotation", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - }, nil, nil, nil), + }}, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, }, - }), - ) - }) - Describe("#updateMachineState", func() { - type setup struct { - machines []*v1alpha1.Machine - nodes []*corev1.Node - } - type action struct { - machine string - } - 
type expect struct { - machine *v1alpha1.Machine - err bool - } - type data struct { - setup setup - action action - expect expect - } - objMeta := &metav1.ObjectMeta{ - GenerateName: "machine", - // using default namespace for non-namespaced objects - // as our current fake client is with the assumption - // that all objects are namespaced - Namespace: "", - } - machineName := "machine-0" - DescribeTable("##Different machine state update scenrios", - func(data *data) { - stop := make(chan struct{}) - defer close(stop) - machineObjects := []runtime.Object{} - for _, o := range data.setup.machines { - machineObjects = append(machineObjects, o) - } - coreObjects := []runtime.Object{} - for _, o := range data.setup.nodes { - coreObjects = append(coreObjects, o) - } - controller, trackers := createController(stop, objMeta.Namespace, machineObjects, nil, coreObjects) - defer trackers.Stop() - waitForCacheSync(stop, controller) - action := data.action - machine, err := controller.controlMachineClient.Machines(objMeta.Namespace).Get(action.machine, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - controller.updateMachineState(machine) - actual, err := controller.controlMachineClient.Machines(objMeta.Namespace).Get(action.machine, metav1.GetOptions{}) - Expect(err).To(BeNil()) - Expect(actual.Name).To(Equal(data.expect.machine.Name)) - Expect(actual.Status.Node).To(Equal(data.expect.machine.Status.Node)) - Expect(actual.Status.CurrentStatus.Phase).To(Equal(data.expect.machine.Status.CurrentStatus.Phase)) - Expect(actual.Status.CurrentStatus.//TimeoutActive).To(Equal(data.expect.machine.Status.CurrentStatus.//TimeoutActive)) - Expect(actual.Status.LastOperation.State).To(Equal(data.expect.machine.Status.LastOperation.State)) - Expect(actual.Status.LastOperation.Type).To(Equal(data.expect.machine.Status.LastOperation.Type)) - Expect(actual.Status.LastOperation.Description).To(Equal(data.expect.machine.Status.LastOperation.Description)) - if 
data.expect.machine.Labels != nil { - if _, ok := data.expect.machine.Labels["node"]; ok { - Expect(actual.Labels["node"]).To(Equal(data.expect.machine.Labels["node"])) - } - } - for i := range actual.Status.Conditions { - Expect(actual.Status.Conditions[i].Type).To(Equal(data.expect.machine.Status.Conditions[i].Type)) - Expect(actual.Status.Conditions[i].Status).To(Equal(data.expect.machine.Status.Conditions[i].Status)) - Expect(actual.Status.Conditions[i].Reason).To(Equal(data.expect.machine.Status.Conditions[i].Reason)) - Expect(actual.Status.Conditions[i].Message).To(Equal(data.expect.machine.Status.Conditions[i].Message)) - } }, - Entry("Machine does not have a node backing", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{}, nil, nil, nil), - }, - action: action{ - machine: machineName, - }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{}, nil, nil, nil), + expect: expect{ + preserveValue: "", + exists: false, + err: nil, + }, + }), + Entry("only machine has preserve annotation", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }}, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, }, - }), - Entry("Node object backing machine not found and machine conditions are empty", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - Node: "dummy-node", - }, nil, nil, nil), + }, + expect: expect{ + preserveValue: "machineValue", + exists: true, + err: nil, + }, + }), + Entry("only node has preserve 
annotation", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }}, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "nodeValue"}, + }, }, - action: action{ - machine: machineName, + }, + expect: expect{ + preserveValue: "nodeValue", + exists: true, + err: nil, + }, + }), + Entry("both machine and node have preserve annotation - node takes precedence", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }}, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "nodeValue"}, + }, }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - Node: "dummy-node", - }, nil, nil, nil), - }, - }), - Entry("Machine is running but node object is lost", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - Node: "dummy-node", + }, + expect: expect{ + preserveValue: "nodeValue", + exists: true, + err: nil, + }, + }), + Entry("machine has node label but node object is not found", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }}, + node: nil, + }, + expect: expect{ + 
preserveValue: "", + exists: false, + err: fmt.Errorf("node %q not found", "node-1"), + }, + }), + Entry("machine does not have node label", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, + Labels: map[string]string{}, + }}, + }, + expect: expect{ + preserveValue: "machineValue", + exists: true, + err: nil, + }, + }), + ) + }) + + Describe("#manageMachinePreservation", func() { + type setup struct { + machine *v1alpha1.Machine + node *corev1.Node + } + type expect struct { + retry machineutils.RetryPeriod + preserveExpiryTimeIsSet bool + err error + nodeCondition *corev1.NodeCondition + } + type testCase struct { + setup setup + expect expect + } + + DescribeTable("manageMachinePreservation behavior", + func(tc testCase) { + + stop := make(chan struct{}) + defer close(stop) + + var controlMachineObjects []runtime.Object + var targetCoreObjects []runtime.Object + + // Build machine + controlMachineObjects = append(controlMachineObjects, tc.setup.machine) + if tc.setup.node != nil { + targetCoreObjects = append(targetCoreObjects, tc.setup.node) + } + + c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) + defer trackers.Stop() + + waitForCacheSync(stop, c) + retry, err := c.manageMachinePreservation(context.TODO(), tc.setup.machine) + Expect(retry).To(Equal(tc.expect.retry)) + if tc.expect.err != nil { + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal(tc.expect.err.Error())) + return + } + Expect(err).ToNot(HaveOccurred()) + updatedMachine, err := c.controlMachineClient.Machines(tc.setup.machine.Namespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + + if tc.expect.preserveExpiryTimeIsSet { + 
Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeFalse()) + } else { + Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) + } + if tc.setup.machine.Labels[v1alpha1.NodeLabelKey] != "" { + updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.machine.Labels[v1alpha1.NodeLabelKey], metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + found := false + if tc.expect.nodeCondition != nil { + for _, cond := range updatedNode.Status.Conditions { + if cond.Type == tc.expect.nodeCondition.Type { + found = true + Expect(cond.Status).To(Equal(tc.expect.nodeCondition.Status)) + break + } + } + } + + if tc.expect.nodeCondition != nil { + Expect(found).To(BeTrue()) + } else { + Expect(found).To(BeFalse()) + } + + } + + }, + Entry("no preserve annotation on machine and node", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }, Status: v1alpha1.MachineStatus{ CurrentStatus: v1alpha1.CurrentStatus{ Phase: v1alpha1.MachineRunning, - //TimeoutActive: false, LastUpdateTime: metav1.Now(), }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Machine % successfully joined the cluster", machineName), - State: v1alpha1.MachineStateSuccessful, - Type: v1alpha1.MachineOperationCreate, - LastUpdateTime: metav1.Now(), + }, + }, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, + }, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + }, + }), + Entry("preserve annotation 'now' added on Running machine", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: 
map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueNow}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - Conditions: []corev1.NodeCondition{ - { - Message: "kubelet is posting ready status", - Reason: "KubeletReady", - Status: "True", - Type: "Ready", - }, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), }, - }, nil, nil, nil), + }, }, - action: action{ - machine: machineName, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - Node: "dummy-node", + }, + expect: expect{ + preserveExpiryTimeIsSet: true, + nodeCondition: &corev1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: corev1.ConditionTrue}, + retry: machineutils.LongRetry, + }, + }), + Entry("preserve annotation 'when-failed' added on Running machine", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }, Status: v1alpha1.MachineStatus{ CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineUnknown, + Phase: v1alpha1.MachineRunning, LastUpdateTime: metav1.Now(), }, - LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf( - "Node object went missing. 
Machine %s is unhealthy - changing MachineState to Unknown", - machineName, - ), - State: v1alpha1.MachineStateProcessing, - Type: v1alpha1.MachineOperationHealthCheck, + }, + }, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, + }, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + }, + }), + Entry("preserve annotation 'when-failed' added on Failed machine", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineFailed, LastUpdateTime: metav1.Now(), }, + }, + }, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{ { - Message: "kubelet is posting ready status", - Reason: "KubeletReady", - Status: "True", - Type: "Ready", + Type: v1alpha1.NodePreserved, + Status: corev1.ConditionTrue, }, }, - }, nil, nil, nil), + }, }, - }), - Entry("Machine and node both are present and kubelet ready status is updated", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - Node: "machine", - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachinePending, - LastUpdateTime: metav1.Now(), + }, + expect: expect{ + preserveExpiryTimeIsSet: true, + nodeCondition: &corev1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: corev1.ConditionTrue}, + retry: machineutils.LongRetry, + }, + }), + Entry("preserve annotation 'now' added on Healthy node ", 
testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - LastOperation: v1alpha1.LastOperation{ - Description: "Creating machine on cloud provider", - State: v1alpha1.MachineStateProcessing, - Type: v1alpha1.MachineOperationCreate, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineRunning, LastUpdateTime: metav1.Now(), }, - Conditions: []corev1.NodeCondition{ - { - Message: "kubelet is not ready", - Reason: "KubeletReady", - Status: "False", - Type: "Ready", - }, - }, - }, nil, nil, nil), - nodes: []*corev1.Node{ - { - ObjectMeta: *newObjectMeta(objMeta, 0), - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{ - { - Message: "kubelet is posting ready status", - Reason: "KubeletReady", - Status: "True", - Type: "Ready", - }, - }, - }, - }, }, }, - action: action{ - machine: machineName, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueNow}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - Node: "machine", + }, + expect: expect{ + preserveExpiryTimeIsSet: true, + nodeCondition: &corev1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: corev1.ConditionTrue}, + retry: machineutils.LongRetry, + }, + }), + Entry("preserve annotation 'when-failed' added on Healthy node ", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }, Status: v1alpha1.MachineStatus{ CurrentStatus: v1alpha1.CurrentStatus{ 
Phase: v1alpha1.MachineRunning, - //TimeoutActive: false, LastUpdateTime: metav1.Now(), }, - LastOperation: v1alpha1.LastOperation{ - Description: "Machine machine-0 successfully joined the cluster", - State: v1alpha1.MachineStateSuccessful, - Type: v1alpha1.MachineOperationCreate, - LastUpdateTime: metav1.Now(), + }, + }, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, + }, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + }}), + Entry("preserve annotation 'false' added on backing node of preserved machine", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{Time: time.Now().Add(1 * time.Hour)}, }, + }, + }, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "false"}, + }, + Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{ { - Message: "kubelet is posting ready status", - Reason: "KubeletReady", - Status: "True", - Type: "Ready", + Type: v1alpha1.NodePreserved, + Status: corev1.ConditionTrue, }, }, - }, nil, nil, nil), + }, }, - }), - Entry("Machine object does not have node-label and node exists", &data{ - setup: setup{ - machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ - ObjectMeta: *newObjectMeta(objMeta, 0), - }, &v1alpha1.MachineStatus{ - Node: 
"node", - }, nil, nil, nil), - nodes: []*corev1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "node-0", - }, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + }, + }), + Entry("machine auto-preserved by MCM", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValuePreservedByMCM}, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineFailed, + LastUpdateTime: metav1.Now(), }, }, }, - action: action{ - machine: machineName, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, }, - expect: expect{ - machine: newMachine(&v1alpha1.MachineTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-0", + }, + expect: expect{ + preserveExpiryTimeIsSet: true, + nodeCondition: &corev1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: corev1.ConditionTrue}, + retry: machineutils.LongRetry, + }, + }), + Entry("preservation timed out", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", }, - }, &v1alpha1.MachineStatus{ - Node: "node", - }, nil, nil, - map[string]string{ - "node": "node-0", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.Time{Time: time.Now().Add(-1 * time.Hour)}, }, - ), + }, }, - }), - ) - }) - */ + node: &corev1.Node{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "node-1", + }, + }, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, + retry: machineutils.LongRetry, + }, + }), + Entry("invalid preserve annotation on node of unpreserved machine", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), + }, + }, + }, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "invalidValue"}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, + }, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + err: nil, + }, + }), + Entry("machine annotated with preserve=now, but has no backing node", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, + }, + }, + }, + expect: expect{ + preserveExpiryTimeIsSet: true, + nodeCondition: nil, + retry: machineutils.LongRetry, + err: nil, + }, + }), + Entry("machine with backing node, but node retrieval fails", testCase{ + setup: setup{ + machine: &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }, + }, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.ShortRetry, + err: fmt.Errorf("node %q not 
found", "node-1"), + }, + }), + ) + }) }) From 4d3482a5ffd3efd016a7fd3c51af3f165d088438 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 26 Dec 2025 15:51:10 +0530 Subject: [PATCH 35/79] Refactor tests to reduce redundancy in code. --- .../provider/machinecontroller/machine.go | 4 + .../machinecontroller/machine_test.go | 656 ++++++------------ 2 files changed, 200 insertions(+), 460 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 4bfd2970b..37f44d07f 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -805,6 +805,10 @@ func (c *controller) handlePreserveAnnotationsChange(oldAnnotations, newAnnotati if !isPreserved { return true } + if machineutils.IsMachineFailed(machine) { + // If machine is already in failed state, no need to stop preservation + return true + } ctx := context.Background() err := clientretry.RetryOnConflict(nodeops.Backoff, func() error { klog.V(3).Infof("Stopping preservation for machine %q as preserve annotation changed from 'now' to 'when-failed'.", machine.Name) diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index a00425c7f..ddc301668 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -4077,10 +4077,9 @@ var _ = Describe("machine", func() { Describe("#handlePreserveAnnotationsChange", func() { type setup struct { - oldAnnotations map[string]string - newAnnotations map[string]string - machine *v1alpha1.Machine - node *corev1.Node + oldPreserveValue string + newPreserveValue string + phase v1alpha1.MachinePhase } type expect struct { @@ -4094,70 +4093,68 @@ var _ = Describe("machine", func() { stop := make(chan struct{}) defer close(stop) - var controlMachineObjects []runtime.Object + // Build machine object + machine := &v1alpha1.Machine{ + ObjectMeta: 
metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: "node-1", + }, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineRunning, + LastUpdateTime: metav1.Now(), + }, + }, + } + if tc.setup.phase != "" { + machine.Status.CurrentStatus.Phase = tc.setup.phase + } + if tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueNow || tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + machine.Status.CurrentStatus.PreserveExpiryTime = metav1.NewTime(metav1.Now().Add(1 * time.Hour)) + } else if tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && machineutils.IsMachineFailed(machine) { + machine.Status.CurrentStatus.PreserveExpiryTime = metav1.NewTime(metav1.Now().Add(1 * time.Hour)) + } - controlMachineObjects = append(controlMachineObjects, tc.setup.machine) - var targetCoreObjects []runtime.Object - if tc.setup.node != nil { - targetCoreObjects = append(targetCoreObjects, tc.setup.node) + controlMachineObjects := []runtime.Object{machine} + + // Build node object + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, } + targetCoreObjects := []runtime.Object{node} c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - result := c.handlePreserveAnnotationsChange(tc.setup.oldAnnotations, tc.setup.newAnnotations, tc.setup.machine) + result := c.handlePreserveAnnotationsChange(map[string]string{machineutils.PreserveMachineAnnotationKey: tc.setup.oldPreserveValue}, map[string]string{machineutils.PreserveMachineAnnotationKey: tc.setup.newPreserveValue}, machine) Expect(result).To(Equal(tc.expect.change)) - if tc.setup.newAnnotations != nil && tc.setup.oldAnnotations != nil && tc.setup.newAnnotations[machineutils.PreserveMachineAnnotationKey] == 
machineutils.PreserveMachineAnnotationValueWhenFailed && tc.setup.oldAnnotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueNow { - updatedMachine, err := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + if tc.setup.newPreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueNow { + updatedMachine, err := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) - Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) // machine preservation should have been stopped + if tc.setup.phase == v1alpha1.MachineFailed { + Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeFalse()) // machine preservation should be active + } else { + Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) // machine preservation should have been stopped + } } }, Entry("no change in preserve annotations", testCase{ setup: setup{ - oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, - newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{Time: time.Now().Add(1 * time.Hour)}, - }, - }, - }, + oldPreserveValue: "someValue", + newPreserveValue: "someValue", }, expect: expect{ change: false, }, }), - Entry("preserve annotation 
added on machine", testCase{ + Entry("preserve annotation newly added on machine", testCase{ setup: setup{ - oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: ""}, - newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, + newPreserveValue: "newValue", }, expect: expect{ change: true, @@ -4165,80 +4162,41 @@ var _ = Describe("machine", func() { }), Entry("preserve annotation removed", testCase{ setup: setup{ - oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "someValue"}, - newAnnotations: map[string]string{"someOtherKey": "someValue"}, - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{"someOtherKey": "someValue"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}}, + oldPreserveValue: "someValue", + newPreserveValue: "", + }, expect: expect{ change: true, }, }), Entry("preserve annotation value changed", testCase{ setup: setup{ - oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "oldValue"}, - newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "newValue"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}}, + oldPreserveValue: "oldValue", + newPreserveValue: "newValue"}, expect: expect{ 
change: true, }, }), Entry("both annotations are nil", testCase{ - setup: setup{ - oldAnnotations: nil, - newAnnotations: nil, - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}}, + setup: setup{}, expect: expect{ change: false, }, }), - Entry("preserve annotation changed from now to when-failed", testCase{ + Entry("preserve annotation changed from now to when-failed on Running machine", testCase{ setup: setup{ - oldAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueNow}, - newAnnotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, - Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{Time: time.Now().Add(1 * time.Hour)}, - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - }, + oldPreserveValue: machineutils.PreserveMachineAnnotationValueNow, + newPreserveValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + }, + expect: expect{ + change: true, + }, + }), + Entry("preserve annotation changed from now to when-failed on Failed machine", testCase{ + setup: setup{ + oldPreserveValue: machineutils.PreserveMachineAnnotationValueNow, + newPreserveValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + phase: v1alpha1.MachineFailed, }, expect: expect{ change: true, @@ -4249,8 +4207,9 @@ var _ = Describe("machine", func() { 
Describe("#computeEffectivePreserveAnnotationValue", func() { type setup struct { - machine *v1alpha1.Machine - node *corev1.Node + machinePreserveAnnotation string + nodePreserveAnnotation string + nodeName string } type expect struct { preserveValue string @@ -4272,16 +4231,40 @@ var _ = Describe("machine", func() { var targetCoreObjects []runtime.Object // Build machine - controlMachineObjects = append(controlMachineObjects, tc.setup.machine) - if tc.setup.node != nil { - targetCoreObjects = append(targetCoreObjects, tc.setup.node) + machine := &v1alpha1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: tc.setup.nodeName, + }, + Annotations: map[string]string{}, + }, + } + if tc.setup.machinePreserveAnnotation != "" { + machine.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.machinePreserveAnnotation + } + + controlMachineObjects = append(controlMachineObjects, machine) + // Build node + if tc.setup.nodeName != "" && tc.setup.nodeName != "invalid" { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.setup.nodeName, + Annotations: map[string]string{}, + }, + } + if tc.setup.nodePreserveAnnotation != "" { + node.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.nodePreserveAnnotation + } + targetCoreObjects = append(targetCoreObjects, node) } c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - value, exists, err := c.computeEffectivePreserveAnnotationValue(tc.setup.machine) + value, exists, err := c.computeEffectivePreserveAnnotationValue(machine) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) @@ -4294,19 +4277,7 @@ var _ = Describe("machine", func() { }, Entry("neither machine nor node has preserve annotation", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: 
testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - }, + nodeName: "node-1", }, expect: expect{ preserveValue: "", @@ -4316,20 +4287,8 @@ var _ = Describe("machine", func() { }), Entry("only machine has preserve annotation", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - }, + machinePreserveAnnotation: "machineValue", + nodeName: "node-1", }, expect: expect{ preserveValue: "machineValue", @@ -4339,20 +4298,8 @@ var _ = Describe("machine", func() { }), Entry("only node has preserve annotation", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "nodeValue"}, - }, - }, + nodePreserveAnnotation: "nodeValue", + nodeName: "node-1", }, expect: expect{ preserveValue: "nodeValue", @@ -4362,21 +4309,9 @@ var _ = Describe("machine", func() { }), Entry("both machine and node have preserve annotation - node takes precedence", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: 
map[string]string{machineutils.PreserveMachineAnnotationKey: "nodeValue"}, - }, - }, + machinePreserveAnnotation: "machineValue", + nodePreserveAnnotation: "nodeValue", + nodeName: "node-1", }, expect: expect{ preserveValue: "nodeValue", @@ -4386,32 +4321,18 @@ var _ = Describe("machine", func() { }), Entry("machine has node label but node object is not found", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }}, - node: nil, + machinePreserveAnnotation: "machineValue", + nodeName: "invalid", }, expect: expect{ preserveValue: "", exists: false, - err: fmt.Errorf("node %q not found", "node-1"), + err: fmt.Errorf("node %q not found", "invalid"), }, }), Entry("machine does not have node label", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "machineValue"}, - Labels: map[string]string{}, - }}, + machinePreserveAnnotation: "machineValue", }, expect: expect{ preserveValue: "machineValue", @@ -4424,8 +4345,11 @@ var _ = Describe("machine", func() { Describe("#manageMachinePreservation", func() { type setup struct { - machine *v1alpha1.Machine - node *corev1.Node + machineAnnotationValue string + nodeAnnotationValue string + nodeName string + machinePhase v1alpha1.MachinePhase + preserveExpiryTime metav1.Time } type expect struct { retry machineutils.RetryPeriod @@ -4448,16 +4372,47 @@ var _ = Describe("machine", func() { var targetCoreObjects []runtime.Object // Build machine - controlMachineObjects = append(controlMachineObjects, tc.setup.machine) - if tc.setup.node != nil { - targetCoreObjects = append(targetCoreObjects, tc.setup.node) + machine := &v1alpha1.Machine{ + 
ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: "m1", + Labels: map[string]string{ + v1alpha1.NodeLabelKey: tc.setup.nodeName, + }, + Annotations: map[string]string{}, + }, Status: v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: tc.setup.machinePhase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: tc.setup.preserveExpiryTime, + }, + }, + } + if tc.setup.machineAnnotationValue != "" { + machine.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.machineAnnotationValue + } + controlMachineObjects = append(controlMachineObjects, machine) + if tc.setup.nodeName != "" && tc.setup.nodeName != "invalid" { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.setup.nodeName, + Annotations: map[string]string{}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, + } + if tc.setup.nodeAnnotationValue != "" { + node.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.nodeAnnotationValue + } + targetCoreObjects = append(targetCoreObjects, node) } - c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() - waitForCacheSync(stop, c) - retry, err := c.manageMachinePreservation(context.TODO(), tc.setup.machine) + + retry, err := c.manageMachinePreservation(context.TODO(), machine) + Expect(retry).To(Equal(tc.expect.retry)) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) @@ -4465,16 +4420,15 @@ var _ = Describe("machine", func() { return } Expect(err).ToNot(HaveOccurred()) - updatedMachine, err := c.controlMachineClient.Machines(tc.setup.machine.Namespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + updatedMachine, err := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) - if tc.expect.preserveExpiryTimeIsSet { 
Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeFalse()) } else { Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) } - if tc.setup.machine.Labels[v1alpha1.NodeLabelKey] != "" { - updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.machine.Labels[v1alpha1.NodeLabelKey], metav1.GetOptions{}) + if tc.setup.nodeName != "" { + updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) found := false if tc.expect.nodeCondition != nil { @@ -4486,40 +4440,16 @@ var _ = Describe("machine", func() { } } } - if tc.expect.nodeCondition != nil { Expect(found).To(BeTrue()) } else { Expect(found).To(BeFalse()) } - } - }, Entry("no preserve annotation on machine and node", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{}, - }, - }, + nodeName: "node-1", }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4529,29 +4459,9 @@ var _ = Describe("machine", func() { }), Entry("preserve annotation 'now' added on Running machine", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueNow}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: 
v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{}, - }, - }, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: true, @@ -4563,29 +4473,9 @@ var _ = Describe("machine", func() { }), Entry("preserve annotation 'when-failed' added on Running machine", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{}, - }, - }, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4593,36 +4483,11 @@ var _ = Describe("machine", func() { retry: machineutils.LongRetry, }, }), - Entry("preserve annotation 'when-failed' added on Failed machine", testCase{ + Entry("Failed machine annotated with when-failed", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - 
CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineFailed, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{ - { - Type: v1alpha1.NodePreserved, - Status: corev1.ConditionTrue, - }, - }, - }, - }, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "node-1", + machinePhase: v1alpha1.MachineFailed, }, expect: expect{ preserveExpiryTimeIsSet: true, @@ -4634,29 +4499,9 @@ var _ = Describe("machine", func() { }), Entry("preserve annotation 'now' added on Healthy node ", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueNow}, - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{}, - }, - }, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: true, @@ -4668,29 +4513,9 @@ var _ = Describe("machine", func() { }), Entry("preserve annotation 'when-failed' added on Healthy node ", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ 
- ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueWhenFailed}, - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{}, - }, - }, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4699,36 +4524,10 @@ var _ = Describe("machine", func() { }}), Entry("preserve annotation 'false' added on backing node of preserved machine", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{Time: time.Now().Add(1 * time.Hour)}, - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "false"}, - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{ - { - Type: v1alpha1.NodePreserved, - Status: corev1.ConditionTrue, - }, - }, - }, - }, + nodeAnnotationValue: "false", + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, + preserveExpiryTime: metav1.NewTime(metav1.Now().Add(1 * time.Hour)), }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4738,29 +4537,10 @@ var _ = Describe("machine", func() { }), Entry("machine auto-preserved by MCM", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - Annotations: 
map[string]string{machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValuePreservedByMCM}, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineFailed, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{}, - }, - }, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + nodeAnnotationValue: "", + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: true, @@ -4772,27 +4552,11 @@ var _ = Describe("machine", func() { }), Entry("preservation timed out", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{Time: time.Now().Add(-1 * time.Hour)}, - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - }, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, + preserveExpiryTime: metav1.NewTime(metav1.Now().Add(-1 * time.Minute)), }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4802,29 +4566,10 @@ var _ = Describe("machine", func() { }), Entry("invalid preserve annotation on node of unpreserved machine", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - 
v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "invalidValue"}, - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{}, - }, - }, + machineAnnotationValue: "", + nodeAnnotationValue: "invalidValue", + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4835,13 +4580,10 @@ var _ = Describe("machine", func() { }), Entry("machine annotated with preserve=now, but has no backing node", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, - }, - }, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeAnnotationValue: "", + nodeName: "", + machinePhase: v1alpha1.MachineUnknown, }, expect: expect{ preserveExpiryTimeIsSet: true, @@ -4852,22 +4594,16 @@ var _ = Describe("machine", func() { }), Entry("machine with backing node, but node retrieval fails", testCase{ setup: setup{ - machine: &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Annotations: map[string]string{machineutils.PreserveMachineAnnotationKey: "now"}, - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, - }, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeAnnotationValue: "", + nodeName: "invalid", + machinePhase: v1alpha1.MachineUnknown, }, expect: expect{ preserveExpiryTimeIsSet: false, nodeCondition: nil, retry: machineutils.ShortRetry, - err: fmt.Errorf("node %q not found", "node-1"), + err: fmt.Errorf("node %q not found", "invalid"), }, }), ) From 
5033fdfbf91c1e663547a200fc247282051a5b4c Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 29 Dec 2025 16:47:30 +0530 Subject: [PATCH 36/79] Add tests for preservation logic in machine_util.go --- .../machinecontroller/machine_test.go | 77 --- .../machinecontroller/machine_util.go | 39 +- .../machinecontroller/machine_util_test.go | 514 ++++++++++++++++++ 3 files changed, 529 insertions(+), 101 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index ddc301668..10eaad4de 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -196,82 +196,6 @@ var _ = Describe("machine", func() { ) }) - /* - Describe("##updateMachineConditions", func() { - Describe("Update conditions of a non-existing machine", func() { - It("should return error", func() { - stop := make(chan struct{}) - defer close(stop) - - objects := []runtime.Object{} - c, trackers := createController(stop, testNamespace, objects, nil, nil) - defer trackers.Stop() - - testMachine := &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testmachine", - Namespace: testNamespace, - }, - Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineTerminating, - }, - }, - } - conditions := []corev1.NodeCondition{} - var _, err = c.updateMachineConditions(testMachine, conditions) - Expect(err).Should(Not(BeNil())) - }) - }) - DescribeTable("Update conditions of an existing machine", - func(phase v1alpha1.MachinePhase, conditions []corev1.NodeCondition, expectedPhase v1alpha1.MachinePhase) { - stop := make(chan struct{}) - defer close(stop) - - testMachine := &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testmachine", - Namespace: testNamespace, - }, - Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: phase, - }, - }, - } - objects := []runtime.Object{} - objects = 
append(objects, testMachine) - - c, trackers := createController(stop, testNamespace, objects, nil, nil) - defer trackers.Stop() - - var updatedMachine, err = c.updateMachineConditions(testMachine, conditions) - Expect(updatedMachine.Status.Conditions).Should(BeEquivalentTo(conditions)) - Expect(updatedMachine.Status.CurrentStatus.Phase).Should(BeIdenticalTo(expectedPhase)) - Expect(err).Should(BeNil()) - }, - Entry("healthy status but machine terminating", v1alpha1.MachineTerminating, []corev1.NodeCondition{ - { - Type: corev1.NodeReady, - Status: corev1.ConditionTrue, - }, - }, v1alpha1.MachineTerminating), - Entry("unhealthy status but machine running", v1alpha1.MachineRunning, []corev1.NodeCondition{ - { - Type: corev1.NodeReady, - Status: corev1.ConditionFalse, - }, - }, v1alpha1.MachineUnknown), - Entry("healthy status but machine not running", v1alpha1.MachineAvailable, []corev1.NodeCondition{ - { - Type: corev1.NodeReady, - Status: corev1.ConditionTrue, - }, - }, v1alpha1.MachineRunning), - ) - }) - */ - Describe("#ValidateMachine", func() { type data struct { action machineapi.Machine @@ -4410,7 +4334,6 @@ var _ = Describe("machine", func() { c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - retry, err := c.manageMachinePreservation(context.TODO(), machine) Expect(retry).To(Equal(tc.expect.retry)) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index da6c557c6..ceea0339a 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2371,20 +2371,19 @@ Utility Functions for Machine Preservation func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { nodeName := machine.Labels[v1alpha1.NodeLabelKey] isExpirySet := 
machineutils.IsPreserveExpiryTimeSet(machine) - - // If machine has no backing node - if nodeName == "" { - if isExpirySet { - return nil - } + updatedMachine := machine.DeepCopy() + if !isExpirySet { + klog.V(4).Infof("Starting preservation flow for machine %q.", machine.Name) // Step 1: Add preserveExpiryTime to machine status - updatedMachine, err := c.setPreserveExpiryTimeOnMachine(ctx, machine) + updatedMachine, err := c.setPreserveExpiryTimeOnMachine(ctx, updatedMachine) if err != nil { return err } - klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) - return nil - + if nodeName == "" { + // if machine has no backing node, preservation is complete + klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) + return nil + } } // Machine has a backing node node, err := c.nodeLister.Get(nodeName) @@ -2394,18 +2393,11 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } existingNodePreservedCondition := nodeops.GetCondition(node, v1alpha1.NodePreserved) // check if preservation is already complete - if isExpirySet && c.isPreservedNodeConditionStatusTrue(existingNodePreservedCondition) { + if c.isPreservedNodeConditionStatusTrue(existingNodePreservedCondition) { return nil } // Preservation incomplete - either the flow is just starting or in progress - updatedMachine := machine - if !isExpirySet { - // Step 1: Add preserveExpiryTime to machine status - updatedMachine, err = c.setPreserveExpiryTimeOnMachine(ctx, machine) - if err != nil { - return err - } - } + // Step 2: Add annotations to prevent scale down of node by CA _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { @@ -2413,7 +2405,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } drainSuccessful := false - if c.shouldNodeBeDrained(machine, 
existingNodePreservedCondition) { + if c.shouldNodeBeDrained(updatedMachine, existingNodePreservedCondition) { // Step 3: If machine is in Failed Phase, drain the backing node err = c.drainPreservedNode(ctx, machine) if err != nil { @@ -2442,7 +2434,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach // setPreserveExpiryTimeOnMachine sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { - clone := machine.DeepCopy() + preservedCurrentStatus := v1alpha1.CurrentStatus{ Phase: machine.Status.CurrentStatus.Phase, TimeoutActive: machine.Status.CurrentStatus.TimeoutActive, @@ -2450,8 +2442,8 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), } - clone.Status.CurrentStatus = preservedCurrentStatus - updatedMachine, err := c.controlMachineClient.Machines(machine.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + machine.Status.CurrentStatus = preservedCurrentStatus + updatedMachine, err := c.controlMachineClient.Machines(machine.Namespace).UpdateStatus(ctx, machine, metav1.UpdateOptions{}) if err != nil { klog.Errorf("machine/status UPDATE failed for machine %q. 
Retrying, error: %s", machine.Name, err) return nil, err @@ -2462,7 +2454,6 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine // addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, error) { - // Check if annotation already exists with correct value if node.Annotations != nil && node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 53e92a6ba..0095b9bf2 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -9,6 +9,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" "k8s.io/klog/v2" "time" @@ -3957,4 +3958,517 @@ var _ = Describe("machine_util", func() { }), ) }) + Describe("#preserveMachine", func() { + type setup struct { + machine *machinev1.Machine + nodeName string + preserveValue string + isCAAnnotationPresent bool + preservedNodeCondition corev1.NodeCondition + } + type expect struct { + preserveNodeCondition corev1.NodeCondition + isPreserveExpiryTimeSet bool + isCAAnnotationPresent bool + err error + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("##preserveMachine behaviour scenarios", + func(tc *testCase) { + stop := make(chan struct{}) + defer close(stop) + + var controlMachineObjects []runtime.Object + var targetCoreObjects []runtime.Object + + controlMachineObjects = append(controlMachineObjects, tc.setup.machine) + if tc.setup.nodeName != "" && tc.setup.nodeName != "invalid" { + node := corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
tc.setup.nodeName, + Labels: map[string]string{}, + Annotations: map[string]string{}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, + } + if tc.setup.isCAAnnotationPresent { + node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = "true" + } + targetCoreObjects = append(targetCoreObjects, &node) + } + + c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) + defer trackers.Stop() + waitForCacheSync(stop, c) + err := c.preserveMachine(context.TODO(), tc.setup.machine, tc.setup.preserveValue) + if tc.expect.err == nil { + Expect(err).To(BeNil()) + } else { + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal(tc.expect.err.Error())) + } + updatedMachine, getErr := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + Expect(getErr).To(BeNil()) + if tc.expect.isPreserveExpiryTimeSet { + Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeFalse()) + } else { + Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) + } + if tc.setup.nodeName == "" || tc.setup.nodeName == "invalid" { + return + } + updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) + Expect(getErr).To(BeNil()) + if tc.expect.isCAAnnotationPresent { + Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal(autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue)) + } + if tc.expect.preserveNodeCondition.Type != "" { + updatedNodeCondition := nodeops.GetCondition(updatedNode, tc.expect.preserveNodeCondition.Type) + Expect(updatedNodeCondition.Status).To(Equal(tc.expect.preserveNodeCondition.Status)) + Expect(updatedNodeCondition.Reason).To(Equal(tc.expect.preserveNodeCondition.Reason)) + 
Expect(updatedNodeCondition.Message).To(Equal(tc.expect.preserveNodeCondition.Message)) + } + + }, + Entry("when preserve=now and there is no backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineUnknown, + LastUpdateTime: metav1.Now(), + }, + }, + }, + nodeName: "", + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + }, + }), + Entry("when preserve=now, the machine is Running, and there is a backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineRunning, + LastUpdateTime: metav1.Now(), + }, + }, + }, + nodeName: "node-1", + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: true, + preserveNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + }, + }, + }), + Entry("when preserve=now, the machine has Failed, and there is a backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + }, + }, + }, + nodeName: 
"node-1", + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: true, + preserveNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + }, + }), + Entry("when preserve=now, the machine has Failed, and the preservation is incomplete after step 1 - adding preserveExpiryTime", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + }, + }, + }, + nodeName: "node-1", + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: true, + preserveNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + }, + }), + Entry("when preserve=now, the machine has Failed, and the preservation is incomplete at step 2 - adding CA annotations", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + }, 
+ }, + }, + nodeName: "node-1", + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + isCAAnnotationPresent: true, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: true, + preserveNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + }, + }), + Entry("when preserve=now, the machine has Failed, and the preservation is incomplete because of drain failure", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + }, + }, + }, + nodeName: "node-1", + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + isCAAnnotationPresent: true, + preservedNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.NodePreservedByUser, + Message: machinev1.PreservedNodeDrainUnsuccessful, + }, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: true, + preserveNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + }, + }), + Entry("when preserve=when-failed, the machine has Failed, and there is a backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + 
Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + }, + }, + }, + nodeName: "node-1", + preserveValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: true, + preserveNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + }, + }), + Entry("when preserve=auto-preserved, the machine has Failed, and there is a backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + }, + }, + }, + nodeName: "node-1", + preserveValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + }, + expect: expect{ + err: nil, + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: true, + preserveNodeCondition: corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByMCM, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + }, + }), + Entry("when preserve=now, the machine has Failed, and there is an error fetching backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "invalid", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: 
metav1.Now(), + }, + }, + }, + nodeName: "invalid", + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + err: fmt.Errorf("node \"invalid\" not found"), + isPreserveExpiryTimeSet: true, + isCAAnnotationPresent: false, + }, + }, + ), + ) + }) + Describe("#stopMachinePreservation", func() { + type setup struct { + machine *machinev1.Machine + node *corev1.Node + } + + type expect struct { + err error + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("##preserveMachine behaviour scenarios", + func(tc *testCase) { + stop := make(chan struct{}) + defer close(stop) + + var controlMachineObjects []runtime.Object + var targetCoreObjects []runtime.Object + + controlMachineObjects = append(controlMachineObjects, tc.setup.machine) + if tc.setup.machine.Labels[machinev1.NodeLabelKey] != "" && tc.setup.machine.Labels[machinev1.NodeLabelKey] != "invalid" { + targetCoreObjects = append(targetCoreObjects, tc.setup.node) + } + + c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) + defer trackers.Stop() + waitForCacheSync(stop, c) + err := c.stopMachinePreservation(context.TODO(), tc.setup.machine) + if tc.expect.err != nil { + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal(tc.expect.err.Error())) + return + } + Expect(err).To(BeNil()) + updatedMachine, getErr := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + Expect(getErr).To(BeNil()) + Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) + + if tc.setup.machine.Labels[machinev1.NodeLabelKey] == "" || tc.setup.machine.Labels[machinev1.NodeLabelKey] == "invalid" { + return + } + updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.node.Name, metav1.GetOptions{}) + Expect(getErr).To(BeNil()) + 
Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("")) + updatedNodeCondition := nodeops.GetCondition(updatedNode, machinev1.NodePreserved) + Expect(updatedNodeCondition).ToNot(BeNil()) + Expect(updatedNodeCondition.Status).To(Equal(corev1.ConditionFalse)) + Expect(updatedNodeCondition.Reason).To(Equal(machinev1.NodePreservationStopped)) + }, + Entry("when stopping preservation on a preserved machine with backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "node-1", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + }, + }, + }, + node: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{ + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + }, + }, + }, + }, + }, + expect: expect{ + err: nil, + }, + }), + Entry("when stopping preservation on a preserved machine with no backing node", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + }, + }, + }, + }, + 
expect: expect{ + err: nil, + }, + }), + Entry("when stopping preservation on a preserved machine, and the backing node is not found", &testCase{ + setup: setup{ + machine: &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: "invalid", + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + }, + }, + }, + }, + expect: expect{ + err: fmt.Errorf("node \"invalid\" not found"), + }, + }), + ) + }) }) From 58ac6cd183dc2161dc02f280b77a1389ac4ff59e Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 31 Dec 2025 14:48:52 +0530 Subject: [PATCH 37/79] Refactor test code to reduce redundant code --- .../machinecontroller/machine_util_test.go | 315 +++++------------- 1 file changed, 85 insertions(+), 230 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 0095b9bf2..87581bda6 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -3960,7 +3960,8 @@ var _ = Describe("machine_util", func() { }) Describe("#preserveMachine", func() { type setup struct { - machine *machinev1.Machine + machinePhase machinev1.MachinePhase + preserveExpiryTime metav1.Time nodeName string preserveValue string isCAAnnotationPresent bool @@ -3984,8 +3985,25 @@ var _ = Describe("machine_util", func() { var controlMachineObjects []runtime.Object var targetCoreObjects []runtime.Object - controlMachineObjects = append(controlMachineObjects, tc.setup.machine) - if tc.setup.nodeName != "" && tc.setup.nodeName != "invalid" { + machine := &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: 
testNamespace, + Labels: map[string]string{ + machinev1.NodeLabelKey: tc.setup.nodeName, + }, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: tc.setup.machinePhase, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: tc.setup.preserveExpiryTime, + }, + }, + } + if tc.setup.nodeName != "" && tc.setup.nodeName != "err-backing-node" { + node := corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: tc.setup.nodeName, @@ -4001,25 +4019,26 @@ var _ = Describe("machine_util", func() { } targetCoreObjects = append(targetCoreObjects, &node) } + controlMachineObjects = append(controlMachineObjects, machine) c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.preserveMachine(context.TODO(), tc.setup.machine, tc.setup.preserveValue) + err := c.preserveMachine(context.TODO(), machine, tc.setup.preserveValue) if tc.expect.err == nil { Expect(err).To(BeNil()) } else { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(tc.expect.err.Error())) } - updatedMachine, getErr := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + updatedMachine, getErr := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) Expect(getErr).To(BeNil()) if tc.expect.isPreserveExpiryTimeSet { Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeFalse()) } else { Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) } - if tc.setup.nodeName == "" || tc.setup.nodeName == "invalid" { + if tc.setup.nodeName == "" || tc.setup.nodeName == "err-backing-node" { return } updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) @@ -4037,19 +4056,7 @@ var _ = Describe("machine_util", func() { }, 
Entry("when preserve=now and there is no backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineUnknown, - LastUpdateTime: metav1.Now(), - }, - }, - }, + machinePhase: machinev1.MachineUnknown, nodeName: "", preserveValue: machineutils.PreserveMachineAnnotationValueNow, }, @@ -4060,22 +4067,7 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=now, the machine is Running, and there is a backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - }, + machinePhase: machinev1.MachineRunning, nodeName: "node-1", preserveValue: machineutils.PreserveMachineAnnotationValueNow, }, @@ -4092,22 +4084,7 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=now, the machine has Failed, and there is a backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - }, - }, - }, + machinePhase: machinev1.MachineFailed, nodeName: "node-1", preserveValue: machineutils.PreserveMachineAnnotationValueNow, }, @@ -4125,23 +4102,7 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=now, the machine has Failed, and 
the preservation is incomplete after step 1 - adding preserveExpiryTime", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), - }, - }, - }, + machinePhase: machinev1.MachineFailed, nodeName: "node-1", preserveValue: machineutils.PreserveMachineAnnotationValueNow, }, @@ -4159,23 +4120,7 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=now, the machine has Failed, and the preservation is incomplete at step 2 - adding CA annotations", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), - }, - }, - }, + machinePhase: machinev1.MachineFailed, nodeName: "node-1", preserveValue: machineutils.PreserveMachineAnnotationValueNow, isCAAnnotationPresent: true, @@ -4194,23 +4139,7 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=now, the machine has Failed, and the preservation is incomplete because of drain failure", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ 
- Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), - }, - }, - }, + machinePhase: machinev1.MachineFailed, nodeName: "node-1", preserveValue: machineutils.PreserveMachineAnnotationValueNow, isCAAnnotationPresent: true, @@ -4235,22 +4164,7 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=when-failed, the machine has Failed, and there is a backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - }, - }, - }, + machinePhase: machinev1.MachineFailed, nodeName: "node-1", preserveValue: machineutils.PreserveMachineAnnotationValueWhenFailed, }, @@ -4268,22 +4182,7 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=auto-preserved, the machine has Failed, and there is a backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - }, - }, - }, + machinePhase: machinev1.MachineFailed, nodeName: "node-1", preserveValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, }, @@ -4301,27 +4200,12 @@ var _ = Describe("machine_util", func() { }), Entry("when preserve=now, the machine has Failed, and there is an error fetching backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: 
testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "invalid", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - }, - }, - }, - nodeName: "invalid", + machinePhase: machinev1.MachineFailed, + nodeName: "err-backing-node", preserveValue: machineutils.PreserveMachineAnnotationValueNow, }, expect: expect{ - err: fmt.Errorf("node \"invalid\" not found"), + err: fmt.Errorf("node \"err-backing-node\" not found"), isPreserveExpiryTimeSet: true, isCAAnnotationPresent: false, }, @@ -4331,8 +4215,7 @@ var _ = Describe("machine_util", func() { }) Describe("#stopMachinePreservation", func() { type setup struct { - machine *machinev1.Machine - node *corev1.Node + nodeName string } type expect struct { @@ -4350,29 +4233,66 @@ var _ = Describe("machine_util", func() { var controlMachineObjects []runtime.Object var targetCoreObjects []runtime.Object - controlMachineObjects = append(controlMachineObjects, tc.setup.machine) - if tc.setup.machine.Labels[machinev1.NodeLabelKey] != "" && tc.setup.machine.Labels[machinev1.NodeLabelKey] != "invalid" { - targetCoreObjects = append(targetCoreObjects, tc.setup.node) + machine := &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + Labels: map[string]string{}, + }, + Spec: machinev1.MachineSpec{}, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + }, + }, + } + if tc.setup.nodeName != "" && tc.setup.nodeName != "err-backing-node" { + machine.Labels[machinev1.NodeLabelKey] = tc.setup.nodeName + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{ + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: 
autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.NodePreservedByUser, + }, + }, + }, + } + targetCoreObjects = append(targetCoreObjects, node) + + } else { + machine.Labels[machinev1.NodeLabelKey] = "" } + controlMachineObjects = append(controlMachineObjects, machine) + c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.stopMachinePreservation(context.TODO(), tc.setup.machine) + err := c.stopMachinePreservation(context.TODO(), machine) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(tc.expect.err.Error())) return } Expect(err).To(BeNil()) - updatedMachine, getErr := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), tc.setup.machine.Name, metav1.GetOptions{}) + updatedMachine, getErr := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) Expect(getErr).To(BeNil()) Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) - if tc.setup.machine.Labels[machinev1.NodeLabelKey] == "" || tc.setup.machine.Labels[machinev1.NodeLabelKey] == "invalid" { + if machine.Labels[machinev1.NodeLabelKey] == "" || machine.Labels[machinev1.NodeLabelKey] == "err-backing-node" { return } - updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.node.Name, metav1.GetOptions{}) + updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) Expect(getErr).To(BeNil()) Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("")) updatedNodeCondition := nodeops.GetCondition(updatedNode, machinev1.NodePreserved) @@ -4382,40 +4302,7 @@ var 
_ = Describe("machine_util", func() { }, Entry("when stopping preservation on a preserved machine with backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "node-1", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), - }, - }, - }, - node: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{ - autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, - }, - }, - Status: corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{ - { - Type: machinev1.NodePreserved, - Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, - }, - }, - }, - }, + nodeName: "node-1", }, expect: expect{ err: nil, @@ -4423,23 +4310,7 @@ var _ = Describe("machine_util", func() { }), Entry("when stopping preservation on a preserved machine with no backing node", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), - }, - }, - }, + nodeName: "", }, expect: expect{ err: nil, @@ -4447,26 +4318,10 @@ var _ = Describe("machine_util", func() { }), Entry("when stopping preservation on a preserved machine, and the backing node is not found", &testCase{ setup: setup{ - machine: &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ 
- Name: "machine-1", - Namespace: testNamespace, - Labels: map[string]string{ - machinev1.NodeLabelKey: "invalid", - }, - }, - Spec: machinev1.MachineSpec{}, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), - }, - }, - }, + nodeName: "no-backing-node", }, expect: expect{ - err: fmt.Errorf("node \"invalid\" not found"), + err: fmt.Errorf("node \"no-backing-node\" not found"), }, }), ) From 68b9ed1e5831d273a91b9e089a1308afb637a274 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 31 Dec 2025 15:44:34 +0530 Subject: [PATCH 38/79] Fix bugs after merging --- pkg/util/provider/machinecontroller/machine.go | 3 --- pkg/util/provider/machinecontroller/node.go | 8 ++++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 37f44d07f..e47684cc7 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -7,7 +7,6 @@ package controller import ( "context" - "errors" "fmt" "github.com/gardener/machine-controller-manager/pkg/util/nodeops" clientretry "k8s.io/client-go/util/retry" @@ -19,8 +18,6 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/selection" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" diff --git a/pkg/util/provider/machinecontroller/node.go b/pkg/util/provider/machinecontroller/node.go index 7afa8991f..ae28505d7 100644 --- a/pkg/util/provider/machinecontroller/node.go +++ b/pkg/util/provider/machinecontroller/node.go @@ -100,6 +100,14 @@ func (c *controller) updateNode(oldObj, newObj any) { if nodeConditionsHaveChanged && 
!(isMachineCrashLooping || isMachineTerminating) { c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. Conditions of node %q differ from machine status", node.Name)) } + + // to reconcile on change in annotations related to preservation + if c.handlePreserveAnnotationsChange(oldNode.Annotations, node.Annotations, machine) { + klog.V(3).Infof("Node %q for machine %q is annotated for preservation with value %q.", node.Name, machine.Name, node.Annotations[machineutils.PreserveMachineAnnotationKey]) + c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. Preserve annotations added or updated for node %q", getNodeName(machine))) + return + } + } func (c *controller) deleteNode(obj any) { From fd1c51eb77b51960f935ec8eceb72f97c0fb609f Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 6 Jan 2026 11:18:01 +0530 Subject: [PATCH 39/79] Remove testing code --- .../provider/machinecontroller/machine.go | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index e47684cc7..93fc4e548 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -60,11 +60,6 @@ func (c *controller) updateMachine(oldObj, newObj any) { klog.Errorf("couldn't convert to machine resource from object") return } - { // TODO@thiyyakat: remove after testing - if newMachine.Labels["test-failed"] != oldMachine.Labels["test-failed"] { - c.enqueueMachine(newObj, "TEST: handling machine failure simulation UPDATE event") - } - } if c.handlePreserveAnnotationsChange(oldMachine.Annotations, newMachine.Annotations, newMachine) { c.enqueueMachine(newObj, "handling machine object preservation related UPDATE event") return @@ -221,40 +216,6 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp return retry, err } - { //TODO@thiyyakat: remove after testing - if machine.Labels["test-failed"] == 
"true" { - node, err := c.nodeLister.Get(getNodeName(machine)) - if err != nil { - klog.V(3).Infof("TEST:Machine %q: Failed to get node %q: %v", machine.Name, machine.Name, err) - return machineutils.ShortRetry, err - } - if cond := nodeops.GetCondition(node, corev1.NodeNetworkUnavailable); cond.Status != corev1.ConditionTrue { - newCond := corev1.NodeCondition{Type: corev1.NodeNetworkUnavailable, Status: corev1.ConditionTrue} - err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newCond) - if err != nil { - klog.V(2).Infof("TEST:Machine %q: Failed to change node condition %q: %v", machine.Name, machine.Name, err) - return machineutils.ShortRetry, err - } - klog.V(2).Infof("TEST: marked nodenetwork as unavailable for machine %s", machine.Name) - } - } else if machine.Labels["test-failed"] == "false" { - node, err := c.nodeLister.Get(getNodeName(machine)) - if err != nil { - klog.V(3).Infof("TEST:Machine %q: Failed to get node %q: %v", machine.Name, machine.Name, err) - return machineutils.ShortRetry, err - } - if cond := nodeops.GetCondition(node, corev1.NodeNetworkUnavailable); cond.Status != corev1.ConditionFalse { - newCond := corev1.NodeCondition{Type: corev1.NodeNetworkUnavailable, Status: corev1.ConditionFalse} - err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), newCond) - if err != nil { - klog.V(2).Infof("TEST:Machine %q: Failed to change node condition %q: %v", machine.Name, machine.Name, err) - return machineutils.ShortRetry, err - } - klog.V(2).Infof("TEST: marked nodenetwork as available %s", machine.Name) - } - } - } - retry, err = c.manageMachinePreservation(ctx, machine) if err != nil { return retry, err From f58d703cf762a47542f7fbbc71546fb56dde6926 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 8 Jan 2026 11:17:10 +0530 Subject: [PATCH 40/79] Address review comments - part 5: Change api fields to pointers --- Makefile | 6 +- docs/documents/apis.md | 12 +- 
.../machine.sapcloud.io_machineclasses.yaml | 127 -- ...achine.sapcloud.io_machinedeployments.yaml | 562 -------- .../crds/machine.sapcloud.io_machines.yaml | 333 ----- .../crds/machine.sapcloud.io_machinesets.yaml | 447 ------- pkg/apis/machine/types.go | 8 +- pkg/apis/machine/v1alpha1/machine_types.go | 2 +- .../v1alpha1/machinedeployment_types.go | 2 +- pkg/apis/machine/v1alpha1/machineset_types.go | 4 +- .../v1alpha1/zz_generated.conversion.go | 1179 ----------------- .../machine/v1alpha1/zz_generated.deepcopy.go | 795 ----------- .../machine/v1alpha1/zz_generated.defaults.go | 21 - pkg/apis/machine/zz_generated.deepcopy.go | 888 ------------- pkg/controller/deployment_machineset_util.go | 3 +- pkg/controller/machineset.go | 2 +- pkg/openapi/api_violations.report | 1 - pkg/openapi/openapi_generated.go | 4 +- .../machinecontroller/machine_test.go | 10 +- .../machinecontroller/machine_util.go | 6 +- .../machinecontroller/machine_util_test.go | 5 +- 21 files changed, 32 insertions(+), 4385 deletions(-) delete mode 100644 kubernetes/crds/machine.sapcloud.io_machineclasses.yaml delete mode 100644 kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml delete mode 100644 kubernetes/crds/machine.sapcloud.io_machines.yaml delete mode 100644 kubernetes/crds/machine.sapcloud.io_machinesets.yaml delete mode 100644 pkg/apis/machine/v1alpha1/zz_generated.conversion.go delete mode 100644 pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go delete mode 100644 pkg/apis/machine/v1alpha1/zz_generated.defaults.go delete mode 100644 pkg/apis/machine/zz_generated.deepcopy.go diff --git a/Makefile b/Makefile index aba0236ac..6b0f4f913 100644 --- a/Makefile +++ b/Makefile @@ -172,9 +172,9 @@ test-clean: .PHONY: generate generate: $(VGOPATH) $(DEEPCOPY_GEN) $(DEFAULTER_GEN) $(CONVERSION_GEN) $(OPENAPI_GEN) $(CONTROLLER_GEN) $(GEN_CRD_API_REFERENCE_DOCS) - $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... 
output:crd:dir=kubernetes/crds output:stdout - @./hack/generate-code - @./hack/api-reference/generate-spec-doc.sh + GOFLAGS="-buildvcs=false" $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout + @GOFLAGS="-buildvcs=false" ./hack/generate-code + @GOFLAGS="-buildvcs=false" ./hack/api-reference/generate-spec-doc.sh .PHONY: add-license-headers add-license-headers: $(GO_ADD_LICENSE) diff --git a/docs/documents/apis.md b/docs/documents/apis.md index a4e2f7628..0918a46fb 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -519,7 +519,7 @@ by default, which is treated as infinite deadline.

-int32 +*int32 @@ -699,7 +699,7 @@ int32 -int32 +*int32 @@ -1114,7 +1114,7 @@ Kubernetes meta/v1.Duration -preserveTimeout +machinePreserveTimeout @@ -1463,7 +1463,7 @@ by default, which is treated as infinite deadline.

-int32 +*int32 @@ -1940,7 +1940,7 @@ int32 -int32 +*int32 @@ -2091,7 +2091,7 @@ LastOperation -int32 +*int32 diff --git a/kubernetes/crds/machine.sapcloud.io_machineclasses.yaml b/kubernetes/crds/machine.sapcloud.io_machineclasses.yaml deleted file mode 100644 index f0cd9d515..000000000 --- a/kubernetes/crds/machine.sapcloud.io_machineclasses.yaml +++ /dev/null @@ -1,127 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.1 - name: machineclasses.machine.sapcloud.io -spec: - group: machine.sapcloud.io - names: - kind: MachineClass - listKind: MachineClassList - plural: machineclasses - shortNames: - - mcc - singular: machineclass - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - description: |- - MachineClass can be used to templatize and re-use provider configuration - across multiple Machines / MachineSets / MachineDeployments. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - credentialsSecretRef: - description: |- - CredentialsSecretRef can optionally store the credentials (in this case the SecretRef does not need to store them). - This might be useful if multiple machine classes with the same credentials but different user-datas are used. - properties: - name: - description: name is unique within a namespace to reference a secret - resource. - type: string - namespace: - description: namespace defines the space within which the secret name - must be unique. - type: string - type: object - x-kubernetes-map-type: atomic - kind: - description: |- - Kind is a string value representing the REST resource this object represents. 
- Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - nodeTemplate: - description: NodeTemplate contains subfields to track all node resources - and other node info required to scale nodegroup from zero - properties: - architecture: - description: CPU Architecture of the node belonging to nodeGroup - type: string - capacity: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Capacity contains subfields to track all node resources - required to scale nodegroup from zero - type: object - instanceType: - description: Instance type of the node belonging to nodeGroup - type: string - region: - description: Region of the expected node belonging to nodeGroup - type: string - virtualCapacity: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: VirtualCapacity represents the expected Node 'virtual' - capacity ie comprising virtual extended resources. - type: object - zone: - description: Zone of the expected node belonging to nodeGroup - type: string - required: - - capacity - - instanceType - - region - - zone - type: object - x-kubernetes-preserve-unknown-fields: true - provider: - description: Provider is the combination of name and location of cloud-specific - drivers. - type: string - providerSpec: - description: Provider-specific configuration to use during node creation. 
- type: object - x-kubernetes-preserve-unknown-fields: true - secretRef: - description: SecretRef stores the necessary secrets such as credentials - or userdata. - properties: - name: - description: name is unique within a namespace to reference a secret - resource. - type: string - namespace: - description: namespace defines the space within which the secret name - must be unique. - type: string - type: object - x-kubernetes-map-type: atomic - required: - - providerSpec - type: object - served: true - storage: true diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml deleted file mode 100644 index abb36d1c4..000000000 --- a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml +++ /dev/null @@ -1,562 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.1 - name: machinedeployments.machine.sapcloud.io -spec: - group: machine.sapcloud.io - names: - kind: MachineDeployment - listKind: MachineDeploymentList - plural: machinedeployments - shortNames: - - mcd - singular: machinedeployment - scope: Namespaced - versions: - - additionalPrinterColumns: - - description: Total number of ready machines targeted by this machine deployment. - jsonPath: .status.readyReplicas - name: Ready - type: integer - - description: Number of desired machines. - jsonPath: .spec.replicas - name: Desired - type: integer - - description: Total number of non-terminated machines targeted by this machine - deployment that have the desired template spec. - jsonPath: .status.updatedReplicas - name: Up-to-date - type: integer - - description: Total number of available machines (ready for at least minReadySeconds) - targeted by this machine deployment. 
- jsonPath: .status.availableReplicas - name: Available - type: integer - - description: |- - CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC. - Populated by the system. Read-only. Null for lists. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1alpha1 - schema: - openAPIV3Schema: - description: MachineDeployment enables declarative updates for machines and - MachineSets. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of the desired behavior of the MachineDeployment. - properties: - autoPreserveFailedMachineMax: - description: |- - The maximum number of machines in the machine deployment that will be auto-preserved. 
- In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments - format: int32 - type: integer - minReadySeconds: - description: |- - Minimum number of seconds for which a newly created machine should be ready - without any of its container crashing, for it to be considered available. - Defaults to 0 (machine will be considered available as soon as it is ready) - format: int32 - type: integer - paused: - description: |- - Indicates that the MachineDeployment is paused and will not be processed by the - MachineDeployment controller. - type: boolean - progressDeadlineSeconds: - description: |- - The maximum time in seconds for a MachineDeployment to make progress before it - is considered to be failed. The MachineDeployment controller will continue to - process failed MachineDeployments and a condition with a ProgressDeadlineExceeded - reason will be surfaced in the MachineDeployment status. Note that progress will - not be estimated during the time a MachineDeployment is paused. This is not set - by default, which is treated as infinite deadline. - format: int32 - type: integer - replicas: - description: |- - Number of desired machines. This is a pointer to distinguish between explicit - zero and not specified. Defaults to 0. - format: int32 - type: integer - revisionHistoryLimit: - description: |- - The number of old MachineSets to retain to allow rollback. - This is a pointer to distinguish between explicit zero and not specified. - format: int32 - type: integer - rollbackTo: - description: |- - DEPRECATED. - The config this MachineDeployment is rolling back to. Will be cleared after rollback is done. - properties: - revision: - description: The revision to rollback to. If set to 0, rollback - to the last revision. - format: int64 - type: integer - type: object - selector: - description: |- - Label selector for machines. 
Existing MachineSets whose machines are - selected by this will be the ones affected by this MachineDeployment. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - strategy: - description: The MachineDeployment strategy to use to replace existing - machines with new ones. - properties: - inPlaceUpdate: - description: |- - InPlaceUpdate update config params. Present only if MachineDeploymentStrategyType = - InPlaceUpdate. - properties: - maxSurge: - anyOf: - - type: integer - - type: string - description: |- - The maximum number of machines that can be scheduled above the desired number of - machines. 
- Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). - This can not be 0 if MaxUnavailable is 0. - Absolute number is calculated from percentage by rounding up. - Example: when this is set to 30%, the new machine set can be scaled up immediately when - the update starts, such that the total number of old and new machines does not exceed - 130% of desired machines. Once old machines have been killed, - new machine set can be scaled up further, ensuring that total number of machines running - at any time during the update is utmost 130% of desired machines. - x-kubernetes-int-or-string: true - maxUnavailable: - anyOf: - - type: integer - - type: string - description: |- - The maximum number of machines that can be unavailable during the update. - Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). - Absolute number is calculated from percentage by rounding down. - This can not be 0 if MaxSurge is 0. - Example: when this is set to 30%, the old machine set can be scaled down to 70% of desired machines - immediately when the update starts. Once new machines are ready, old machine set - can be scaled down further, followed by scaling up the new machine set, ensuring - that the total number of machines available at all times during the update is at - least 70% of desired machines. - x-kubernetes-int-or-string: true - orchestrationType: - description: OrchestrationType specifies the orchestration - type for the inplace update. - type: string - type: object - rollingUpdate: - description: |- - Rolling update config params. Present only if MachineDeploymentStrategyType = - RollingUpdate. - properties: - maxSurge: - anyOf: - - type: integer - - type: string - description: |- - The maximum number of machines that can be scheduled above the desired number of - machines. - Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). - This can not be 0 if MaxUnavailable is 0. 
- Absolute number is calculated from percentage by rounding up. - Example: when this is set to 30%, the new machine set can be scaled up immediately when - the update starts, such that the total number of old and new machines does not exceed - 130% of desired machines. Once old machines have been killed, - new machine set can be scaled up further, ensuring that total number of machines running - at any time during the update is utmost 130% of desired machines. - x-kubernetes-int-or-string: true - maxUnavailable: - anyOf: - - type: integer - - type: string - description: |- - The maximum number of machines that can be unavailable during the update. - Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). - Absolute number is calculated from percentage by rounding down. - This can not be 0 if MaxSurge is 0. - Example: when this is set to 30%, the old machine set can be scaled down to 70% of desired machines - immediately when the update starts. Once new machines are ready, old machine set - can be scaled down further, followed by scaling up the new machine set, ensuring - that the total number of machines available at all times during the update is at - least 70% of desired machines. - x-kubernetes-int-or-string: true - type: object - type: - description: Type of MachineDeployment. Can be "Recreate" or "RollingUpdate". - Default is RollingUpdate. - type: string - type: object - template: - description: Template describes the machines that will be created. - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - type: object - x-kubernetes-preserve-unknown-fields: true - spec: - description: |- - Specification of the desired behavior of the machine. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - class: - description: Class contains the machineclass attributes of - a machine - properties: - apiGroup: - description: API group to which it belongs - type: string - kind: - description: Kind for machine class - type: string - name: - description: Name of machine class - type: string - type: object - creationTimeout: - description: MachineCreationTimeout is the timeout after which - machinie creation is declared failed. - type: string - disableHealthTimeout: - description: |- - DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. - This is intended to be used only for in-place updates. - type: boolean - drainTimeout: - description: MachineDraintimeout is the timeout after which - machine is forcefully deleted. - type: string - healthTimeout: - description: MachineHealthTimeout is the timeout after which - machine is declared unhealhty/failed. - type: string - inPlaceUpdateTimeout: - description: MachineInPlaceUpdateTimeout is the timeout after - which in-place update is declared failed. - type: string - machinePreserveTimeout: - description: MachinePreserveTimeout is the timeout after which - the machine preservation is stopped - type: string - maxEvictRetries: - description: MaxEvictRetries is the number of retries that - will be attempted while draining the node. - format: int32 - type: integer - nodeConditions: - description: NodeConditions are the set of conditions if set - to true for MachineHealthTimeOut, machine will be declared - failed. - type: string - nodeTemplate: - description: NodeTemplateSpec describes the data a node should - have when created from a template - properties: - metadata: - type: object - x-kubernetes-preserve-unknown-fields: true - spec: - description: NodeSpec describes the attributes that a - node is created with. 
- properties: - configSource: - description: 'Deprecated: Previously used to specify - the source of the node''s configuration for the - DynamicKubeletConfig feature. This feature is removed.' - properties: - configMap: - description: ConfigMap is a reference to a Node's - ConfigMap - properties: - kubeletConfigKey: - description: |- - KubeletConfigKey declares which key of the referenced ConfigMap corresponds to the KubeletConfiguration structure - This field is required in all cases. - type: string - name: - description: |- - Name is the metadata.name of the referenced ConfigMap. - This field is required in all cases. - type: string - namespace: - description: |- - Namespace is the metadata.namespace of the referenced ConfigMap. - This field is required in all cases. - type: string - resourceVersion: - description: |- - ResourceVersion is the metadata.ResourceVersion of the referenced ConfigMap. - This field is forbidden in Node.Spec, and required in Node.Status. - type: string - uid: - description: |- - UID is the metadata.UID of the referenced ConfigMap. - This field is forbidden in Node.Spec, and required in Node.Status. - type: string - required: - - kubeletConfigKey - - name - - namespace - type: object - type: object - externalID: - description: |- - Deprecated. Not all kubelets will set this field. Remove field after 1.13. - see: https://issues.k8s.io/61966 - type: string - podCIDR: - description: PodCIDR represents the pod IP range assigned - to the node. - type: string - podCIDRs: - description: |- - podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this - field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for - each of IPv4 and IPv6. - items: - type: string - type: array - x-kubernetes-list-type: set - providerID: - description: 'ID of the node assigned by the cloud - provider in the format: ://' - type: string - taints: - description: If specified, the node's taints. 
- items: - description: |- - The node this Taint is attached to has the "effect" on - any pod that does not tolerate the Taint. - properties: - effect: - description: |- - Required. The effect of the taint on pods - that do not tolerate the taint. - Valid effects are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: Required. The taint key to be applied - to a node. - type: string - timeAdded: - description: |- - TimeAdded represents the time at which the taint was added. - It is only written for NoExecute taints. - format: date-time - type: string - value: - description: The taint value corresponding to - the taint key. - type: string - required: - - effect - - key - type: object - type: array - x-kubernetes-list-type: atomic - unschedulable: - description: |- - Unschedulable controls node schedulability of new pods. By default, node is schedulable. - More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration - type: boolean - type: object - type: object - providerID: - description: ProviderID represents the provider's unique ID - given to a machine - type: string - type: object - type: object - required: - - template - type: object - status: - description: Most recently observed status of the MachineDeployment. - properties: - availableReplicas: - description: Total number of available machines (ready for at least - minReadySeconds) targeted by this MachineDeployment. - format: int32 - type: integer - collisionCount: - description: |- - Count of hash collisions for the MachineDeployment. The MachineDeployment controller uses this - field as a collision avoidance mechanism when it needs to create the name for the - newest MachineSet. - format: int32 - type: integer - conditions: - description: Represents the latest available observations of a MachineDeployment's - current state. - items: - description: MachineDeploymentCondition describes the state of a - MachineDeployment at a certain point. 
- properties: - lastTransitionTime: - description: Last time the condition transitioned from one status - to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of MachineDeployment condition. - type: string - required: - - status - - type - type: object - type: array - failedMachines: - description: FailedMachines has summary of machines on which lastOperation - Failed - items: - description: MachineSummary store the summary of machine. - properties: - lastOperation: - description: Last operation refers to the status of the last - operation performed - properties: - description: - description: Description of the current operation - type: string - errorCode: - description: ErrorCode of the current operation if any - type: string - lastUpdateTime: - description: Last update time of current operation - format: date-time - type: string - state: - description: State of operation - type: string - type: - description: Type of operation - type: string - type: object - name: - description: Name of the machine object - type: string - ownerRef: - description: OwnerRef - type: string - providerID: - description: ProviderID represents the provider's unique ID - given to a machine - type: string - type: object - type: array - observedGeneration: - description: The generation observed by the MachineDeployment controller. - format: int64 - type: integer - readyReplicas: - description: Total number of ready machines targeted by this MachineDeployment. 
- format: int32 - type: integer - replicas: - description: Total number of non-terminated machines targeted by this - MachineDeployment (their labels match the selector). - format: int32 - type: integer - unavailableReplicas: - description: |- - Total number of unavailable machines targeted by this MachineDeployment. This is the total number of - machines that are still required for the MachineDeployment to have 100% available capacity. They may - either be machines that are running but not yet available or machines that still have not been created. - format: int32 - type: integer - updatedReplicas: - description: Total number of non-terminated machines targeted by this - MachineDeployment that have the desired template spec. - format: int32 - type: integer - type: object - type: object - served: true - storage: true - subresources: - scale: - specReplicasPath: .spec.replicas - statusReplicasPath: .status.replicas - status: {} diff --git a/kubernetes/crds/machine.sapcloud.io_machines.yaml b/kubernetes/crds/machine.sapcloud.io_machines.yaml deleted file mode 100644 index fcea16750..000000000 --- a/kubernetes/crds/machine.sapcloud.io_machines.yaml +++ /dev/null @@ -1,333 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.1 - name: machines.machine.sapcloud.io -spec: - group: machine.sapcloud.io - names: - kind: Machine - listKind: MachineList - plural: machines - shortNames: - - mc - singular: machine - scope: Namespaced - versions: - - additionalPrinterColumns: - - description: Current status of the machine. - jsonPath: .status.currentStatus.phase - name: Status - type: string - - description: |- - CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC. 
- Populated by the system. Read-only. Null for lists. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - jsonPath: .metadata.creationTimestamp - name: Age - type: date - - description: Node backing the machine object - jsonPath: .metadata.labels.node - name: Node - type: string - - description: ProviderID of the infra instance backing the machine object - jsonPath: .spec.providerID - name: ProviderID - priority: 1 - type: string - name: v1alpha1 - schema: - openAPIV3Schema: - description: Machine is the representation of a physical or virtual machine. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Spec contains the specification of the machine - properties: - class: - description: Class contains the machineclass attributes of a machine - properties: - apiGroup: - description: API group to which it belongs - type: string - kind: - description: Kind for machine class - type: string - name: - description: Name of machine class - type: string - type: object - creationTimeout: - description: MachineCreationTimeout is the timeout after which machinie - creation is declared failed. - type: string - disableHealthTimeout: - description: |- - DisableHealthTimeout if set to true, health timeout will be ignored. 
Leading to machine never being declared failed. - This is intended to be used only for in-place updates. - type: boolean - drainTimeout: - description: MachineDraintimeout is the timeout after which machine - is forcefully deleted. - type: string - healthTimeout: - description: MachineHealthTimeout is the timeout after which machine - is declared unhealhty/failed. - type: string - inPlaceUpdateTimeout: - description: MachineInPlaceUpdateTimeout is the timeout after which - in-place update is declared failed. - type: string - machinePreserveTimeout: - description: MachinePreserveTimeout is the timeout after which the - machine preservation is stopped - type: string - maxEvictRetries: - description: MaxEvictRetries is the number of retries that will be - attempted while draining the node. - format: int32 - type: integer - nodeConditions: - description: NodeConditions are the set of conditions if set to true - for MachineHealthTimeOut, machine will be declared failed. - type: string - nodeTemplate: - description: NodeTemplateSpec describes the data a node should have - when created from a template - properties: - metadata: - type: object - x-kubernetes-preserve-unknown-fields: true - spec: - description: NodeSpec describes the attributes that a node is - created with. - properties: - configSource: - description: 'Deprecated: Previously used to specify the source - of the node''s configuration for the DynamicKubeletConfig - feature. This feature is removed.' - properties: - configMap: - description: ConfigMap is a reference to a Node's ConfigMap - properties: - kubeletConfigKey: - description: |- - KubeletConfigKey declares which key of the referenced ConfigMap corresponds to the KubeletConfiguration structure - This field is required in all cases. - type: string - name: - description: |- - Name is the metadata.name of the referenced ConfigMap. - This field is required in all cases. 
- type: string - namespace: - description: |- - Namespace is the metadata.namespace of the referenced ConfigMap. - This field is required in all cases. - type: string - resourceVersion: - description: |- - ResourceVersion is the metadata.ResourceVersion of the referenced ConfigMap. - This field is forbidden in Node.Spec, and required in Node.Status. - type: string - uid: - description: |- - UID is the metadata.UID of the referenced ConfigMap. - This field is forbidden in Node.Spec, and required in Node.Status. - type: string - required: - - kubeletConfigKey - - name - - namespace - type: object - type: object - externalID: - description: |- - Deprecated. Not all kubelets will set this field. Remove field after 1.13. - see: https://issues.k8s.io/61966 - type: string - podCIDR: - description: PodCIDR represents the pod IP range assigned - to the node. - type: string - podCIDRs: - description: |- - podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this - field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for - each of IPv4 and IPv6. - items: - type: string - type: array - x-kubernetes-list-type: set - providerID: - description: 'ID of the node assigned by the cloud provider - in the format: ://' - type: string - taints: - description: If specified, the node's taints. - items: - description: |- - The node this Taint is attached to has the "effect" on - any pod that does not tolerate the Taint. - properties: - effect: - description: |- - Required. The effect of the taint on pods - that do not tolerate the taint. - Valid effects are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: Required. The taint key to be applied to - a node. - type: string - timeAdded: - description: |- - TimeAdded represents the time at which the taint was added. - It is only written for NoExecute taints. 
- format: date-time - type: string - value: - description: The taint value corresponding to the taint - key. - type: string - required: - - effect - - key - type: object - type: array - x-kubernetes-list-type: atomic - unschedulable: - description: |- - Unschedulable controls node schedulability of new pods. By default, node is schedulable. - More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration - type: boolean - type: object - type: object - providerID: - description: ProviderID represents the provider's unique ID given - to a machine - type: string - type: object - status: - description: Status contains fields depicting the status - properties: - addresses: - description: |- - Addresses of this machines. This field is only present if the MCM provider runs without a target cluster and may - be used by clients to determine how to connect to the machine, instead of the `Node.status.addresses` field. - items: - description: NodeAddress contains information for the node's address. - properties: - address: - description: The node address. - type: string - type: - description: Node address type, one of Hostname, ExternalIP - or InternalIP. - type: string - required: - - address - - type - type: object - type: array - conditions: - description: Conditions of this machine, same as node - items: - description: NodeCondition contains condition information for a - node. - properties: - lastHeartbeatTime: - description: Last time we got an update on a given condition. - format: date-time - type: string - lastTransitionTime: - description: Last time the condition transit from one status - to another. - format: date-time - type: string - message: - description: Human readable message indicating details about - last transition. - type: string - reason: - description: (brief) reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. 
- type: string - type: - description: Type of node condition. - type: string - required: - - status - - type - type: object - type: array - currentStatus: - description: Current status of the machine object - properties: - lastUpdateTime: - description: Last update time of current status - format: date-time - type: string - phase: - description: MachinePhase is a label for the condition of a machine - at the current time. - type: string - preserveExpiryTime: - description: PreserveExpiryTime is the time at which MCM will - stop preserving the machine - format: date-time - type: string - timeoutActive: - type: boolean - type: object - lastKnownState: - description: |- - LastKnownState can store details of the last known state of the VM by the plugins. - It can be used by future operation calls to determine current infrastucture state - type: string - lastOperation: - description: Last operation refers to the status of the last operation - performed - properties: - description: - description: Description of the current operation - type: string - errorCode: - description: ErrorCode of the current operation if any - type: string - lastUpdateTime: - description: Last update time of current operation - format: date-time - type: string - state: - description: State of operation - type: string - type: - description: Type of operation - type: string - type: object - type: object - type: object - served: true - storage: true - subresources: - status: {} diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml deleted file mode 100644 index 46445131f..000000000 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ /dev/null @@ -1,447 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.16.1 - name: machinesets.machine.sapcloud.io -spec: - group: machine.sapcloud.io - names: - kind: MachineSet - listKind: 
MachineSetList - plural: machinesets - shortNames: - - mcs - singular: machineset - scope: Namespaced - versions: - - additionalPrinterColumns: - - description: Number of desired replicas. - jsonPath: .spec.replicas - name: Desired - type: integer - - description: Number of actual replicas. - jsonPath: .status.replicas - name: Current - type: integer - - description: Number of ready replicas for this machine set. - jsonPath: .status.readyReplicas - name: Ready - type: integer - - description: |- - CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC. - Populated by the system. Read-only. Null for lists. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1alpha1 - schema: - openAPIV3Schema: - description: MachineSet TODO - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: MachineSetSpec is the specification of a MachineSet. 
- properties: - autoPreserveFailedMachineMax: - format: int32 - type: integer - machineClass: - description: ClassSpec is the class specification of machine - properties: - apiGroup: - description: API group to which it belongs - type: string - kind: - description: Kind for machine class - type: string - name: - description: Name of machine class - type: string - type: object - minReadySeconds: - format: int32 - type: integer - replicas: - format: int32 - type: integer - selector: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. - The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies - to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - template: - description: MachineTemplateSpec describes the data a machine should - have when created from a template - properties: - metadata: - description: |- - Standard object's metadata. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata - type: object - x-kubernetes-preserve-unknown-fields: true - spec: - description: |- - Specification of the desired behavior of the machine. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status - properties: - class: - description: Class contains the machineclass attributes of - a machine - properties: - apiGroup: - description: API group to which it belongs - type: string - kind: - description: Kind for machine class - type: string - name: - description: Name of machine class - type: string - type: object - creationTimeout: - description: MachineCreationTimeout is the timeout after which - machinie creation is declared failed. - type: string - disableHealthTimeout: - description: |- - DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. - This is intended to be used only for in-place updates. - type: boolean - drainTimeout: - description: MachineDraintimeout is the timeout after which - machine is forcefully deleted. - type: string - healthTimeout: - description: MachineHealthTimeout is the timeout after which - machine is declared unhealhty/failed. - type: string - inPlaceUpdateTimeout: - description: MachineInPlaceUpdateTimeout is the timeout after - which in-place update is declared failed. 
- type: string - machinePreserveTimeout: - description: MachinePreserveTimeout is the timeout after which - the machine preservation is stopped - type: string - maxEvictRetries: - description: MaxEvictRetries is the number of retries that - will be attempted while draining the node. - format: int32 - type: integer - nodeConditions: - description: NodeConditions are the set of conditions if set - to true for MachineHealthTimeOut, machine will be declared - failed. - type: string - nodeTemplate: - description: NodeTemplateSpec describes the data a node should - have when created from a template - properties: - metadata: - type: object - x-kubernetes-preserve-unknown-fields: true - spec: - description: NodeSpec describes the attributes that a - node is created with. - properties: - configSource: - description: 'Deprecated: Previously used to specify - the source of the node''s configuration for the - DynamicKubeletConfig feature. This feature is removed.' - properties: - configMap: - description: ConfigMap is a reference to a Node's - ConfigMap - properties: - kubeletConfigKey: - description: |- - KubeletConfigKey declares which key of the referenced ConfigMap corresponds to the KubeletConfiguration structure - This field is required in all cases. - type: string - name: - description: |- - Name is the metadata.name of the referenced ConfigMap. - This field is required in all cases. - type: string - namespace: - description: |- - Namespace is the metadata.namespace of the referenced ConfigMap. - This field is required in all cases. - type: string - resourceVersion: - description: |- - ResourceVersion is the metadata.ResourceVersion of the referenced ConfigMap. - This field is forbidden in Node.Spec, and required in Node.Status. - type: string - uid: - description: |- - UID is the metadata.UID of the referenced ConfigMap. - This field is forbidden in Node.Spec, and required in Node.Status. 
- type: string - required: - - kubeletConfigKey - - name - - namespace - type: object - type: object - externalID: - description: |- - Deprecated. Not all kubelets will set this field. Remove field after 1.13. - see: https://issues.k8s.io/61966 - type: string - podCIDR: - description: PodCIDR represents the pod IP range assigned - to the node. - type: string - podCIDRs: - description: |- - podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this - field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for - each of IPv4 and IPv6. - items: - type: string - type: array - x-kubernetes-list-type: set - providerID: - description: 'ID of the node assigned by the cloud - provider in the format: ://' - type: string - taints: - description: If specified, the node's taints. - items: - description: |- - The node this Taint is attached to has the "effect" on - any pod that does not tolerate the Taint. - properties: - effect: - description: |- - Required. The effect of the taint on pods - that do not tolerate the taint. - Valid effects are NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: Required. The taint key to be applied - to a node. - type: string - timeAdded: - description: |- - TimeAdded represents the time at which the taint was added. - It is only written for NoExecute taints. - format: date-time - type: string - value: - description: The taint value corresponding to - the taint key. - type: string - required: - - effect - - key - type: object - type: array - x-kubernetes-list-type: atomic - unschedulable: - description: |- - Unschedulable controls node schedulability of new pods. By default, node is schedulable. 
- More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration - type: boolean - type: object - type: object - providerID: - description: ProviderID represents the provider's unique ID - given to a machine - type: string - type: object - type: object - type: object - status: - description: MachineSetStatus holds the most recently observed status - of MachineSet. - properties: - autoPreserveFailedMachineCount: - description: AutoPreserveFailedMachineCount has a count of the number - of failed machines in the machineset that have been auto-preserved - format: int32 - type: integer - availableReplicas: - description: The number of available replicas (ready for at least - minReadySeconds) for this replica set. - format: int32 - type: integer - failedMachines: - description: FailedMachines has summary of machines on which lastOperation - Failed - items: - description: MachineSummary store the summary of machine. - properties: - lastOperation: - description: Last operation refers to the status of the last - operation performed - properties: - description: - description: Description of the current operation - type: string - errorCode: - description: ErrorCode of the current operation if any - type: string - lastUpdateTime: - description: Last update time of current operation - format: date-time - type: string - state: - description: State of operation - type: string - type: - description: Type of operation - type: string - type: object - name: - description: Name of the machine object - type: string - ownerRef: - description: OwnerRef - type: string - providerID: - description: ProviderID represents the provider's unique ID - given to a machine - type: string - type: object - type: array - fullyLabeledReplicas: - description: The number of pods that have labels matching the labels - of the pod template of the replicaset. 
- format: int32 - type: integer - lastOperation: - description: LastOperation performed - properties: - description: - description: Description of the current operation - type: string - errorCode: - description: ErrorCode of the current operation if any - type: string - lastUpdateTime: - description: Last update time of current operation - format: date-time - type: string - state: - description: State of operation - type: string - type: - description: Type of operation - type: string - type: object - machineSetCondition: - description: Represents the latest available observations of a replica - set's current state. - items: - description: MachineSetCondition describes the state of a machine - set at a certain point. - properties: - lastTransitionTime: - description: The last time the condition transitioned from one - status to another. - format: date-time - type: string - message: - description: A human readable message indicating details about - the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of machine set condition. - type: string - required: - - status - - type - type: object - type: array - observedGeneration: - description: ObservedGeneration is the most recent generation observed - by the controller. - format: int64 - type: integer - readyReplicas: - description: The number of ready replicas for this replica set. - format: int32 - type: integer - replicas: - description: Replicas is the number of actual replicas. 
- format: int32 - type: integer - type: object - type: object - served: true - storage: true - subresources: - scale: - specReplicasPath: .spec.replicas - statusReplicasPath: .status.replicas - status: {} diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index c2e535bf7..d9ae90a19 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -162,7 +162,7 @@ type CurrentStatus struct { LastUpdateTime metav1.Time // PreserveExpiryTime is the time at which MCM will stop preserving the machine - PreserveExpiryTime metav1.Time + PreserveExpiryTime *metav1.Time } // MachineStatus holds the most recently observed status of Machine. @@ -357,7 +357,7 @@ type MachineSetSpec struct { MinReadySeconds int32 - AutoPreserveFailedMachineMax int32 + AutoPreserveFailedMachineMax *int32 } // MachineSetConditionType is the condition on machineset object @@ -418,7 +418,7 @@ type MachineSetStatus struct { FailedMachines *[]MachineSummary // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved - AutoPreserveFailedMachineCount int32 + AutoPreserveFailedMachineCount *int32 } // MachineSummary store the summary of machine. @@ -500,7 +500,7 @@ type MachineDeploymentSpec struct { // The maximum number of machines in the machine deployment that will be auto-preserved. 
// In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments - AutoPreserveFailedMachineMax int32 + AutoPreserveFailedMachineMax *int32 } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 654256d32..3744534f3 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -274,7 +274,7 @@ type CurrentStatus struct { LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"` // PreserveExpiryTime is the time at which MCM will stop preserving the machine - PreserveExpiryTime metav1.Time `json:"preserveExpiryTime,omitempty"` + PreserveExpiryTime *metav1.Time `json:"preserveExpiryTime,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/machine/v1alpha1/machinedeployment_types.go b/pkg/apis/machine/v1alpha1/machinedeployment_types.go index 6cebcd1a1..3fc6487f7 100644 --- a/pkg/apis/machine/v1alpha1/machinedeployment_types.go +++ b/pkg/apis/machine/v1alpha1/machinedeployment_types.go @@ -95,7 +95,7 @@ type MachineDeploymentSpec struct { // The maximum number of machines in the machine deployment that will be auto-preserved. 
// In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments // +optional - AutoPreserveFailedMachineMax int32 `json:"autoPreserveFailedMachineMax,omitempty"` + AutoPreserveFailedMachineMax *int32 `json:"autoPreserveFailedMachineMax,omitempty"` } const ( diff --git a/pkg/apis/machine/v1alpha1/machineset_types.go b/pkg/apis/machine/v1alpha1/machineset_types.go index 8cd73d58e..2e6d24bd7 100644 --- a/pkg/apis/machine/v1alpha1/machineset_types.go +++ b/pkg/apis/machine/v1alpha1/machineset_types.go @@ -70,7 +70,7 @@ type MachineSetSpec struct { MinReadySeconds int32 `json:"minReadySeconds,omitempty"` // +optional - AutoPreserveFailedMachineMax int32 `json:"autoPreserveFailedMachineMax,omitempty"` + AutoPreserveFailedMachineMax *int32 `json:"autoPreserveFailedMachineMax,omitempty"` } // MachineSetConditionType is the condition on machineset object @@ -141,5 +141,5 @@ type MachineSetStatus struct { // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved // +optional - AutoPreserveFailedMachineCount int32 `json:"autoPreserveFailedMachineCount,omitempty"` + AutoPreserveFailedMachineCount *int32 `json:"autoPreserveFailedMachineCount,omitempty"` } diff --git a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go deleted file mode 100644 index 1990c2f03..000000000 --- a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go +++ /dev/null @@ -1,1179 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors -// -// SPDX-License-Identifier: Apache-2.0 - -// Code generated by conversion-gen. DO NOT EDIT. 
- -package v1alpha1 - -import ( - unsafe "unsafe" - - machine "github.com/gardener/machine-controller-manager/pkg/apis/machine" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - conversion "k8s.io/apimachinery/pkg/conversion" - runtime "k8s.io/apimachinery/pkg/runtime" - intstr "k8s.io/apimachinery/pkg/util/intstr" -) - -func init() { - localSchemeBuilder.Register(RegisterConversions) -} - -// RegisterConversions adds conversion functions to the given scheme. -// Public to allow building arbitrary schemes. -func RegisterConversions(s *runtime.Scheme) error { - if err := s.AddGeneratedConversionFunc((*ClassSpec)(nil), (*machine.ClassSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(a.(*ClassSpec), b.(*machine.ClassSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.ClassSpec)(nil), (*ClassSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(a.(*machine.ClassSpec), b.(*ClassSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*CurrentStatus)(nil), (*machine.CurrentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(a.(*CurrentStatus), b.(*machine.CurrentStatus), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.CurrentStatus)(nil), (*CurrentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(a.(*machine.CurrentStatus), b.(*CurrentStatus), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*InPlaceUpdateMachineDeployment)(nil), (*machine.InPlaceUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { - return 
Convert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(a.(*InPlaceUpdateMachineDeployment), b.(*machine.InPlaceUpdateMachineDeployment), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.InPlaceUpdateMachineDeployment)(nil), (*InPlaceUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(a.(*machine.InPlaceUpdateMachineDeployment), b.(*InPlaceUpdateMachineDeployment), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*LastOperation)(nil), (*machine.LastOperation)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_LastOperation_To_machine_LastOperation(a.(*LastOperation), b.(*machine.LastOperation), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.LastOperation)(nil), (*LastOperation)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_LastOperation_To_v1alpha1_LastOperation(a.(*machine.LastOperation), b.(*LastOperation), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*Machine)(nil), (*machine.Machine)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_Machine_To_machine_Machine(a.(*Machine), b.(*machine.Machine), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.Machine)(nil), (*Machine)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_Machine_To_v1alpha1_Machine(a.(*machine.Machine), b.(*Machine), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineClass)(nil), (*machine.MachineClass)(nil), func(a, b interface{}, scope conversion.Scope) error { - return 
Convert_v1alpha1_MachineClass_To_machine_MachineClass(a.(*MachineClass), b.(*machine.MachineClass), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineClass)(nil), (*MachineClass)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineClass_To_v1alpha1_MachineClass(a.(*machine.MachineClass), b.(*MachineClass), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineClassList)(nil), (*machine.MachineClassList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineClassList_To_machine_MachineClassList(a.(*MachineClassList), b.(*machine.MachineClassList), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineClassList)(nil), (*MachineClassList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineClassList_To_v1alpha1_MachineClassList(a.(*machine.MachineClassList), b.(*MachineClassList), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineConfiguration)(nil), (*machine.MachineConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(a.(*MachineConfiguration), b.(*machine.MachineConfiguration), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineConfiguration)(nil), (*MachineConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(a.(*machine.MachineConfiguration), b.(*MachineConfiguration), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineDeployment)(nil), (*machine.MachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { - return 
Convert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(a.(*MachineDeployment), b.(*machine.MachineDeployment), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineDeployment)(nil), (*MachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(a.(*machine.MachineDeployment), b.(*MachineDeployment), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineDeploymentCondition)(nil), (*machine.MachineDeploymentCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(a.(*MachineDeploymentCondition), b.(*machine.MachineDeploymentCondition), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentCondition)(nil), (*MachineDeploymentCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(a.(*machine.MachineDeploymentCondition), b.(*MachineDeploymentCondition), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineDeploymentList)(nil), (*machine.MachineDeploymentList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(a.(*MachineDeploymentList), b.(*machine.MachineDeploymentList), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentList)(nil), (*MachineDeploymentList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(a.(*machine.MachineDeploymentList), b.(*MachineDeploymentList), scope) - }); err != nil { - return err - } - if err := 
s.AddGeneratedConversionFunc((*MachineDeploymentSpec)(nil), (*machine.MachineDeploymentSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(a.(*MachineDeploymentSpec), b.(*machine.MachineDeploymentSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentSpec)(nil), (*MachineDeploymentSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(a.(*machine.MachineDeploymentSpec), b.(*MachineDeploymentSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineDeploymentStatus)(nil), (*machine.MachineDeploymentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(a.(*MachineDeploymentStatus), b.(*machine.MachineDeploymentStatus), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentStatus)(nil), (*MachineDeploymentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(a.(*machine.MachineDeploymentStatus), b.(*MachineDeploymentStatus), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineDeploymentStrategy)(nil), (*machine.MachineDeploymentStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(a.(*MachineDeploymentStrategy), b.(*machine.MachineDeploymentStrategy), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentStrategy)(nil), (*MachineDeploymentStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error { - return 
Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(a.(*machine.MachineDeploymentStrategy), b.(*MachineDeploymentStrategy), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineList)(nil), (*machine.MachineList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineList_To_machine_MachineList(a.(*MachineList), b.(*machine.MachineList), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineList)(nil), (*MachineList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineList_To_v1alpha1_MachineList(a.(*machine.MachineList), b.(*MachineList), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineSet)(nil), (*machine.MachineSet)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineSet_To_machine_MachineSet(a.(*MachineSet), b.(*machine.MachineSet), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineSet)(nil), (*MachineSet)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineSet_To_v1alpha1_MachineSet(a.(*machine.MachineSet), b.(*MachineSet), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineSetCondition)(nil), (*machine.MachineSetCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(a.(*MachineSetCondition), b.(*machine.MachineSetCondition), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineSetCondition)(nil), (*MachineSetCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(a.(*machine.MachineSetCondition), b.(*MachineSetCondition), 
scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineSetList)(nil), (*machine.MachineSetList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineSetList_To_machine_MachineSetList(a.(*MachineSetList), b.(*machine.MachineSetList), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineSetList)(nil), (*MachineSetList)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineSetList_To_v1alpha1_MachineSetList(a.(*machine.MachineSetList), b.(*MachineSetList), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineSetSpec)(nil), (*machine.MachineSetSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(a.(*MachineSetSpec), b.(*machine.MachineSetSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineSetSpec)(nil), (*MachineSetSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(a.(*machine.MachineSetSpec), b.(*MachineSetSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineSetStatus)(nil), (*machine.MachineSetStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(a.(*MachineSetStatus), b.(*machine.MachineSetStatus), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineSetStatus)(nil), (*MachineSetStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(a.(*machine.MachineSetStatus), b.(*MachineSetStatus), scope) - }); err != nil { - return err - } - if err := 
s.AddGeneratedConversionFunc((*MachineSpec)(nil), (*machine.MachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(a.(*MachineSpec), b.(*machine.MachineSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineSpec)(nil), (*MachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(a.(*machine.MachineSpec), b.(*MachineSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineStatus)(nil), (*machine.MachineStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineStatus_To_machine_MachineStatus(a.(*MachineStatus), b.(*machine.MachineStatus), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineStatus)(nil), (*MachineStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineStatus_To_v1alpha1_MachineStatus(a.(*machine.MachineStatus), b.(*MachineStatus), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineSummary)(nil), (*machine.MachineSummary)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_MachineSummary_To_machine_MachineSummary(a.(*MachineSummary), b.(*machine.MachineSummary), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineSummary)(nil), (*MachineSummary)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineSummary_To_v1alpha1_MachineSummary(a.(*machine.MachineSummary), b.(*MachineSummary), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*MachineTemplateSpec)(nil), (*machine.MachineTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return 
Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(a.(*MachineTemplateSpec), b.(*machine.MachineTemplateSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.MachineTemplateSpec)(nil), (*MachineTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(a.(*machine.MachineTemplateSpec), b.(*MachineTemplateSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*NodeTemplate)(nil), (*machine.NodeTemplate)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(a.(*NodeTemplate), b.(*machine.NodeTemplate), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.NodeTemplate)(nil), (*NodeTemplate)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(a.(*machine.NodeTemplate), b.(*NodeTemplate), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*NodeTemplateSpec)(nil), (*machine.NodeTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(a.(*NodeTemplateSpec), b.(*machine.NodeTemplateSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.NodeTemplateSpec)(nil), (*NodeTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(a.(*machine.NodeTemplateSpec), b.(*NodeTemplateSpec), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*RollbackConfig)(nil), (*machine.RollbackConfig)(nil), func(a, b interface{}, scope conversion.Scope) error { - return 
Convert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(a.(*RollbackConfig), b.(*machine.RollbackConfig), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.RollbackConfig)(nil), (*RollbackConfig)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(a.(*machine.RollbackConfig), b.(*RollbackConfig), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*RollingUpdateMachineDeployment)(nil), (*machine.RollingUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(a.(*RollingUpdateMachineDeployment), b.(*machine.RollingUpdateMachineDeployment), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.RollingUpdateMachineDeployment)(nil), (*RollingUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(a.(*machine.RollingUpdateMachineDeployment), b.(*RollingUpdateMachineDeployment), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*UpdateConfiguration)(nil), (*machine.UpdateConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(a.(*UpdateConfiguration), b.(*machine.UpdateConfiguration), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*machine.UpdateConfiguration)(nil), (*UpdateConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(a.(*machine.UpdateConfiguration), b.(*UpdateConfiguration), scope) - }); err != nil { - return err - } - return nil -} - -func 
autoConvert_v1alpha1_ClassSpec_To_machine_ClassSpec(in *ClassSpec, out *machine.ClassSpec, s conversion.Scope) error { - out.APIGroup = in.APIGroup - out.Kind = in.Kind - out.Name = in.Name - return nil -} - -// Convert_v1alpha1_ClassSpec_To_machine_ClassSpec is an autogenerated conversion function. -func Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(in *ClassSpec, out *machine.ClassSpec, s conversion.Scope) error { - return autoConvert_v1alpha1_ClassSpec_To_machine_ClassSpec(in, out, s) -} - -func autoConvert_machine_ClassSpec_To_v1alpha1_ClassSpec(in *machine.ClassSpec, out *ClassSpec, s conversion.Scope) error { - out.APIGroup = in.APIGroup - out.Kind = in.Kind - out.Name = in.Name - return nil -} - -// Convert_machine_ClassSpec_To_v1alpha1_ClassSpec is an autogenerated conversion function. -func Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(in *machine.ClassSpec, out *ClassSpec, s conversion.Scope) error { - return autoConvert_machine_ClassSpec_To_v1alpha1_ClassSpec(in, out, s) -} - -func autoConvert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(in *CurrentStatus, out *machine.CurrentStatus, s conversion.Scope) error { - out.Phase = machine.MachinePhase(in.Phase) - out.TimeoutActive = in.TimeoutActive - out.LastUpdateTime = in.LastUpdateTime - out.PreserveExpiryTime = in.PreserveExpiryTime - return nil -} - -// Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus is an autogenerated conversion function. 
-func Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(in *CurrentStatus, out *machine.CurrentStatus, s conversion.Scope) error { - return autoConvert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(in, out, s) -} - -func autoConvert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in *machine.CurrentStatus, out *CurrentStatus, s conversion.Scope) error { - out.Phase = MachinePhase(in.Phase) - out.TimeoutActive = in.TimeoutActive - out.LastUpdateTime = in.LastUpdateTime - out.PreserveExpiryTime = in.PreserveExpiryTime - return nil -} - -// Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus is an autogenerated conversion function. -func Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in *machine.CurrentStatus, out *CurrentStatus, s conversion.Scope) error { - return autoConvert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in, out, s) -} - -func autoConvert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(in *InPlaceUpdateMachineDeployment, out *machine.InPlaceUpdateMachineDeployment, s conversion.Scope) error { - if err := Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { - return err - } - out.OrchestrationType = machine.OrchestrationType(in.OrchestrationType) - return nil -} - -// Convert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment is an autogenerated conversion function. 
-func Convert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(in *InPlaceUpdateMachineDeployment, out *machine.InPlaceUpdateMachineDeployment, s conversion.Scope) error { - return autoConvert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(in, out, s) -} - -func autoConvert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(in *machine.InPlaceUpdateMachineDeployment, out *InPlaceUpdateMachineDeployment, s conversion.Scope) error { - if err := Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { - return err - } - out.OrchestrationType = OrchestrationType(in.OrchestrationType) - return nil -} - -// Convert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment is an autogenerated conversion function. -func Convert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(in *machine.InPlaceUpdateMachineDeployment, out *InPlaceUpdateMachineDeployment, s conversion.Scope) error { - return autoConvert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(in, out, s) -} - -func autoConvert_v1alpha1_LastOperation_To_machine_LastOperation(in *LastOperation, out *machine.LastOperation, s conversion.Scope) error { - out.Description = in.Description - out.ErrorCode = in.ErrorCode - out.LastUpdateTime = in.LastUpdateTime - out.State = machine.MachineState(in.State) - out.Type = machine.MachineOperationType(in.Type) - return nil -} - -// Convert_v1alpha1_LastOperation_To_machine_LastOperation is an autogenerated conversion function. 
-func Convert_v1alpha1_LastOperation_To_machine_LastOperation(in *LastOperation, out *machine.LastOperation, s conversion.Scope) error { - return autoConvert_v1alpha1_LastOperation_To_machine_LastOperation(in, out, s) -} - -func autoConvert_machine_LastOperation_To_v1alpha1_LastOperation(in *machine.LastOperation, out *LastOperation, s conversion.Scope) error { - out.Description = in.Description - out.ErrorCode = in.ErrorCode - out.LastUpdateTime = in.LastUpdateTime - out.State = MachineState(in.State) - out.Type = MachineOperationType(in.Type) - return nil -} - -// Convert_machine_LastOperation_To_v1alpha1_LastOperation is an autogenerated conversion function. -func Convert_machine_LastOperation_To_v1alpha1_LastOperation(in *machine.LastOperation, out *LastOperation, s conversion.Scope) error { - return autoConvert_machine_LastOperation_To_v1alpha1_LastOperation(in, out, s) -} - -func autoConvert_v1alpha1_Machine_To_machine_Machine(in *Machine, out *machine.Machine, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - if err := Convert_v1alpha1_MachineStatus_To_machine_MachineStatus(&in.Status, &out.Status, s); err != nil { - return err - } - return nil -} - -// Convert_v1alpha1_Machine_To_machine_Machine is an autogenerated conversion function. 
-func Convert_v1alpha1_Machine_To_machine_Machine(in *Machine, out *machine.Machine, s conversion.Scope) error { - return autoConvert_v1alpha1_Machine_To_machine_Machine(in, out, s) -} - -func autoConvert_machine_Machine_To_v1alpha1_Machine(in *machine.Machine, out *Machine, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - if err := Convert_machine_MachineStatus_To_v1alpha1_MachineStatus(&in.Status, &out.Status, s); err != nil { - return err - } - return nil -} - -// Convert_machine_Machine_To_v1alpha1_Machine is an autogenerated conversion function. -func Convert_machine_Machine_To_v1alpha1_Machine(in *machine.Machine, out *Machine, s conversion.Scope) error { - return autoConvert_machine_Machine_To_v1alpha1_Machine(in, out, s) -} - -func autoConvert_v1alpha1_MachineClass_To_machine_MachineClass(in *MachineClass, out *machine.MachineClass, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - out.NodeTemplate = (*machine.NodeTemplate)(unsafe.Pointer(in.NodeTemplate)) - out.CredentialsSecretRef = (*v1.SecretReference)(unsafe.Pointer(in.CredentialsSecretRef)) - out.ProviderSpec = in.ProviderSpec - out.Provider = in.Provider - out.SecretRef = (*v1.SecretReference)(unsafe.Pointer(in.SecretRef)) - return nil -} - -// Convert_v1alpha1_MachineClass_To_machine_MachineClass is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineClass_To_machine_MachineClass(in *MachineClass, out *machine.MachineClass, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineClass_To_machine_MachineClass(in, out, s) -} - -func autoConvert_machine_MachineClass_To_v1alpha1_MachineClass(in *machine.MachineClass, out *MachineClass, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - out.NodeTemplate = (*NodeTemplate)(unsafe.Pointer(in.NodeTemplate)) - out.CredentialsSecretRef = (*v1.SecretReference)(unsafe.Pointer(in.CredentialsSecretRef)) - out.Provider = in.Provider - out.ProviderSpec = in.ProviderSpec - out.SecretRef = (*v1.SecretReference)(unsafe.Pointer(in.SecretRef)) - return nil -} - -// Convert_machine_MachineClass_To_v1alpha1_MachineClass is an autogenerated conversion function. -func Convert_machine_MachineClass_To_v1alpha1_MachineClass(in *machine.MachineClass, out *MachineClass, s conversion.Scope) error { - return autoConvert_machine_MachineClass_To_v1alpha1_MachineClass(in, out, s) -} - -func autoConvert_v1alpha1_MachineClassList_To_machine_MachineClassList(in *MachineClassList, out *machine.MachineClassList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]machine.MachineClass, len(*in)) - for i := range *in { - if err := Convert_v1alpha1_MachineClass_To_machine_MachineClass(&(*in)[i], &(*out)[i], s); err != nil { - return err - } - } - } else { - out.Items = nil - } - return nil -} - -// Convert_v1alpha1_MachineClassList_To_machine_MachineClassList is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineClassList_To_machine_MachineClassList(in *MachineClassList, out *machine.MachineClassList, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineClassList_To_machine_MachineClassList(in, out, s) -} - -func autoConvert_machine_MachineClassList_To_v1alpha1_MachineClassList(in *machine.MachineClassList, out *MachineClassList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineClass, len(*in)) - for i := range *in { - if err := Convert_machine_MachineClass_To_v1alpha1_MachineClass(&(*in)[i], &(*out)[i], s); err != nil { - return err - } - } - } else { - out.Items = nil - } - return nil -} - -// Convert_machine_MachineClassList_To_v1alpha1_MachineClassList is an autogenerated conversion function. -func Convert_machine_MachineClassList_To_v1alpha1_MachineClassList(in *machine.MachineClassList, out *MachineClassList, s conversion.Scope) error { - return autoConvert_machine_MachineClassList_To_v1alpha1_MachineClassList(in, out, s) -} - -func autoConvert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(in *MachineConfiguration, out *machine.MachineConfiguration, s conversion.Scope) error { - out.MachineDrainTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineDrainTimeout)) - out.MachineHealthTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineHealthTimeout)) - out.MachineCreationTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineCreationTimeout)) - out.MachineInPlaceUpdateTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineInPlaceUpdateTimeout)) - out.MachinePreserveTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachinePreserveTimeout)) - out.DisableHealthTimeout = (*bool)(unsafe.Pointer(in.DisableHealthTimeout)) - out.MaxEvictRetries = (*int32)(unsafe.Pointer(in.MaxEvictRetries)) - out.NodeConditions = (*string)(unsafe.Pointer(in.NodeConditions)) - return nil -} - -// 
Convert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration is an autogenerated conversion function. -func Convert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(in *MachineConfiguration, out *machine.MachineConfiguration, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(in, out, s) -} - -func autoConvert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(in *machine.MachineConfiguration, out *MachineConfiguration, s conversion.Scope) error { - out.MachineDrainTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineDrainTimeout)) - out.MachineHealthTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineHealthTimeout)) - out.MachineCreationTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineCreationTimeout)) - out.MachineInPlaceUpdateTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachineInPlaceUpdateTimeout)) - out.MachinePreserveTimeout = (*metav1.Duration)(unsafe.Pointer(in.MachinePreserveTimeout)) - out.DisableHealthTimeout = (*bool)(unsafe.Pointer(in.DisableHealthTimeout)) - out.MaxEvictRetries = (*int32)(unsafe.Pointer(in.MaxEvictRetries)) - out.NodeConditions = (*string)(unsafe.Pointer(in.NodeConditions)) - return nil -} - -// Convert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration is an autogenerated conversion function. 
-func Convert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(in *machine.MachineConfiguration, out *MachineConfiguration, s conversion.Scope) error { - return autoConvert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(in, out, s) -} - -func autoConvert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(in *MachineDeployment, out *machine.MachineDeployment, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - if err := Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(&in.Status, &out.Status, s); err != nil { - return err - } - return nil -} - -// Convert_v1alpha1_MachineDeployment_To_machine_MachineDeployment is an autogenerated conversion function. -func Convert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(in *MachineDeployment, out *machine.MachineDeployment, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(in, out, s) -} - -func autoConvert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(in *machine.MachineDeployment, out *MachineDeployment, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - if err := Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(&in.Status, &out.Status, s); err != nil { - return err - } - return nil -} - -// Convert_machine_MachineDeployment_To_v1alpha1_MachineDeployment is an autogenerated conversion function. 
-func Convert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(in *machine.MachineDeployment, out *MachineDeployment, s conversion.Scope) error { - return autoConvert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(in, out, s) -} - -func autoConvert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(in *MachineDeploymentCondition, out *machine.MachineDeploymentCondition, s conversion.Scope) error { - out.Type = machine.MachineDeploymentConditionType(in.Type) - out.Status = machine.ConditionStatus(in.Status) - out.LastUpdateTime = in.LastUpdateTime - out.LastTransitionTime = in.LastTransitionTime - out.Reason = in.Reason - out.Message = in.Message - return nil -} - -// Convert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition is an autogenerated conversion function. -func Convert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(in *MachineDeploymentCondition, out *machine.MachineDeploymentCondition, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(in, out, s) -} - -func autoConvert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(in *machine.MachineDeploymentCondition, out *MachineDeploymentCondition, s conversion.Scope) error { - out.Type = MachineDeploymentConditionType(in.Type) - out.Status = ConditionStatus(in.Status) - out.LastUpdateTime = in.LastUpdateTime - out.LastTransitionTime = in.LastTransitionTime - out.Reason = in.Reason - out.Message = in.Message - return nil -} - -// Convert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition is an autogenerated conversion function. 
-func Convert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(in *machine.MachineDeploymentCondition, out *MachineDeploymentCondition, s conversion.Scope) error { - return autoConvert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(in, out, s) -} - -func autoConvert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(in *MachineDeploymentList, out *machine.MachineDeploymentList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - out.Items = *(*[]machine.MachineDeployment)(unsafe.Pointer(&in.Items)) - return nil -} - -// Convert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList is an autogenerated conversion function. -func Convert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(in *MachineDeploymentList, out *machine.MachineDeploymentList, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(in, out, s) -} - -func autoConvert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(in *machine.MachineDeploymentList, out *MachineDeploymentList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - out.Items = *(*[]MachineDeployment)(unsafe.Pointer(&in.Items)) - return nil -} - -// Convert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList is an autogenerated conversion function. 
-func Convert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(in *machine.MachineDeploymentList, out *MachineDeploymentList, s conversion.Scope) error { - return autoConvert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(in, out, s) -} - -func autoConvert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(in *MachineDeploymentSpec, out *machine.MachineDeploymentSpec, s conversion.Scope) error { - out.Replicas = in.Replicas - out.Selector = (*metav1.LabelSelector)(unsafe.Pointer(in.Selector)) - if err := Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { - return err - } - if err := Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(&in.Strategy, &out.Strategy, s); err != nil { - return err - } - out.MinReadySeconds = in.MinReadySeconds - out.RevisionHistoryLimit = (*int32)(unsafe.Pointer(in.RevisionHistoryLimit)) - out.Paused = in.Paused - out.RollbackTo = (*machine.RollbackConfig)(unsafe.Pointer(in.RollbackTo)) - out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) - out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax - return nil -} - -// Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(in *MachineDeploymentSpec, out *machine.MachineDeploymentSpec, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(in, out, s) -} - -func autoConvert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(in *machine.MachineDeploymentSpec, out *MachineDeploymentSpec, s conversion.Scope) error { - out.Replicas = in.Replicas - out.Selector = (*metav1.LabelSelector)(unsafe.Pointer(in.Selector)) - if err := Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { - return err - } - if err := Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(&in.Strategy, &out.Strategy, s); err != nil { - return err - } - out.MinReadySeconds = in.MinReadySeconds - out.RevisionHistoryLimit = (*int32)(unsafe.Pointer(in.RevisionHistoryLimit)) - out.Paused = in.Paused - out.RollbackTo = (*RollbackConfig)(unsafe.Pointer(in.RollbackTo)) - out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) - out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax - return nil -} - -// Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec is an autogenerated conversion function. 
-func Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(in *machine.MachineDeploymentSpec, out *MachineDeploymentSpec, s conversion.Scope) error { - return autoConvert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(in, out, s) -} - -func autoConvert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(in *MachineDeploymentStatus, out *machine.MachineDeploymentStatus, s conversion.Scope) error { - out.ObservedGeneration = in.ObservedGeneration - out.Replicas = in.Replicas - out.UpdatedReplicas = in.UpdatedReplicas - out.ReadyReplicas = in.ReadyReplicas - out.AvailableReplicas = in.AvailableReplicas - out.UnavailableReplicas = in.UnavailableReplicas - out.Conditions = *(*[]machine.MachineDeploymentCondition)(unsafe.Pointer(&in.Conditions)) - out.CollisionCount = (*int32)(unsafe.Pointer(in.CollisionCount)) - out.FailedMachines = *(*[]*machine.MachineSummary)(unsafe.Pointer(&in.FailedMachines)) - return nil -} - -// Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(in *MachineDeploymentStatus, out *machine.MachineDeploymentStatus, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(in, out, s) -} - -func autoConvert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(in *machine.MachineDeploymentStatus, out *MachineDeploymentStatus, s conversion.Scope) error { - out.ObservedGeneration = in.ObservedGeneration - out.Replicas = in.Replicas - out.UpdatedReplicas = in.UpdatedReplicas - out.ReadyReplicas = in.ReadyReplicas - out.AvailableReplicas = in.AvailableReplicas - out.UnavailableReplicas = in.UnavailableReplicas - out.Conditions = *(*[]MachineDeploymentCondition)(unsafe.Pointer(&in.Conditions)) - out.CollisionCount = (*int32)(unsafe.Pointer(in.CollisionCount)) - out.FailedMachines = *(*[]*MachineSummary)(unsafe.Pointer(&in.FailedMachines)) - return nil -} - -// Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus is an autogenerated conversion function. -func Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(in *machine.MachineDeploymentStatus, out *MachineDeploymentStatus, s conversion.Scope) error { - return autoConvert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(in, out, s) -} - -func autoConvert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(in *MachineDeploymentStrategy, out *machine.MachineDeploymentStrategy, s conversion.Scope) error { - out.Type = machine.MachineDeploymentStrategyType(in.Type) - out.RollingUpdate = (*machine.RollingUpdateMachineDeployment)(unsafe.Pointer(in.RollingUpdate)) - out.InPlaceUpdate = (*machine.InPlaceUpdateMachineDeployment)(unsafe.Pointer(in.InPlaceUpdate)) - return nil -} - -// Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(in *MachineDeploymentStrategy, out *machine.MachineDeploymentStrategy, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(in, out, s) -} - -func autoConvert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(in *machine.MachineDeploymentStrategy, out *MachineDeploymentStrategy, s conversion.Scope) error { - out.Type = MachineDeploymentStrategyType(in.Type) - out.RollingUpdate = (*RollingUpdateMachineDeployment)(unsafe.Pointer(in.RollingUpdate)) - out.InPlaceUpdate = (*InPlaceUpdateMachineDeployment)(unsafe.Pointer(in.InPlaceUpdate)) - return nil -} - -// Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy is an autogenerated conversion function. -func Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(in *machine.MachineDeploymentStrategy, out *MachineDeploymentStrategy, s conversion.Scope) error { - return autoConvert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(in, out, s) -} - -func autoConvert_v1alpha1_MachineList_To_machine_MachineList(in *MachineList, out *machine.MachineList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - out.Items = *(*[]machine.Machine)(unsafe.Pointer(&in.Items)) - return nil -} - -// Convert_v1alpha1_MachineList_To_machine_MachineList is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineList_To_machine_MachineList(in *MachineList, out *machine.MachineList, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineList_To_machine_MachineList(in, out, s) -} - -func autoConvert_machine_MachineList_To_v1alpha1_MachineList(in *machine.MachineList, out *MachineList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - out.Items = *(*[]Machine)(unsafe.Pointer(&in.Items)) - return nil -} - -// Convert_machine_MachineList_To_v1alpha1_MachineList is an autogenerated conversion function. -func Convert_machine_MachineList_To_v1alpha1_MachineList(in *machine.MachineList, out *MachineList, s conversion.Scope) error { - return autoConvert_machine_MachineList_To_v1alpha1_MachineList(in, out, s) -} - -func autoConvert_v1alpha1_MachineSet_To_machine_MachineSet(in *MachineSet, out *machine.MachineSet, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - if err := Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(&in.Status, &out.Status, s); err != nil { - return err - } - return nil -} - -// Convert_v1alpha1_MachineSet_To_machine_MachineSet is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineSet_To_machine_MachineSet(in *MachineSet, out *machine.MachineSet, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineSet_To_machine_MachineSet(in, out, s) -} - -func autoConvert_machine_MachineSet_To_v1alpha1_MachineSet(in *machine.MachineSet, out *MachineSet, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - if err := Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(&in.Status, &out.Status, s); err != nil { - return err - } - return nil -} - -// Convert_machine_MachineSet_To_v1alpha1_MachineSet is an autogenerated conversion function. -func Convert_machine_MachineSet_To_v1alpha1_MachineSet(in *machine.MachineSet, out *MachineSet, s conversion.Scope) error { - return autoConvert_machine_MachineSet_To_v1alpha1_MachineSet(in, out, s) -} - -func autoConvert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(in *MachineSetCondition, out *machine.MachineSetCondition, s conversion.Scope) error { - out.Type = machine.MachineSetConditionType(in.Type) - out.Status = machine.ConditionStatus(in.Status) - out.LastTransitionTime = in.LastTransitionTime - out.Reason = in.Reason - out.Message = in.Message - return nil -} - -// Convert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(in *MachineSetCondition, out *machine.MachineSetCondition, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(in, out, s) -} - -func autoConvert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(in *machine.MachineSetCondition, out *MachineSetCondition, s conversion.Scope) error { - out.Type = MachineSetConditionType(in.Type) - out.Status = ConditionStatus(in.Status) - out.LastTransitionTime = in.LastTransitionTime - out.Reason = in.Reason - out.Message = in.Message - return nil -} - -// Convert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition is an autogenerated conversion function. -func Convert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(in *machine.MachineSetCondition, out *MachineSetCondition, s conversion.Scope) error { - return autoConvert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(in, out, s) -} - -func autoConvert_v1alpha1_MachineSetList_To_machine_MachineSetList(in *MachineSetList, out *machine.MachineSetList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - out.Items = *(*[]machine.MachineSet)(unsafe.Pointer(&in.Items)) - return nil -} - -// Convert_v1alpha1_MachineSetList_To_machine_MachineSetList is an autogenerated conversion function. -func Convert_v1alpha1_MachineSetList_To_machine_MachineSetList(in *MachineSetList, out *machine.MachineSetList, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineSetList_To_machine_MachineSetList(in, out, s) -} - -func autoConvert_machine_MachineSetList_To_v1alpha1_MachineSetList(in *machine.MachineSetList, out *MachineSetList, s conversion.Scope) error { - out.ListMeta = in.ListMeta - out.Items = *(*[]MachineSet)(unsafe.Pointer(&in.Items)) - return nil -} - -// Convert_machine_MachineSetList_To_v1alpha1_MachineSetList is an autogenerated conversion function. 
-func Convert_machine_MachineSetList_To_v1alpha1_MachineSetList(in *machine.MachineSetList, out *MachineSetList, s conversion.Scope) error { - return autoConvert_machine_MachineSetList_To_v1alpha1_MachineSetList(in, out, s) -} - -func autoConvert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in *MachineSetSpec, out *machine.MachineSetSpec, s conversion.Scope) error { - out.Replicas = in.Replicas - out.Selector = (*metav1.LabelSelector)(unsafe.Pointer(in.Selector)) - if err := Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(&in.MachineClass, &out.MachineClass, s); err != nil { - return err - } - if err := Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { - return err - } - out.MinReadySeconds = in.MinReadySeconds - out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax - return nil -} - -// Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec is an autogenerated conversion function. -func Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in *MachineSetSpec, out *machine.MachineSetSpec, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in, out, s) -} - -func autoConvert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in *machine.MachineSetSpec, out *MachineSetSpec, s conversion.Scope) error { - out.Replicas = in.Replicas - out.Selector = (*metav1.LabelSelector)(unsafe.Pointer(in.Selector)) - if err := Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(&in.MachineClass, &out.MachineClass, s); err != nil { - return err - } - if err := Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { - return err - } - out.MinReadySeconds = in.MinReadySeconds - out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax - return nil -} - -// Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec is an autogenerated conversion function. 
-func Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in *machine.MachineSetSpec, out *MachineSetSpec, s conversion.Scope) error { - return autoConvert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in, out, s) -} - -func autoConvert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in *MachineSetStatus, out *machine.MachineSetStatus, s conversion.Scope) error { - out.Replicas = in.Replicas - out.FullyLabeledReplicas = in.FullyLabeledReplicas - out.ReadyReplicas = in.ReadyReplicas - out.AvailableReplicas = in.AvailableReplicas - out.ObservedGeneration = in.ObservedGeneration - out.Conditions = *(*[]machine.MachineSetCondition)(unsafe.Pointer(&in.Conditions)) - if err := Convert_v1alpha1_LastOperation_To_machine_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { - return err - } - out.FailedMachines = (*[]machine.MachineSummary)(unsafe.Pointer(in.FailedMachines)) - out.AutoPreserveFailedMachineCount = in.AutoPreserveFailedMachineCount - return nil -} - -// Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in *MachineSetStatus, out *machine.MachineSetStatus, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in, out, s) -} - -func autoConvert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in *machine.MachineSetStatus, out *MachineSetStatus, s conversion.Scope) error { - out.Replicas = in.Replicas - out.FullyLabeledReplicas = in.FullyLabeledReplicas - out.ReadyReplicas = in.ReadyReplicas - out.AvailableReplicas = in.AvailableReplicas - out.ObservedGeneration = in.ObservedGeneration - out.Conditions = *(*[]MachineSetCondition)(unsafe.Pointer(&in.Conditions)) - if err := Convert_machine_LastOperation_To_v1alpha1_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { - return err - } - out.FailedMachines = (*[]MachineSummary)(unsafe.Pointer(in.FailedMachines)) - out.AutoPreserveFailedMachineCount = in.AutoPreserveFailedMachineCount - return nil -} - -// Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus is an autogenerated conversion function. -func Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in *machine.MachineSetStatus, out *MachineSetStatus, s conversion.Scope) error { - return autoConvert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in, out, s) -} - -func autoConvert_v1alpha1_MachineSpec_To_machine_MachineSpec(in *MachineSpec, out *machine.MachineSpec, s conversion.Scope) error { - if err := Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(&in.Class, &out.Class, s); err != nil { - return err - } - out.ProviderID = in.ProviderID - if err := Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(&in.NodeTemplateSpec, &out.NodeTemplateSpec, s); err != nil { - return err - } - out.MachineConfiguration = (*machine.MachineConfiguration)(unsafe.Pointer(in.MachineConfiguration)) - return nil -} - -// Convert_v1alpha1_MachineSpec_To_machine_MachineSpec is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(in *MachineSpec, out *machine.MachineSpec, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineSpec_To_machine_MachineSpec(in, out, s) -} - -func autoConvert_machine_MachineSpec_To_v1alpha1_MachineSpec(in *machine.MachineSpec, out *MachineSpec, s conversion.Scope) error { - if err := Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(&in.Class, &out.Class, s); err != nil { - return err - } - out.ProviderID = in.ProviderID - if err := Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(&in.NodeTemplateSpec, &out.NodeTemplateSpec, s); err != nil { - return err - } - out.MachineConfiguration = (*MachineConfiguration)(unsafe.Pointer(in.MachineConfiguration)) - return nil -} - -// Convert_machine_MachineSpec_To_v1alpha1_MachineSpec is an autogenerated conversion function. -func Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(in *machine.MachineSpec, out *MachineSpec, s conversion.Scope) error { - return autoConvert_machine_MachineSpec_To_v1alpha1_MachineSpec(in, out, s) -} - -func autoConvert_v1alpha1_MachineStatus_To_machine_MachineStatus(in *MachineStatus, out *machine.MachineStatus, s conversion.Scope) error { - out.Addresses = *(*[]v1.NodeAddress)(unsafe.Pointer(&in.Addresses)) - out.Conditions = *(*[]v1.NodeCondition)(unsafe.Pointer(&in.Conditions)) - if err := Convert_v1alpha1_LastOperation_To_machine_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { - return err - } - if err := Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(&in.CurrentStatus, &out.CurrentStatus, s); err != nil { - return err - } - out.LastKnownState = in.LastKnownState - return nil -} - -// Convert_v1alpha1_MachineStatus_To_machine_MachineStatus is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineStatus_To_machine_MachineStatus(in *MachineStatus, out *machine.MachineStatus, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineStatus_To_machine_MachineStatus(in, out, s) -} - -func autoConvert_machine_MachineStatus_To_v1alpha1_MachineStatus(in *machine.MachineStatus, out *MachineStatus, s conversion.Scope) error { - out.Addresses = *(*[]v1.NodeAddress)(unsafe.Pointer(&in.Addresses)) - out.Conditions = *(*[]v1.NodeCondition)(unsafe.Pointer(&in.Conditions)) - if err := Convert_machine_LastOperation_To_v1alpha1_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { - return err - } - if err := Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(&in.CurrentStatus, &out.CurrentStatus, s); err != nil { - return err - } - out.LastKnownState = in.LastKnownState - return nil -} - -// Convert_machine_MachineStatus_To_v1alpha1_MachineStatus is an autogenerated conversion function. -func Convert_machine_MachineStatus_To_v1alpha1_MachineStatus(in *machine.MachineStatus, out *MachineStatus, s conversion.Scope) error { - return autoConvert_machine_MachineStatus_To_v1alpha1_MachineStatus(in, out, s) -} - -func autoConvert_v1alpha1_MachineSummary_To_machine_MachineSummary(in *MachineSummary, out *machine.MachineSummary, s conversion.Scope) error { - out.Name = in.Name - out.ProviderID = in.ProviderID - if err := Convert_v1alpha1_LastOperation_To_machine_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { - return err - } - out.OwnerRef = in.OwnerRef - return nil -} - -// Convert_v1alpha1_MachineSummary_To_machine_MachineSummary is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineSummary_To_machine_MachineSummary(in *MachineSummary, out *machine.MachineSummary, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineSummary_To_machine_MachineSummary(in, out, s) -} - -func autoConvert_machine_MachineSummary_To_v1alpha1_MachineSummary(in *machine.MachineSummary, out *MachineSummary, s conversion.Scope) error { - out.Name = in.Name - out.ProviderID = in.ProviderID - if err := Convert_machine_LastOperation_To_v1alpha1_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { - return err - } - out.OwnerRef = in.OwnerRef - return nil -} - -// Convert_machine_MachineSummary_To_v1alpha1_MachineSummary is an autogenerated conversion function. -func Convert_machine_MachineSummary_To_v1alpha1_MachineSummary(in *machine.MachineSummary, out *MachineSummary, s conversion.Scope) error { - return autoConvert_machine_MachineSummary_To_v1alpha1_MachineSummary(in, out, s) -} - -func autoConvert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(in *MachineTemplateSpec, out *machine.MachineTemplateSpec, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - return nil -} - -// Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec is an autogenerated conversion function. 
-func Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(in *MachineTemplateSpec, out *machine.MachineTemplateSpec, s conversion.Scope) error { - return autoConvert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(in, out, s) -} - -func autoConvert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(in *machine.MachineTemplateSpec, out *MachineTemplateSpec, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - if err := Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(&in.Spec, &out.Spec, s); err != nil { - return err - } - return nil -} - -// Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec is an autogenerated conversion function. -func Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(in *machine.MachineTemplateSpec, out *MachineTemplateSpec, s conversion.Scope) error { - return autoConvert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(in, out, s) -} - -func autoConvert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(in *NodeTemplate, out *machine.NodeTemplate, s conversion.Scope) error { - out.Capacity = *(*v1.ResourceList)(unsafe.Pointer(&in.Capacity)) - out.VirtualCapacity = *(*v1.ResourceList)(unsafe.Pointer(&in.VirtualCapacity)) - out.InstanceType = in.InstanceType - out.Region = in.Region - out.Zone = in.Zone - out.Architecture = (*string)(unsafe.Pointer(in.Architecture)) - return nil -} - -// Convert_v1alpha1_NodeTemplate_To_machine_NodeTemplate is an autogenerated conversion function. 
-func Convert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(in *NodeTemplate, out *machine.NodeTemplate, s conversion.Scope) error { - return autoConvert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(in, out, s) -} - -func autoConvert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(in *machine.NodeTemplate, out *NodeTemplate, s conversion.Scope) error { - out.Capacity = *(*v1.ResourceList)(unsafe.Pointer(&in.Capacity)) - out.VirtualCapacity = *(*v1.ResourceList)(unsafe.Pointer(&in.VirtualCapacity)) - out.InstanceType = in.InstanceType - out.Region = in.Region - out.Zone = in.Zone - out.Architecture = (*string)(unsafe.Pointer(in.Architecture)) - return nil -} - -// Convert_machine_NodeTemplate_To_v1alpha1_NodeTemplate is an autogenerated conversion function. -func Convert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(in *machine.NodeTemplate, out *NodeTemplate, s conversion.Scope) error { - return autoConvert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(in, out, s) -} - -func autoConvert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(in *NodeTemplateSpec, out *machine.NodeTemplateSpec, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - out.Spec = in.Spec - return nil -} - -// Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec is an autogenerated conversion function. -func Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(in *NodeTemplateSpec, out *machine.NodeTemplateSpec, s conversion.Scope) error { - return autoConvert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(in, out, s) -} - -func autoConvert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(in *machine.NodeTemplateSpec, out *NodeTemplateSpec, s conversion.Scope) error { - out.ObjectMeta = in.ObjectMeta - out.Spec = in.Spec - return nil -} - -// Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec is an autogenerated conversion function. 
-func Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(in *machine.NodeTemplateSpec, out *NodeTemplateSpec, s conversion.Scope) error { - return autoConvert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(in, out, s) -} - -func autoConvert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(in *RollbackConfig, out *machine.RollbackConfig, s conversion.Scope) error { - out.Revision = in.Revision - return nil -} - -// Convert_v1alpha1_RollbackConfig_To_machine_RollbackConfig is an autogenerated conversion function. -func Convert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(in *RollbackConfig, out *machine.RollbackConfig, s conversion.Scope) error { - return autoConvert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(in, out, s) -} - -func autoConvert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(in *machine.RollbackConfig, out *RollbackConfig, s conversion.Scope) error { - out.Revision = in.Revision - return nil -} - -// Convert_machine_RollbackConfig_To_v1alpha1_RollbackConfig is an autogenerated conversion function. -func Convert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(in *machine.RollbackConfig, out *RollbackConfig, s conversion.Scope) error { - return autoConvert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(in, out, s) -} - -func autoConvert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(in *RollingUpdateMachineDeployment, out *machine.RollingUpdateMachineDeployment, s conversion.Scope) error { - if err := Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { - return err - } - return nil -} - -// Convert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment is an autogenerated conversion function. 
-func Convert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(in *RollingUpdateMachineDeployment, out *machine.RollingUpdateMachineDeployment, s conversion.Scope) error { - return autoConvert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(in, out, s) -} - -func autoConvert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(in *machine.RollingUpdateMachineDeployment, out *RollingUpdateMachineDeployment, s conversion.Scope) error { - if err := Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { - return err - } - return nil -} - -// Convert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment is an autogenerated conversion function. -func Convert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(in *machine.RollingUpdateMachineDeployment, out *RollingUpdateMachineDeployment, s conversion.Scope) error { - return autoConvert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(in, out, s) -} - -func autoConvert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(in *UpdateConfiguration, out *machine.UpdateConfiguration, s conversion.Scope) error { - out.MaxUnavailable = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnavailable)) - out.MaxSurge = (*intstr.IntOrString)(unsafe.Pointer(in.MaxSurge)) - return nil -} - -// Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration is an autogenerated conversion function. 
-func Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(in *UpdateConfiguration, out *machine.UpdateConfiguration, s conversion.Scope) error { - return autoConvert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(in, out, s) -} - -func autoConvert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(in *machine.UpdateConfiguration, out *UpdateConfiguration, s conversion.Scope) error { - out.MaxUnavailable = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnavailable)) - out.MaxSurge = (*intstr.IntOrString)(unsafe.Pointer(in.MaxSurge)) - return nil -} - -// Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration is an autogenerated conversion function. -func Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(in *machine.UpdateConfiguration, out *UpdateConfiguration, s conversion.Scope) error { - return autoConvert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(in, out, s) -} diff --git a/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go deleted file mode 100644 index 13aab59e2..000000000 --- a/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go +++ /dev/null @@ -1,795 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors -// -// SPDX-License-Identifier: Apache-2.0 - -// Code generated by deepcopy-gen. DO NOT EDIT. - -package v1alpha1 - -import ( - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - intstr "k8s.io/apimachinery/pkg/util/intstr" -) - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ClassSpec) DeepCopyInto(out *ClassSpec) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClassSpec. 
-func (in *ClassSpec) DeepCopy() *ClassSpec { - if in == nil { - return nil - } - out := new(ClassSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *CurrentStatus) DeepCopyInto(out *CurrentStatus) { - *out = *in - in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) - in.PreserveExpiryTime.DeepCopyInto(&out.PreserveExpiryTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CurrentStatus. -func (in *CurrentStatus) DeepCopy() *CurrentStatus { - if in == nil { - return nil - } - out := new(CurrentStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *InPlaceUpdateMachineDeployment) DeepCopyInto(out *InPlaceUpdateMachineDeployment) { - *out = *in - in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InPlaceUpdateMachineDeployment. -func (in *InPlaceUpdateMachineDeployment) DeepCopy() *InPlaceUpdateMachineDeployment { - if in == nil { - return nil - } - out := new(InPlaceUpdateMachineDeployment) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *LastOperation) DeepCopyInto(out *LastOperation) { - *out = *in - in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LastOperation. -func (in *LastOperation) DeepCopy() *LastOperation { - if in == nil { - return nil - } - out := new(LastOperation) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *Machine) DeepCopyInto(out *Machine) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.TypeMeta = in.TypeMeta - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Machine. -func (in *Machine) DeepCopy() *Machine { - if in == nil { - return nil - } - out := new(Machine) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *Machine) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineClass) DeepCopyInto(out *MachineClass) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - if in.NodeTemplate != nil { - in, out := &in.NodeTemplate, &out.NodeTemplate - *out = new(NodeTemplate) - (*in).DeepCopyInto(*out) - } - if in.CredentialsSecretRef != nil { - in, out := &in.CredentialsSecretRef, &out.CredentialsSecretRef - *out = new(v1.SecretReference) - **out = **in - } - in.ProviderSpec.DeepCopyInto(&out.ProviderSpec) - if in.SecretRef != nil { - in, out := &in.SecretRef, &out.SecretRef - *out = new(v1.SecretReference) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClass. -func (in *MachineClass) DeepCopy() *MachineClass { - if in == nil { - return nil - } - out := new(MachineClass) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *MachineClass) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineClassList) DeepCopyInto(out *MachineClassList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineClass, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClassList. -func (in *MachineClassList) DeepCopy() *MachineClassList { - if in == nil { - return nil - } - out := new(MachineClassList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineClassList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineConfiguration) DeepCopyInto(out *MachineConfiguration) { - *out = *in - if in.MachineDrainTimeout != nil { - in, out := &in.MachineDrainTimeout, &out.MachineDrainTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachineHealthTimeout != nil { - in, out := &in.MachineHealthTimeout, &out.MachineHealthTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachineCreationTimeout != nil { - in, out := &in.MachineCreationTimeout, &out.MachineCreationTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachineInPlaceUpdateTimeout != nil { - in, out := &in.MachineInPlaceUpdateTimeout, &out.MachineInPlaceUpdateTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachinePreserveTimeout != nil { - in, out := &in.MachinePreserveTimeout, &out.MachinePreserveTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.DisableHealthTimeout != nil { - in, out := &in.DisableHealthTimeout, &out.DisableHealthTimeout - *out = new(bool) - **out = **in - } - if in.MaxEvictRetries != nil { - in, out := &in.MaxEvictRetries, &out.MaxEvictRetries - *out = new(int32) - **out = **in - } - if in.NodeConditions != nil { - in, out := &in.NodeConditions, &out.NodeConditions - *out = new(string) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineConfiguration. -func (in *MachineConfiguration) DeepCopy() *MachineConfiguration { - if in == nil { - return nil - } - out := new(MachineConfiguration) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineDeployment) DeepCopyInto(out *MachineDeployment) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeployment. -func (in *MachineDeployment) DeepCopy() *MachineDeployment { - if in == nil { - return nil - } - out := new(MachineDeployment) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineDeployment) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentCondition) DeepCopyInto(out *MachineDeploymentCondition) { - *out = *in - in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) - in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentCondition. -func (in *MachineDeploymentCondition) DeepCopy() *MachineDeploymentCondition { - if in == nil { - return nil - } - out := new(MachineDeploymentCondition) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentList) DeepCopyInto(out *MachineDeploymentList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineDeployment, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentList. 
-func (in *MachineDeploymentList) DeepCopy() *MachineDeploymentList { - if in == nil { - return nil - } - out := new(MachineDeploymentList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineDeploymentList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentSpec) DeepCopyInto(out *MachineDeploymentSpec) { - *out = *in - if in.Selector != nil { - in, out := &in.Selector, &out.Selector - *out = new(metav1.LabelSelector) - (*in).DeepCopyInto(*out) - } - in.Template.DeepCopyInto(&out.Template) - in.Strategy.DeepCopyInto(&out.Strategy) - if in.RevisionHistoryLimit != nil { - in, out := &in.RevisionHistoryLimit, &out.RevisionHistoryLimit - *out = new(int32) - **out = **in - } - if in.RollbackTo != nil { - in, out := &in.RollbackTo, &out.RollbackTo - *out = new(RollbackConfig) - **out = **in - } - if in.ProgressDeadlineSeconds != nil { - in, out := &in.ProgressDeadlineSeconds, &out.ProgressDeadlineSeconds - *out = new(int32) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentSpec. -func (in *MachineDeploymentSpec) DeepCopy() *MachineDeploymentSpec { - if in == nil { - return nil - } - out := new(MachineDeploymentSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineDeploymentStatus) DeepCopyInto(out *MachineDeploymentStatus) { - *out = *in - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]MachineDeploymentCondition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.CollisionCount != nil { - in, out := &in.CollisionCount, &out.CollisionCount - *out = new(int32) - **out = **in - } - if in.FailedMachines != nil { - in, out := &in.FailedMachines, &out.FailedMachines - *out = make([]*MachineSummary, len(*in)) - for i := range *in { - if (*in)[i] != nil { - in, out := &(*in)[i], &(*out)[i] - *out = new(MachineSummary) - (*in).DeepCopyInto(*out) - } - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStatus. -func (in *MachineDeploymentStatus) DeepCopy() *MachineDeploymentStatus { - if in == nil { - return nil - } - out := new(MachineDeploymentStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentStrategy) DeepCopyInto(out *MachineDeploymentStrategy) { - *out = *in - if in.RollingUpdate != nil { - in, out := &in.RollingUpdate, &out.RollingUpdate - *out = new(RollingUpdateMachineDeployment) - (*in).DeepCopyInto(*out) - } - if in.InPlaceUpdate != nil { - in, out := &in.InPlaceUpdate, &out.InPlaceUpdate - *out = new(InPlaceUpdateMachineDeployment) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStrategy. -func (in *MachineDeploymentStrategy) DeepCopy() *MachineDeploymentStrategy { - if in == nil { - return nil - } - out := new(MachineDeploymentStrategy) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineList) DeepCopyInto(out *MachineList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]Machine, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineList. -func (in *MachineList) DeepCopy() *MachineList { - if in == nil { - return nil - } - out := new(MachineList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSet) DeepCopyInto(out *MachineSet) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.TypeMeta = in.TypeMeta - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSet. -func (in *MachineSet) DeepCopy() *MachineSet { - if in == nil { - return nil - } - out := new(MachineSet) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineSet) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineSetCondition) DeepCopyInto(out *MachineSetCondition) { - *out = *in - in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetCondition. -func (in *MachineSetCondition) DeepCopy() *MachineSetCondition { - if in == nil { - return nil - } - out := new(MachineSetCondition) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSetList) DeepCopyInto(out *MachineSetList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineSet, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetList. -func (in *MachineSetList) DeepCopy() *MachineSetList { - if in == nil { - return nil - } - out := new(MachineSetList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineSetList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSetSpec) DeepCopyInto(out *MachineSetSpec) { - *out = *in - if in.Selector != nil { - in, out := &in.Selector, &out.Selector - *out = new(metav1.LabelSelector) - (*in).DeepCopyInto(*out) - } - out.MachineClass = in.MachineClass - in.Template.DeepCopyInto(&out.Template) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetSpec. 
-func (in *MachineSetSpec) DeepCopy() *MachineSetSpec { - if in == nil { - return nil - } - out := new(MachineSetSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSetStatus) DeepCopyInto(out *MachineSetStatus) { - *out = *in - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]MachineSetCondition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - in.LastOperation.DeepCopyInto(&out.LastOperation) - if in.FailedMachines != nil { - in, out := &in.FailedMachines, &out.FailedMachines - *out = new([]MachineSummary) - if **in != nil { - in, out := *in, *out - *out = make([]MachineSummary, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetStatus. -func (in *MachineSetStatus) DeepCopy() *MachineSetStatus { - if in == nil { - return nil - } - out := new(MachineSetStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSpec) DeepCopyInto(out *MachineSpec) { - *out = *in - out.Class = in.Class - in.NodeTemplateSpec.DeepCopyInto(&out.NodeTemplateSpec) - if in.MachineConfiguration != nil { - in, out := &in.MachineConfiguration, &out.MachineConfiguration - *out = new(MachineConfiguration) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSpec. -func (in *MachineSpec) DeepCopy() *MachineSpec { - if in == nil { - return nil - } - out := new(MachineSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineStatus) DeepCopyInto(out *MachineStatus) { - *out = *in - if in.Addresses != nil { - in, out := &in.Addresses, &out.Addresses - *out = make([]v1.NodeAddress, len(*in)) - copy(*out, *in) - } - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]v1.NodeCondition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - in.LastOperation.DeepCopyInto(&out.LastOperation) - in.CurrentStatus.DeepCopyInto(&out.CurrentStatus) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineStatus. -func (in *MachineStatus) DeepCopy() *MachineStatus { - if in == nil { - return nil - } - out := new(MachineStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSummary) DeepCopyInto(out *MachineSummary) { - *out = *in - in.LastOperation.DeepCopyInto(&out.LastOperation) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSummary. -func (in *MachineSummary) DeepCopy() *MachineSummary { - if in == nil { - return nil - } - out := new(MachineSummary) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineTemplateSpec) DeepCopyInto(out *MachineTemplateSpec) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplateSpec. -func (in *MachineTemplateSpec) DeepCopy() *MachineTemplateSpec { - if in == nil { - return nil - } - out := new(MachineTemplateSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *NodeTemplate) DeepCopyInto(out *NodeTemplate) { - *out = *in - if in.Capacity != nil { - in, out := &in.Capacity, &out.Capacity - *out = make(v1.ResourceList, len(*in)) - for key, val := range *in { - (*out)[key] = val.DeepCopy() - } - } - if in.VirtualCapacity != nil { - in, out := &in.VirtualCapacity, &out.VirtualCapacity - *out = make(v1.ResourceList, len(*in)) - for key, val := range *in { - (*out)[key] = val.DeepCopy() - } - } - if in.Architecture != nil { - in, out := &in.Architecture, &out.Architecture - *out = new(string) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplate. -func (in *NodeTemplate) DeepCopy() *NodeTemplate { - if in == nil { - return nil - } - out := new(NodeTemplate) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *NodeTemplateSpec) DeepCopyInto(out *NodeTemplateSpec) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplateSpec. -func (in *NodeTemplateSpec) DeepCopy() *NodeTemplateSpec { - if in == nil { - return nil - } - out := new(NodeTemplateSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RollbackConfig) DeepCopyInto(out *RollbackConfig) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollbackConfig. -func (in *RollbackConfig) DeepCopy() *RollbackConfig { - if in == nil { - return nil - } - out := new(RollbackConfig) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *RollingUpdateMachineDeployment) DeepCopyInto(out *RollingUpdateMachineDeployment) { - *out = *in - in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollingUpdateMachineDeployment. -func (in *RollingUpdateMachineDeployment) DeepCopy() *RollingUpdateMachineDeployment { - if in == nil { - return nil - } - out := new(RollingUpdateMachineDeployment) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *UpdateConfiguration) DeepCopyInto(out *UpdateConfiguration) { - *out = *in - if in.MaxUnavailable != nil { - in, out := &in.MaxUnavailable, &out.MaxUnavailable - *out = new(intstr.IntOrString) - **out = **in - } - if in.MaxSurge != nil { - in, out := &in.MaxSurge, &out.MaxSurge - *out = new(intstr.IntOrString) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpdateConfiguration. -func (in *UpdateConfiguration) DeepCopy() *UpdateConfiguration { - if in == nil { - return nil - } - out := new(UpdateConfiguration) - in.DeepCopyInto(out) - return out -} diff --git a/pkg/apis/machine/v1alpha1/zz_generated.defaults.go b/pkg/apis/machine/v1alpha1/zz_generated.defaults.go deleted file mode 100644 index dce68e638..000000000 --- a/pkg/apis/machine/v1alpha1/zz_generated.defaults.go +++ /dev/null @@ -1,21 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors -// -// SPDX-License-Identifier: Apache-2.0 - -// Code generated by defaulter-gen. DO NOT EDIT. - -package v1alpha1 - -import ( - runtime "k8s.io/apimachinery/pkg/runtime" -) - -// RegisterDefaults adds defaulters functions to the given scheme. -// Public to allow building arbitrary schemes. 
-// All generated defaulters are covering - they call all nested defaulters. -func RegisterDefaults(scheme *runtime.Scheme) error { - return nil -} diff --git a/pkg/apis/machine/zz_generated.deepcopy.go b/pkg/apis/machine/zz_generated.deepcopy.go deleted file mode 100644 index 90aa57743..000000000 --- a/pkg/apis/machine/zz_generated.deepcopy.go +++ /dev/null @@ -1,888 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors -// -// SPDX-License-Identifier: Apache-2.0 - -// Code generated by deepcopy-gen. DO NOT EDIT. - -package machine - -import ( - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" - intstr "k8s.io/apimachinery/pkg/util/intstr" -) - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ClassSpec) DeepCopyInto(out *ClassSpec) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClassSpec. -func (in *ClassSpec) DeepCopy() *ClassSpec { - if in == nil { - return nil - } - out := new(ClassSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *CurrentStatus) DeepCopyInto(out *CurrentStatus) { - *out = *in - in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) - in.PreserveExpiryTime.DeepCopyInto(&out.PreserveExpiryTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CurrentStatus. -func (in *CurrentStatus) DeepCopy() *CurrentStatus { - if in == nil { - return nil - } - out := new(CurrentStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *InPlaceUpdateMachineDeployment) DeepCopyInto(out *InPlaceUpdateMachineDeployment) { - *out = *in - in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InPlaceUpdateMachineDeployment. -func (in *InPlaceUpdateMachineDeployment) DeepCopy() *InPlaceUpdateMachineDeployment { - if in == nil { - return nil - } - out := new(InPlaceUpdateMachineDeployment) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *LastOperation) DeepCopyInto(out *LastOperation) { - *out = *in - in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LastOperation. -func (in *LastOperation) DeepCopy() *LastOperation { - if in == nil { - return nil - } - out := new(LastOperation) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Machine) DeepCopyInto(out *Machine) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.TypeMeta = in.TypeMeta - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Machine. -func (in *Machine) DeepCopy() *Machine { - if in == nil { - return nil - } - out := new(Machine) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *Machine) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineClass) DeepCopyInto(out *MachineClass) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - if in.NodeTemplate != nil { - in, out := &in.NodeTemplate, &out.NodeTemplate - *out = new(NodeTemplate) - (*in).DeepCopyInto(*out) - } - if in.CredentialsSecretRef != nil { - in, out := &in.CredentialsSecretRef, &out.CredentialsSecretRef - *out = new(v1.SecretReference) - **out = **in - } - in.ProviderSpec.DeepCopyInto(&out.ProviderSpec) - if in.SecretRef != nil { - in, out := &in.SecretRef, &out.SecretRef - *out = new(v1.SecretReference) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClass. -func (in *MachineClass) DeepCopy() *MachineClass { - if in == nil { - return nil - } - out := new(MachineClass) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineClass) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineClassList) DeepCopyInto(out *MachineClassList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineClass, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClassList. -func (in *MachineClassList) DeepCopy() *MachineClassList { - if in == nil { - return nil - } - out := new(MachineClassList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *MachineClassList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineConfiguration) DeepCopyInto(out *MachineConfiguration) { - *out = *in - if in.MachineDrainTimeout != nil { - in, out := &in.MachineDrainTimeout, &out.MachineDrainTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachineHealthTimeout != nil { - in, out := &in.MachineHealthTimeout, &out.MachineHealthTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachineCreationTimeout != nil { - in, out := &in.MachineCreationTimeout, &out.MachineCreationTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachineInPlaceUpdateTimeout != nil { - in, out := &in.MachineInPlaceUpdateTimeout, &out.MachineInPlaceUpdateTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.MachinePreserveTimeout != nil { - in, out := &in.MachinePreserveTimeout, &out.MachinePreserveTimeout - *out = new(metav1.Duration) - **out = **in - } - if in.DisableHealthTimeout != nil { - in, out := &in.DisableHealthTimeout, &out.DisableHealthTimeout - *out = new(bool) - **out = **in - } - if in.MaxEvictRetries != nil { - in, out := &in.MaxEvictRetries, &out.MaxEvictRetries - *out = new(int32) - **out = **in - } - if in.NodeConditions != nil { - in, out := &in.NodeConditions, &out.NodeConditions - *out = new(string) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineConfiguration. -func (in *MachineConfiguration) DeepCopy() *MachineConfiguration { - if in == nil { - return nil - } - out := new(MachineConfiguration) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineDeployment) DeepCopyInto(out *MachineDeployment) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeployment. -func (in *MachineDeployment) DeepCopy() *MachineDeployment { - if in == nil { - return nil - } - out := new(MachineDeployment) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineDeployment) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentCondition) DeepCopyInto(out *MachineDeploymentCondition) { - *out = *in - in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) - in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentCondition. -func (in *MachineDeploymentCondition) DeepCopy() *MachineDeploymentCondition { - if in == nil { - return nil - } - out := new(MachineDeploymentCondition) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentList) DeepCopyInto(out *MachineDeploymentList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineDeployment, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentList. 
-func (in *MachineDeploymentList) DeepCopy() *MachineDeploymentList { - if in == nil { - return nil - } - out := new(MachineDeploymentList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineDeploymentList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentRollback) DeepCopyInto(out *MachineDeploymentRollback) { - *out = *in - out.TypeMeta = in.TypeMeta - if in.UpdatedAnnotations != nil { - in, out := &in.UpdatedAnnotations, &out.UpdatedAnnotations - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - out.RollbackTo = in.RollbackTo - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentRollback. -func (in *MachineDeploymentRollback) DeepCopy() *MachineDeploymentRollback { - if in == nil { - return nil - } - out := new(MachineDeploymentRollback) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineDeploymentRollback) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineDeploymentSpec) DeepCopyInto(out *MachineDeploymentSpec) { - *out = *in - if in.Selector != nil { - in, out := &in.Selector, &out.Selector - *out = new(metav1.LabelSelector) - (*in).DeepCopyInto(*out) - } - in.Template.DeepCopyInto(&out.Template) - in.Strategy.DeepCopyInto(&out.Strategy) - if in.RevisionHistoryLimit != nil { - in, out := &in.RevisionHistoryLimit, &out.RevisionHistoryLimit - *out = new(int32) - **out = **in - } - if in.RollbackTo != nil { - in, out := &in.RollbackTo, &out.RollbackTo - *out = new(RollbackConfig) - **out = **in - } - if in.ProgressDeadlineSeconds != nil { - in, out := &in.ProgressDeadlineSeconds, &out.ProgressDeadlineSeconds - *out = new(int32) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentSpec. -func (in *MachineDeploymentSpec) DeepCopy() *MachineDeploymentSpec { - if in == nil { - return nil - } - out := new(MachineDeploymentSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentStatus) DeepCopyInto(out *MachineDeploymentStatus) { - *out = *in - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]MachineDeploymentCondition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.CollisionCount != nil { - in, out := &in.CollisionCount, &out.CollisionCount - *out = new(int32) - **out = **in - } - if in.FailedMachines != nil { - in, out := &in.FailedMachines, &out.FailedMachines - *out = make([]*MachineSummary, len(*in)) - for i := range *in { - if (*in)[i] != nil { - in, out := &(*in)[i], &(*out)[i] - *out = new(MachineSummary) - (*in).DeepCopyInto(*out) - } - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStatus. 
-func (in *MachineDeploymentStatus) DeepCopy() *MachineDeploymentStatus { - if in == nil { - return nil - } - out := new(MachineDeploymentStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineDeploymentStrategy) DeepCopyInto(out *MachineDeploymentStrategy) { - *out = *in - if in.RollingUpdate != nil { - in, out := &in.RollingUpdate, &out.RollingUpdate - *out = new(RollingUpdateMachineDeployment) - (*in).DeepCopyInto(*out) - } - if in.InPlaceUpdate != nil { - in, out := &in.InPlaceUpdate, &out.InPlaceUpdate - *out = new(InPlaceUpdateMachineDeployment) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStrategy. -func (in *MachineDeploymentStrategy) DeepCopy() *MachineDeploymentStrategy { - if in == nil { - return nil - } - out := new(MachineDeploymentStrategy) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineList) DeepCopyInto(out *MachineList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]Machine, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineList. -func (in *MachineList) DeepCopy() *MachineList { - if in == nil { - return nil - } - out := new(MachineList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *MachineList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSet) DeepCopyInto(out *MachineSet) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.TypeMeta = in.TypeMeta - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSet. -func (in *MachineSet) DeepCopy() *MachineSet { - if in == nil { - return nil - } - out := new(MachineSet) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineSet) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSetCondition) DeepCopyInto(out *MachineSetCondition) { - *out = *in - in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetCondition. -func (in *MachineSetCondition) DeepCopy() *MachineSetCondition { - if in == nil { - return nil - } - out := new(MachineSetCondition) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineSetList) DeepCopyInto(out *MachineSetList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineSet, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetList. -func (in *MachineSetList) DeepCopy() *MachineSetList { - if in == nil { - return nil - } - out := new(MachineSetList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineSetList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSetSpec) DeepCopyInto(out *MachineSetSpec) { - *out = *in - if in.Selector != nil { - in, out := &in.Selector, &out.Selector - *out = new(metav1.LabelSelector) - (*in).DeepCopyInto(*out) - } - out.MachineClass = in.MachineClass - in.Template.DeepCopyInto(&out.Template) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetSpec. -func (in *MachineSetSpec) DeepCopy() *MachineSetSpec { - if in == nil { - return nil - } - out := new(MachineSetSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineSetStatus) DeepCopyInto(out *MachineSetStatus) { - *out = *in - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]MachineSetCondition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - in.LastOperation.DeepCopyInto(&out.LastOperation) - if in.FailedMachines != nil { - in, out := &in.FailedMachines, &out.FailedMachines - *out = new([]MachineSummary) - if **in != nil { - in, out := *in, *out - *out = make([]MachineSummary, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetStatus. -func (in *MachineSetStatus) DeepCopy() *MachineSetStatus { - if in == nil { - return nil - } - out := new(MachineSetStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSpec) DeepCopyInto(out *MachineSpec) { - *out = *in - out.Class = in.Class - in.NodeTemplateSpec.DeepCopyInto(&out.NodeTemplateSpec) - if in.MachineConfiguration != nil { - in, out := &in.MachineConfiguration, &out.MachineConfiguration - *out = new(MachineConfiguration) - (*in).DeepCopyInto(*out) - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSpec. -func (in *MachineSpec) DeepCopy() *MachineSpec { - if in == nil { - return nil - } - out := new(MachineSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *MachineStatus) DeepCopyInto(out *MachineStatus) { - *out = *in - if in.Addresses != nil { - in, out := &in.Addresses, &out.Addresses - *out = make([]v1.NodeAddress, len(*in)) - copy(*out, *in) - } - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]v1.NodeCondition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - in.LastOperation.DeepCopyInto(&out.LastOperation) - in.CurrentStatus.DeepCopyInto(&out.CurrentStatus) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineStatus. -func (in *MachineStatus) DeepCopy() *MachineStatus { - if in == nil { - return nil - } - out := new(MachineStatus) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineSummary) DeepCopyInto(out *MachineSummary) { - *out = *in - in.LastOperation.DeepCopyInto(&out.LastOperation) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSummary. -func (in *MachineSummary) DeepCopy() *MachineSummary { - if in == nil { - return nil - } - out := new(MachineSummary) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineTemplate) DeepCopyInto(out *MachineTemplate) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Template.DeepCopyInto(&out.Template) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplate. -func (in *MachineTemplate) DeepCopy() *MachineTemplate { - if in == nil { - return nil - } - out := new(MachineTemplate) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
-func (in *MachineTemplate) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineTemplateList) DeepCopyInto(out *MachineTemplateList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]MachineTemplate, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplateList. -func (in *MachineTemplateList) DeepCopy() *MachineTemplateList { - if in == nil { - return nil - } - out := new(MachineTemplateList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *MachineTemplateList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *MachineTemplateSpec) DeepCopyInto(out *MachineTemplateSpec) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplateSpec. -func (in *MachineTemplateSpec) DeepCopy() *MachineTemplateSpec { - if in == nil { - return nil - } - out := new(MachineTemplateSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *NodeTemplate) DeepCopyInto(out *NodeTemplate) { - *out = *in - if in.Capacity != nil { - in, out := &in.Capacity, &out.Capacity - *out = make(v1.ResourceList, len(*in)) - for key, val := range *in { - (*out)[key] = val.DeepCopy() - } - } - if in.VirtualCapacity != nil { - in, out := &in.VirtualCapacity, &out.VirtualCapacity - *out = make(v1.ResourceList, len(*in)) - for key, val := range *in { - (*out)[key] = val.DeepCopy() - } - } - if in.Architecture != nil { - in, out := &in.Architecture, &out.Architecture - *out = new(string) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplate. -func (in *NodeTemplate) DeepCopy() *NodeTemplate { - if in == nil { - return nil - } - out := new(NodeTemplate) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *NodeTemplateSpec) DeepCopyInto(out *NodeTemplateSpec) { - *out = *in - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplateSpec. -func (in *NodeTemplateSpec) DeepCopy() *NodeTemplateSpec { - if in == nil { - return nil - } - out := new(NodeTemplateSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RollbackConfig) DeepCopyInto(out *RollbackConfig) { - *out = *in - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollbackConfig. -func (in *RollbackConfig) DeepCopy() *RollbackConfig { - if in == nil { - return nil - } - out := new(RollbackConfig) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *RollingUpdateMachineDeployment) DeepCopyInto(out *RollingUpdateMachineDeployment) { - *out = *in - in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollingUpdateMachineDeployment. -func (in *RollingUpdateMachineDeployment) DeepCopy() *RollingUpdateMachineDeployment { - if in == nil { - return nil - } - out := new(RollingUpdateMachineDeployment) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *UpdateConfiguration) DeepCopyInto(out *UpdateConfiguration) { - *out = *in - if in.MaxUnavailable != nil { - in, out := &in.MaxUnavailable, &out.MaxUnavailable - *out = new(intstr.IntOrString) - **out = **in - } - if in.MaxSurge != nil { - in, out := &in.MaxSurge, &out.MaxSurge - *out = new(intstr.IntOrString) - **out = **in - } - return -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpdateConfiguration. 
-func (in *UpdateConfiguration) DeepCopy() *UpdateConfiguration { - if in == nil { - return nil - } - out := new(UpdateConfiguration) - in.DeepCopyInto(out) - return out -} diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index e44505393..6d352ca48 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -25,6 +25,7 @@ package controller import ( "context" "fmt" + "k8s.io/utils/ptr" "reflect" "k8s.io/klog/v2" @@ -160,7 +161,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al newStatus.ReadyReplicas = int32(readyReplicasCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 newStatus.AvailableReplicas = int32(availableReplicasCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 newStatus.LastOperation.LastUpdateTime = metav1.Now() - newStatus.AutoPreserveFailedMachineCount = int32(autoPreserveFailedMachineCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 + newStatus.AutoPreserveFailedMachineCount = ptr.To(int32(autoPreserveFailedMachineCount)) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 return newStatus } diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 66a9492fd..9e9b03f62 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -497,7 +497,7 @@ func (c *controller) isMachineCandidateForPreservation(ctx context.Context, mach return false, nil } } - if machineSet.Status.AutoPreserveFailedMachineCount < machineSet.Spec.AutoPreserveFailedMachineMax { + if *machineSet.Status.AutoPreserveFailedMachineCount < *machineSet.Spec.AutoPreserveFailedMachineMax { err := c.annotateMachineForAutoPreservation(ctx, machine) if err != nil { return true, err diff --git a/pkg/openapi/api_violations.report b/pkg/openapi/api_violations.report index 5cb955c9e..0861c8d4c 100644 --- 
a/pkg/openapi/api_violations.report +++ b/pkg/openapi/api_violations.report @@ -7,7 +7,6 @@ API rule violation: names_match,github.com/gardener/machine-controller-manager/p API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineDrainTimeout API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineHealthTimeout API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachineInPlaceUpdateTimeout -API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineConfiguration,MachinePreserveTimeout API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineSetStatus,Conditions API rule violation: names_match,github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1,MachineSpec,NodeTemplateSpec API rule violation: names_match,k8s.io/api/core/v1,AzureDiskVolumeSource,DataDiskURI diff --git a/pkg/openapi/openapi_generated.go b/pkg/openapi/openapi_generated.go index b0cd12043..0db94f5b6 100644 --- a/pkg/openapi/openapi_generated.go +++ b/pkg/openapi/openapi_generated.go @@ -688,7 +688,7 @@ func schema_pkg_apis_machine_v1alpha1_MachineConfiguration(ref common.ReferenceC Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, - "preserveTimeout": { + "machinePreserveTimeout": { SchemaProps: spec.SchemaProps{ Description: "MachinePreserveTimeout is the timeout after which the machine preservation is stopped", Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), @@ -1494,7 +1494,7 @@ func schema_pkg_apis_machine_v1alpha1_MachineSpec(ref common.ReferenceCallback) Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, - "preserveTimeout": { + "machinePreserveTimeout": { SchemaProps: spec.SchemaProps{ Description: 
"MachinePreserveTimeout is the timeout after which the machine preservation is stopped", Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 10eaad4de..62a6b7ed1 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -4036,9 +4036,9 @@ var _ = Describe("machine", func() { machine.Status.CurrentStatus.Phase = tc.setup.phase } if tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueNow || tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - machine.Status.CurrentStatus.PreserveExpiryTime = metav1.NewTime(metav1.Now().Add(1 * time.Hour)) + machine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)} } else if tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && machineutils.IsMachineFailed(machine) { - machine.Status.CurrentStatus.PreserveExpiryTime = metav1.NewTime(metav1.Now().Add(1 * time.Hour)) + machine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)} } controlMachineObjects := []runtime.Object{machine} @@ -4273,7 +4273,7 @@ var _ = Describe("machine", func() { nodeAnnotationValue string nodeName string machinePhase v1alpha1.MachinePhase - preserveExpiryTime metav1.Time + preserveExpiryTime *metav1.Time } type expect struct { retry machineutils.RetryPeriod @@ -4450,7 +4450,7 @@ var _ = Describe("machine", func() { nodeAnnotationValue: "false", nodeName: "node-1", machinePhase: v1alpha1.MachineRunning, - preserveExpiryTime: metav1.NewTime(metav1.Now().Add(1 * time.Hour)), + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4479,7 +4479,7 @@ var _ = Describe("machine", func() { nodeAnnotationValue: 
machineutils.PreserveMachineAnnotationValueNow, nodeName: "node-1", machinePhase: v1alpha1.MachineRunning, - preserveExpiryTime: metav1.NewTime(metav1.Now().Add(-1 * time.Minute)), + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Minute)}, }, expect: expect{ preserveExpiryTimeIsSet: false, diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index ceea0339a..fe8bdaa97 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -1305,7 +1305,7 @@ func (c *controller) setMachineTerminationStatus(ctx context.Context, deleteMach Phase: v1alpha1.MachineTerminating, // TimeoutActive: false, LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.Time{}, + PreserveExpiryTime: nil, } _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) @@ -2439,7 +2439,7 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine Phase: machine.Status.CurrentStatus.Phase, TimeoutActive: machine.Status.CurrentStatus.TimeoutActive, LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)), + PreserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)}, } machine.Status.CurrentStatus = preservedCurrentStatus @@ -2565,7 +2565,7 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp } // Step 3: update machine status to set preserve expiry time to metav1.Time{} clone := machine.DeepCopy() - clone.Status.CurrentStatus.PreserveExpiryTime = metav1.Time{} + clone.Status.CurrentStatus.PreserveExpiryTime = nil clone.Status.CurrentStatus.LastUpdateTime = metav1.Now() _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { diff --git 
a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 87581bda6..7e25d5ed2 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -3961,7 +3961,6 @@ var _ = Describe("machine_util", func() { Describe("#preserveMachine", func() { type setup struct { machinePhase machinev1.MachinePhase - preserveExpiryTime metav1.Time nodeName string preserveValue string isCAAnnotationPresent bool @@ -3998,7 +3997,7 @@ var _ = Describe("machine_util", func() { CurrentStatus: machinev1.CurrentStatus{ Phase: tc.setup.machinePhase, LastUpdateTime: metav1.Now(), - PreserveExpiryTime: tc.setup.preserveExpiryTime, + PreserveExpiryTime: nil, }, }, } @@ -4244,7 +4243,7 @@ var _ = Describe("machine_util", func() { CurrentStatus: machinev1.CurrentStatus{ Phase: machinev1.MachineFailed, LastUpdateTime: metav1.Now(), - PreserveExpiryTime: metav1.NewTime(time.Now().Add(10 * time.Minute)), + PreserveExpiryTime: &metav1.Time{Time: time.Now().Add(10 * time.Minute)}, }, }, } From 1d9e0bad6a685cc62090bccb01800d4ad6821a7b Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 8 Jan 2026 12:24:57 +0530 Subject: [PATCH 41/79] Fix Makefile --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 6b0f4f913..aba0236ac 100644 --- a/Makefile +++ b/Makefile @@ -172,9 +172,9 @@ test-clean: .PHONY: generate generate: $(VGOPATH) $(DEEPCOPY_GEN) $(DEFAULTER_GEN) $(CONVERSION_GEN) $(OPENAPI_GEN) $(CONTROLLER_GEN) $(GEN_CRD_API_REFERENCE_DOCS) - GOFLAGS="-buildvcs=false" $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout - @GOFLAGS="-buildvcs=false" ./hack/generate-code - @GOFLAGS="-buildvcs=false" ./hack/api-reference/generate-spec-doc.sh + $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... 
output:crd:dir=kubernetes/crds output:stdout + @./hack/generate-code + @./hack/api-reference/generate-spec-doc.sh .PHONY: add-license-headers add-license-headers: $(GO_ADD_LICENSE) From 8790b32654b3cfdb925a9e4588a090958e1819b2 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 8 Jan 2026 13:54:32 +0530 Subject: [PATCH 42/79] Add crds --- .../machine.sapcloud.io_machineclasses.yaml | 127 ++ ...achine.sapcloud.io_machinedeployments.yaml | 562 ++++++++ .../crds/machine.sapcloud.io_machines.yaml | 333 +++++ .../crds/machine.sapcloud.io_machinesets.yaml | 447 +++++++ .../v1alpha1/zz_generated.conversion.go | 1179 +++++++++++++++++ .../machine/v1alpha1/zz_generated.deepcopy.go | 813 ++++++++++++ .../machine/v1alpha1/zz_generated.defaults.go | 21 + pkg/apis/machine/zz_generated.deepcopy.go | 906 +++++++++++++ 8 files changed, 4388 insertions(+) create mode 100644 kubernetes/crds/machine.sapcloud.io_machineclasses.yaml create mode 100644 kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml create mode 100644 kubernetes/crds/machine.sapcloud.io_machines.yaml create mode 100644 kubernetes/crds/machine.sapcloud.io_machinesets.yaml create mode 100644 pkg/apis/machine/v1alpha1/zz_generated.conversion.go create mode 100644 pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go create mode 100644 pkg/apis/machine/v1alpha1/zz_generated.defaults.go create mode 100644 pkg/apis/machine/zz_generated.deepcopy.go diff --git a/kubernetes/crds/machine.sapcloud.io_machineclasses.yaml b/kubernetes/crds/machine.sapcloud.io_machineclasses.yaml new file mode 100644 index 000000000..f0cd9d515 --- /dev/null +++ b/kubernetes/crds/machine.sapcloud.io_machineclasses.yaml @@ -0,0 +1,127 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: machineclasses.machine.sapcloud.io +spec: + group: machine.sapcloud.io + names: + kind: MachineClass + listKind: MachineClassList + plural: machineclasses 
+ shortNames: + - mcc + singular: machineclass + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + MachineClass can be used to templatize and re-use provider configuration + across multiple Machines / MachineSets / MachineDeployments. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + credentialsSecretRef: + description: |- + CredentialsSecretRef can optionally store the credentials (in this case the SecretRef does not need to store them). + This might be useful if multiple machine classes with the same credentials but different user-datas are used. + properties: + name: + description: name is unique within a namespace to reference a secret + resource. + type: string + namespace: + description: namespace defines the space within which the secret name + must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + nodeTemplate: + description: NodeTemplate contains subfields to track all node resources + and other node info required to scale nodegroup from zero + properties: + architecture: + description: CPU Architecture of the node belonging to nodeGroup + type: string + capacity: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Capacity contains subfields to track all node resources + required to scale nodegroup from zero + type: object + instanceType: + description: Instance type of the node belonging to nodeGroup + type: string + region: + description: Region of the expected node belonging to nodeGroup + type: string + virtualCapacity: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: VirtualCapacity represents the expected Node 'virtual' + capacity ie comprising virtual extended resources. + type: object + zone: + description: Zone of the expected node belonging to nodeGroup + type: string + required: + - capacity + - instanceType + - region + - zone + type: object + x-kubernetes-preserve-unknown-fields: true + provider: + description: Provider is the combination of name and location of cloud-specific + drivers. + type: string + providerSpec: + description: Provider-specific configuration to use during node creation. + type: object + x-kubernetes-preserve-unknown-fields: true + secretRef: + description: SecretRef stores the necessary secrets such as credentials + or userdata. 
+ properties: + name: + description: name is unique within a namespace to reference a secret + resource. + type: string + namespace: + description: namespace defines the space within which the secret name + must be unique. + type: string + type: object + x-kubernetes-map-type: atomic + required: + - providerSpec + type: object + served: true + storage: true diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml new file mode 100644 index 000000000..abb36d1c4 --- /dev/null +++ b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml @@ -0,0 +1,562 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: machinedeployments.machine.sapcloud.io +spec: + group: machine.sapcloud.io + names: + kind: MachineDeployment + listKind: MachineDeploymentList + plural: machinedeployments + shortNames: + - mcd + singular: machinedeployment + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Total number of ready machines targeted by this machine deployment. + jsonPath: .status.readyReplicas + name: Ready + type: integer + - description: Number of desired machines. + jsonPath: .spec.replicas + name: Desired + type: integer + - description: Total number of non-terminated machines targeted by this machine + deployment that have the desired template spec. + jsonPath: .status.updatedReplicas + name: Up-to-date + type: integer + - description: Total number of available machines (ready for at least minReadySeconds) + targeted by this machine deployment. + jsonPath: .status.availableReplicas + name: Available + type: integer + - description: |- + CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. 
It is represented in RFC3339 form and is in UTC. + Populated by the system. Read-only. Null for lists. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: MachineDeployment enables declarative updates for machines and + MachineSets. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Specification of the desired behavior of the MachineDeployment. + properties: + autoPreserveFailedMachineMax: + description: |- + The maximum number of machines in the machine deployment that will be auto-preserved. + In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments + format: int32 + type: integer + minReadySeconds: + description: |- + Minimum number of seconds for which a newly created machine should be ready + without any of its container crashing, for it to be considered available. 
+ Defaults to 0 (machine will be considered available as soon as it is ready) + format: int32 + type: integer + paused: + description: |- + Indicates that the MachineDeployment is paused and will not be processed by the + MachineDeployment controller. + type: boolean + progressDeadlineSeconds: + description: |- + The maximum time in seconds for a MachineDeployment to make progress before it + is considered to be failed. The MachineDeployment controller will continue to + process failed MachineDeployments and a condition with a ProgressDeadlineExceeded + reason will be surfaced in the MachineDeployment status. Note that progress will + not be estimated during the time a MachineDeployment is paused. This is not set + by default, which is treated as infinite deadline. + format: int32 + type: integer + replicas: + description: |- + Number of desired machines. This is a pointer to distinguish between explicit + zero and not specified. Defaults to 0. + format: int32 + type: integer + revisionHistoryLimit: + description: |- + The number of old MachineSets to retain to allow rollback. + This is a pointer to distinguish between explicit zero and not specified. + format: int32 + type: integer + rollbackTo: + description: |- + DEPRECATED. + The config this MachineDeployment is rolling back to. Will be cleared after rollback is done. + properties: + revision: + description: The revision to rollback to. If set to 0, rollback + to the last revision. + format: int64 + type: integer + type: object + selector: + description: |- + Label selector for machines. Existing MachineSets whose machines are + selected by this will be the ones affected by this MachineDeployment. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. 
+ properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + strategy: + description: The MachineDeployment strategy to use to replace existing + machines with new ones. + properties: + inPlaceUpdate: + description: |- + InPlaceUpdate update config params. Present only if MachineDeploymentStrategyType = + InPlaceUpdate. + properties: + maxSurge: + anyOf: + - type: integer + - type: string + description: |- + The maximum number of machines that can be scheduled above the desired number of + machines. + Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). + This can not be 0 if MaxUnavailable is 0. + Absolute number is calculated from percentage by rounding up. + Example: when this is set to 30%, the new machine set can be scaled up immediately when + the update starts, such that the total number of old and new machines does not exceed + 130% of desired machines. 
Once old machines have been killed, + new machine set can be scaled up further, ensuring that total number of machines running + at any time during the update is at most 130% of desired machines. + x-kubernetes-int-or-string: true + maxUnavailable: + anyOf: + - type: integer + - type: string + description: |- + The maximum number of machines that can be unavailable during the update. + Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). + Absolute number is calculated from percentage by rounding down. + This can not be 0 if MaxSurge is 0. + Example: when this is set to 30%, the old machine set can be scaled down to 70% of desired machines + immediately when the update starts. Once new machines are ready, old machine set + can be scaled down further, followed by scaling up the new machine set, ensuring + that the total number of machines available at all times during the update is at + least 70% of desired machines. + x-kubernetes-int-or-string: true + orchestrationType: + description: OrchestrationType specifies the orchestration + type for the inplace update. + type: string + type: object + rollingUpdate: + description: |- + Rolling update config params. Present only if MachineDeploymentStrategyType = + RollingUpdate. + properties: + maxSurge: + anyOf: + - type: integer + - type: string + description: |- + The maximum number of machines that can be scheduled above the desired number of + machines. + Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). + This can not be 0 if MaxUnavailable is 0. + Absolute number is calculated from percentage by rounding up. + Example: when this is set to 30%, the new machine set can be scaled up immediately when + the update starts, such that the total number of old and new machines does not exceed + 130% of desired machines.
Once old machines have been killed, + new machine set can be scaled up further, ensuring that total number of machines running + at any time during the update is at most 130% of desired machines. + x-kubernetes-int-or-string: true + maxUnavailable: + anyOf: + - type: integer + - type: string + description: |- + The maximum number of machines that can be unavailable during the update. + Value can be an absolute number (ex: 5) or a percentage of desired machines (ex: 10%). + Absolute number is calculated from percentage by rounding down. + This can not be 0 if MaxSurge is 0. + Example: when this is set to 30%, the old machine set can be scaled down to 70% of desired machines + immediately when the update starts. Once new machines are ready, old machine set + can be scaled down further, followed by scaling up the new machine set, ensuring + that the total number of machines available at all times during the update is at + least 70% of desired machines. + x-kubernetes-int-or-string: true + type: object + type: + description: Type of MachineDeployment. Can be "Recreate" or "RollingUpdate". + Default is RollingUpdate. + type: string + type: object + template: + description: Template describes the machines that will be created. + properties: + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + type: object + x-kubernetes-preserve-unknown-fields: true + spec: + description: |- + Specification of the desired behavior of the machine.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + class: + description: Class contains the machineclass attributes of + a machine + properties: + apiGroup: + description: API group to which it belongs + type: string + kind: + description: Kind for machine class + type: string + name: + description: Name of machine class + type: string + type: object + creationTimeout: + description: MachineCreationTimeout is the timeout after which + machine creation is declared failed. + type: string + disableHealthTimeout: + description: |- + DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. + This is intended to be used only for in-place updates. + type: boolean + drainTimeout: + description: MachineDrainTimeout is the timeout after which + machine is forcefully deleted. + type: string + healthTimeout: + description: MachineHealthTimeout is the timeout after which + machine is declared unhealthy/failed. + type: string + inPlaceUpdateTimeout: + description: MachineInPlaceUpdateTimeout is the timeout after + which in-place update is declared failed. + type: string + machinePreserveTimeout: + description: MachinePreserveTimeout is the timeout after which + the machine preservation is stopped + type: string + maxEvictRetries: + description: MaxEvictRetries is the number of retries that + will be attempted while draining the node. + format: int32 + type: integer + nodeConditions: + description: NodeConditions are the set of conditions if set + to true for MachineHealthTimeOut, machine will be declared + failed. + type: string + nodeTemplate: + description: NodeTemplateSpec describes the data a node should + have when created from a template + properties: + metadata: + type: object + x-kubernetes-preserve-unknown-fields: true + spec: + description: NodeSpec describes the attributes that a + node is created with.
+ properties: + configSource: + description: 'Deprecated: Previously used to specify + the source of the node''s configuration for the + DynamicKubeletConfig feature. This feature is removed.' + properties: + configMap: + description: ConfigMap is a reference to a Node's + ConfigMap + properties: + kubeletConfigKey: + description: |- + KubeletConfigKey declares which key of the referenced ConfigMap corresponds to the KubeletConfiguration structure + This field is required in all cases. + type: string + name: + description: |- + Name is the metadata.name of the referenced ConfigMap. + This field is required in all cases. + type: string + namespace: + description: |- + Namespace is the metadata.namespace of the referenced ConfigMap. + This field is required in all cases. + type: string + resourceVersion: + description: |- + ResourceVersion is the metadata.ResourceVersion of the referenced ConfigMap. + This field is forbidden in Node.Spec, and required in Node.Status. + type: string + uid: + description: |- + UID is the metadata.UID of the referenced ConfigMap. + This field is forbidden in Node.Spec, and required in Node.Status. + type: string + required: + - kubeletConfigKey + - name + - namespace + type: object + type: object + externalID: + description: |- + Deprecated. Not all kubelets will set this field. Remove field after 1.13. + see: https://issues.k8s.io/61966 + type: string + podCIDR: + description: PodCIDR represents the pod IP range assigned + to the node. + type: string + podCIDRs: + description: |- + podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this + field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for + each of IPv4 and IPv6. + items: + type: string + type: array + x-kubernetes-list-type: set + providerID: + description: 'ID of the node assigned by the cloud + provider in the format: <ProviderName>://<ProviderSpecificNodeID>' + type: string + taints: + description: If specified, the node's taints.
+ items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied + to a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to + the taint key. + type: string + required: + - effect + - key + type: object + type: array + x-kubernetes-list-type: atomic + unschedulable: + description: |- + Unschedulable controls node schedulability of new pods. By default, node is schedulable. + More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration + type: boolean + type: object + type: object + providerID: + description: ProviderID represents the provider's unique ID + given to a machine + type: string + type: object + type: object + required: + - template + type: object + status: + description: Most recently observed status of the MachineDeployment. + properties: + availableReplicas: + description: Total number of available machines (ready for at least + minReadySeconds) targeted by this MachineDeployment. + format: int32 + type: integer + collisionCount: + description: |- + Count of hash collisions for the MachineDeployment. The MachineDeployment controller uses this + field as a collision avoidance mechanism when it needs to create the name for the + newest MachineSet. + format: int32 + type: integer + conditions: + description: Represents the latest available observations of a MachineDeployment's + current state. + items: + description: MachineDeploymentCondition describes the state of a + MachineDeployment at a certain point. 
+ properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. + format: date-time + type: string + lastUpdateTime: + description: The last time this condition was updated. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of MachineDeployment condition. + type: string + required: + - status + - type + type: object + type: array + failedMachines: + description: FailedMachines has summary of machines on which lastOperation + Failed + items: + description: MachineSummary store the summary of machine. + properties: + lastOperation: + description: Last operation refers to the status of the last + operation performed + properties: + description: + description: Description of the current operation + type: string + errorCode: + description: ErrorCode of the current operation if any + type: string + lastUpdateTime: + description: Last update time of current operation + format: date-time + type: string + state: + description: State of operation + type: string + type: + description: Type of operation + type: string + type: object + name: + description: Name of the machine object + type: string + ownerRef: + description: OwnerRef + type: string + providerID: + description: ProviderID represents the provider's unique ID + given to a machine + type: string + type: object + type: array + observedGeneration: + description: The generation observed by the MachineDeployment controller. + format: int64 + type: integer + readyReplicas: + description: Total number of ready machines targeted by this MachineDeployment. 
+ format: int32 + type: integer + replicas: + description: Total number of non-terminated machines targeted by this + MachineDeployment (their labels match the selector). + format: int32 + type: integer + unavailableReplicas: + description: |- + Total number of unavailable machines targeted by this MachineDeployment. This is the total number of + machines that are still required for the MachineDeployment to have 100% available capacity. They may + either be machines that are running but not yet available or machines that still have not been created. + format: int32 + type: integer + updatedReplicas: + description: Total number of non-terminated machines targeted by this + MachineDeployment that have the desired template spec. + format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/kubernetes/crds/machine.sapcloud.io_machines.yaml b/kubernetes/crds/machine.sapcloud.io_machines.yaml new file mode 100644 index 000000000..fcea16750 --- /dev/null +++ b/kubernetes/crds/machine.sapcloud.io_machines.yaml @@ -0,0 +1,333 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: machines.machine.sapcloud.io +spec: + group: machine.sapcloud.io + names: + kind: Machine + listKind: MachineList + plural: machines + shortNames: + - mc + singular: machine + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Current status of the machine. + jsonPath: .status.currentStatus.phase + name: Status + type: string + - description: |- + CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC. 
+ Populated by the system. Read-only. Null for lists. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + jsonPath: .metadata.creationTimestamp + name: Age + type: date + - description: Node backing the machine object + jsonPath: .metadata.labels.node + name: Node + type: string + - description: ProviderID of the infra instance backing the machine object + jsonPath: .spec.providerID + name: ProviderID + priority: 1 + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: Machine is the representation of a physical or virtual machine. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec contains the specification of the machine + properties: + class: + description: Class contains the machineclass attributes of a machine + properties: + apiGroup: + description: API group to which it belongs + type: string + kind: + description: Kind for machine class + type: string + name: + description: Name of machine class + type: string + type: object + creationTimeout: + description: MachineCreationTimeout is the timeout after which machine + creation is declared failed. + type: string + disableHealthTimeout: + description: |- + DisableHealthTimeout if set to true, health timeout will be ignored.
Leading to machine never being declared failed. + This is intended to be used only for in-place updates. + type: boolean + drainTimeout: + description: MachineDrainTimeout is the timeout after which machine + is forcefully deleted. + type: string + healthTimeout: + description: MachineHealthTimeout is the timeout after which machine + is declared unhealthy/failed. + type: string + inPlaceUpdateTimeout: + description: MachineInPlaceUpdateTimeout is the timeout after which + in-place update is declared failed. + type: string + machinePreserveTimeout: + description: MachinePreserveTimeout is the timeout after which the + machine preservation is stopped + type: string + maxEvictRetries: + description: MaxEvictRetries is the number of retries that will be + attempted while draining the node. + format: int32 + type: integer + nodeConditions: + description: NodeConditions are the set of conditions if set to true + for MachineHealthTimeOut, machine will be declared failed. + type: string + nodeTemplate: + description: NodeTemplateSpec describes the data a node should have + when created from a template + properties: + metadata: + type: object + x-kubernetes-preserve-unknown-fields: true + spec: + description: NodeSpec describes the attributes that a node is + created with. + properties: + configSource: + description: 'Deprecated: Previously used to specify the source + of the node''s configuration for the DynamicKubeletConfig + feature. This feature is removed.' + properties: + configMap: + description: ConfigMap is a reference to a Node's ConfigMap + properties: + kubeletConfigKey: + description: |- + KubeletConfigKey declares which key of the referenced ConfigMap corresponds to the KubeletConfiguration structure + This field is required in all cases. + type: string + name: + description: |- + Name is the metadata.name of the referenced ConfigMap. + This field is required in all cases.
+ type: string + namespace: + description: |- + Namespace is the metadata.namespace of the referenced ConfigMap. + This field is required in all cases. + type: string + resourceVersion: + description: |- + ResourceVersion is the metadata.ResourceVersion of the referenced ConfigMap. + This field is forbidden in Node.Spec, and required in Node.Status. + type: string + uid: + description: |- + UID is the metadata.UID of the referenced ConfigMap. + This field is forbidden in Node.Spec, and required in Node.Status. + type: string + required: + - kubeletConfigKey + - name + - namespace + type: object + type: object + externalID: + description: |- + Deprecated. Not all kubelets will set this field. Remove field after 1.13. + see: https://issues.k8s.io/61966 + type: string + podCIDR: + description: PodCIDR represents the pod IP range assigned + to the node. + type: string + podCIDRs: + description: |- + podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this + field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for + each of IPv4 and IPv6. + items: + type: string + type: array + x-kubernetes-list-type: set + providerID: + description: 'ID of the node assigned by the cloud provider + in the format: <ProviderName>://<ProviderSpecificNodeID>' + type: string + taints: + description: If specified, the node's taints. + items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to + a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints.
+ format: date-time + type: string + value: + description: The taint value corresponding to the taint + key. + type: string + required: + - effect + - key + type: object + type: array + x-kubernetes-list-type: atomic + unschedulable: + description: |- + Unschedulable controls node schedulability of new pods. By default, node is schedulable. + More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration + type: boolean + type: object + type: object + providerID: + description: ProviderID represents the provider's unique ID given + to a machine + type: string + type: object + status: + description: Status contains fields depicting the status + properties: + addresses: + description: |- + Addresses of this machines. This field is only present if the MCM provider runs without a target cluster and may + be used by clients to determine how to connect to the machine, instead of the `Node.status.addresses` field. + items: + description: NodeAddress contains information for the node's address. + properties: + address: + description: The node address. + type: string + type: + description: Node address type, one of Hostname, ExternalIP + or InternalIP. + type: string + required: + - address + - type + type: object + type: array + conditions: + description: Conditions of this machine, same as node + items: + description: NodeCondition contains condition information for a + node. + properties: + lastHeartbeatTime: + description: Last time we got an update on a given condition. + format: date-time + type: string + lastTransitionTime: + description: Last time the condition transit from one status + to another. + format: date-time + type: string + message: + description: Human readable message indicating details about + last transition. + type: string + reason: + description: (brief) reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. 
+ type: string + type: + description: Type of node condition. + type: string + required: + - status + - type + type: object + type: array + currentStatus: + description: Current status of the machine object + properties: + lastUpdateTime: + description: Last update time of current status + format: date-time + type: string + phase: + description: MachinePhase is a label for the condition of a machine + at the current time. + type: string + preserveExpiryTime: + description: PreserveExpiryTime is the time at which MCM will + stop preserving the machine + format: date-time + type: string + timeoutActive: + type: boolean + type: object + lastKnownState: + description: |- + LastKnownState can store details of the last known state of the VM by the plugins. + It can be used by future operation calls to determine current infrastructure state + type: string + lastOperation: + description: Last operation refers to the status of the last operation + performed + properties: + description: + description: Description of the current operation + type: string + errorCode: + description: ErrorCode of the current operation if any + type: string + lastUpdateTime: + description: Last update time of current operation + format: date-time + type: string + state: + description: State of operation + type: string + type: + description: Type of operation + type: string + type: object + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml new file mode 100644 index 000000000..46445131f --- /dev/null +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -0,0 +1,447 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: machinesets.machine.sapcloud.io +spec: + group: machine.sapcloud.io + names: + kind: MachineSet + listKind:
MachineSetList + plural: machinesets + shortNames: + - mcs + singular: machineset + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Number of desired replicas. + jsonPath: .spec.replicas + name: Desired + type: integer + - description: Number of actual replicas. + jsonPath: .status.replicas + name: Current + type: integer + - description: Number of ready replicas for this machine set. + jsonPath: .status.readyReplicas + name: Ready + type: integer + - description: |- + CreationTimestamp is a timestamp representing the server time when this object was created. It is not guaranteed to be set in happens-before order across separate operations. Clients may not set this value. It is represented in RFC3339 form and is in UTC. + Populated by the system. Read-only. Null for lists. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: MachineSet TODO + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: MachineSetSpec is the specification of a MachineSet. 
+ properties: + autoPreserveFailedMachineMax: + format: int32 + type: integer + machineClass: + description: ClassSpec is the class specification of machine + properties: + apiGroup: + description: API group to which it belongs + type: string + kind: + description: Kind for machine class + type: string + name: + description: Name of machine class + type: string + type: object + minReadySeconds: + format: int32 + type: integer + replicas: + format: int32 + type: integer + selector: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + template: + description: MachineTemplateSpec describes the data a machine should + have when created from a template + properties: + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + type: object + x-kubernetes-preserve-unknown-fields: true + spec: + description: |- + Specification of the desired behavior of the machine. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + class: + description: Class contains the machineclass attributes of + a machine + properties: + apiGroup: + description: API group to which it belongs + type: string + kind: + description: Kind for machine class + type: string + name: + description: Name of machine class + type: string + type: object + creationTimeout: + description: MachineCreationTimeout is the timeout after which + machine creation is declared failed. + type: string + disableHealthTimeout: + description: |- + DisableHealthTimeout if set to true, health timeout will be ignored. Leading to machine never being declared failed. + This is intended to be used only for in-place updates. + type: boolean + drainTimeout: + description: MachineDrainTimeout is the timeout after which + machine is forcefully deleted. + type: string + healthTimeout: + description: MachineHealthTimeout is the timeout after which + machine is declared unhealthy/failed. + type: string + inPlaceUpdateTimeout: + description: MachineInPlaceUpdateTimeout is the timeout after + which in-place update is declared failed.
+ type: string + machinePreserveTimeout: + description: MachinePreserveTimeout is the timeout after which + the machine preservation is stopped + type: string + maxEvictRetries: + description: MaxEvictRetries is the number of retries that + will be attempted while draining the node. + format: int32 + type: integer + nodeConditions: + description: NodeConditions are the set of conditions if set + to true for MachineHealthTimeOut, machine will be declared + failed. + type: string + nodeTemplate: + description: NodeTemplateSpec describes the data a node should + have when created from a template + properties: + metadata: + type: object + x-kubernetes-preserve-unknown-fields: true + spec: + description: NodeSpec describes the attributes that a + node is created with. + properties: + configSource: + description: 'Deprecated: Previously used to specify + the source of the node''s configuration for the + DynamicKubeletConfig feature. This feature is removed.' + properties: + configMap: + description: ConfigMap is a reference to a Node's + ConfigMap + properties: + kubeletConfigKey: + description: |- + KubeletConfigKey declares which key of the referenced ConfigMap corresponds to the KubeletConfiguration structure + This field is required in all cases. + type: string + name: + description: |- + Name is the metadata.name of the referenced ConfigMap. + This field is required in all cases. + type: string + namespace: + description: |- + Namespace is the metadata.namespace of the referenced ConfigMap. + This field is required in all cases. + type: string + resourceVersion: + description: |- + ResourceVersion is the metadata.ResourceVersion of the referenced ConfigMap. + This field is forbidden in Node.Spec, and required in Node.Status. + type: string + uid: + description: |- + UID is the metadata.UID of the referenced ConfigMap. + This field is forbidden in Node.Spec, and required in Node.Status. 
+ type: string + required: + - kubeletConfigKey + - name + - namespace + type: object + type: object + externalID: + description: |- + Deprecated. Not all kubelets will set this field. Remove field after 1.13. + see: https://issues.k8s.io/61966 + type: string + podCIDR: + description: PodCIDR represents the pod IP range assigned + to the node. + type: string + podCIDRs: + description: |- + podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this + field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for + each of IPv4 and IPv6. + items: + type: string + type: array + x-kubernetes-list-type: set + providerID: + description: 'ID of the node assigned by the cloud + provider in the format: <ProviderName>://<ProviderSpecificNodeID>' + type: string + taints: + description: If specified, the node's taints. + items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied + to a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to + the taint key. + type: string + required: + - effect + - key + type: object + type: array + x-kubernetes-list-type: atomic + unschedulable: + description: |- + Unschedulable controls node schedulability of new pods. By default, node is schedulable.
+ More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration + type: boolean + type: object + type: object + providerID: + description: ProviderID represents the provider's unique ID + given to a machine + type: string + type: object + type: object + type: object + status: + description: MachineSetStatus holds the most recently observed status + of MachineSet. + properties: + autoPreserveFailedMachineCount: + description: AutoPreserveFailedMachineCount has a count of the number + of failed machines in the machineset that have been auto-preserved + format: int32 + type: integer + availableReplicas: + description: The number of available replicas (ready for at least + minReadySeconds) for this replica set. + format: int32 + type: integer + failedMachines: + description: FailedMachines has summary of machines on which lastOperation + Failed + items: + description: MachineSummary stores the summary of a machine. + properties: + lastOperation: + description: Last operation refers to the status of the last + operation performed + properties: + description: + description: Description of the current operation + type: string + errorCode: + description: ErrorCode of the current operation if any + type: string + lastUpdateTime: + description: Last update time of current operation + format: date-time + type: string + state: + description: State of operation + type: string + type: + description: Type of operation + type: string + type: object + name: + description: Name of the machine object + type: string + ownerRef: + description: OwnerRef + type: string + providerID: + description: ProviderID represents the provider's unique ID + given to a machine + type: string + type: object + type: array + fullyLabeledReplicas: + description: The number of machines that have labels matching the labels + of the machine template of the machine set.
+ format: int32 + type: integer + lastOperation: + description: LastOperation performed + properties: + description: + description: Description of the current operation + type: string + errorCode: + description: ErrorCode of the current operation if any + type: string + lastUpdateTime: + description: Last update time of current operation + format: date-time + type: string + state: + description: State of operation + type: string + type: + description: Type of operation + type: string + type: object + machineSetCondition: + description: Represents the latest available observations of a replica + set's current state. + items: + description: MachineSetCondition describes the state of a machine + set at a certain point. + properties: + lastTransitionTime: + description: The last time the condition transitioned from one + status to another. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of machine set condition. + type: string + required: + - status + - type + type: object + type: array + observedGeneration: + description: ObservedGeneration is the most recent generation observed + by the controller. + format: int64 + type: integer + readyReplicas: + description: The number of ready replicas for this replica set. + format: int32 + type: integer + replicas: + description: Replicas is the number of actual replicas. 
+ format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go new file mode 100644 index 000000000..d503e95ae --- /dev/null +++ b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go @@ -0,0 +1,1179 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors +// +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by conversion-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + unsafe "unsafe" + + machine "github.com/gardener/machine-controller-manager/pkg/apis/machine" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + conversion "k8s.io/apimachinery/pkg/conversion" + runtime "k8s.io/apimachinery/pkg/runtime" + intstr "k8s.io/apimachinery/pkg/util/intstr" +) + +func init() { + localSchemeBuilder.Register(RegisterConversions) +} + +// RegisterConversions adds conversion functions to the given scheme. +// Public to allow building arbitrary schemes. 
+func RegisterConversions(s *runtime.Scheme) error { + if err := s.AddGeneratedConversionFunc((*ClassSpec)(nil), (*machine.ClassSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(a.(*ClassSpec), b.(*machine.ClassSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.ClassSpec)(nil), (*ClassSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(a.(*machine.ClassSpec), b.(*ClassSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*CurrentStatus)(nil), (*machine.CurrentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(a.(*CurrentStatus), b.(*machine.CurrentStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.CurrentStatus)(nil), (*CurrentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(a.(*machine.CurrentStatus), b.(*CurrentStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*InPlaceUpdateMachineDeployment)(nil), (*machine.InPlaceUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(a.(*InPlaceUpdateMachineDeployment), b.(*machine.InPlaceUpdateMachineDeployment), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.InPlaceUpdateMachineDeployment)(nil), (*InPlaceUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(a.(*machine.InPlaceUpdateMachineDeployment), 
b.(*InPlaceUpdateMachineDeployment), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*LastOperation)(nil), (*machine.LastOperation)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_LastOperation_To_machine_LastOperation(a.(*LastOperation), b.(*machine.LastOperation), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.LastOperation)(nil), (*LastOperation)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_LastOperation_To_v1alpha1_LastOperation(a.(*machine.LastOperation), b.(*LastOperation), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*Machine)(nil), (*machine.Machine)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_Machine_To_machine_Machine(a.(*Machine), b.(*machine.Machine), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.Machine)(nil), (*Machine)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_Machine_To_v1alpha1_Machine(a.(*machine.Machine), b.(*Machine), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineClass)(nil), (*machine.MachineClass)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineClass_To_machine_MachineClass(a.(*MachineClass), b.(*machine.MachineClass), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineClass)(nil), (*MachineClass)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineClass_To_v1alpha1_MachineClass(a.(*machine.MachineClass), b.(*MachineClass), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineClassList)(nil), (*machine.MachineClassList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return 
Convert_v1alpha1_MachineClassList_To_machine_MachineClassList(a.(*MachineClassList), b.(*machine.MachineClassList), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineClassList)(nil), (*MachineClassList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineClassList_To_v1alpha1_MachineClassList(a.(*machine.MachineClassList), b.(*MachineClassList), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineConfiguration)(nil), (*machine.MachineConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(a.(*MachineConfiguration), b.(*machine.MachineConfiguration), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineConfiguration)(nil), (*MachineConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(a.(*machine.MachineConfiguration), b.(*MachineConfiguration), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineDeployment)(nil), (*machine.MachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(a.(*MachineDeployment), b.(*machine.MachineDeployment), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineDeployment)(nil), (*MachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(a.(*machine.MachineDeployment), b.(*MachineDeployment), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineDeploymentCondition)(nil), (*machine.MachineDeploymentCondition)(nil), func(a, b interface{}, scope 
conversion.Scope) error { + return Convert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(a.(*MachineDeploymentCondition), b.(*machine.MachineDeploymentCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentCondition)(nil), (*MachineDeploymentCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(a.(*machine.MachineDeploymentCondition), b.(*MachineDeploymentCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineDeploymentList)(nil), (*machine.MachineDeploymentList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(a.(*MachineDeploymentList), b.(*machine.MachineDeploymentList), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentList)(nil), (*MachineDeploymentList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(a.(*machine.MachineDeploymentList), b.(*MachineDeploymentList), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineDeploymentSpec)(nil), (*machine.MachineDeploymentSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(a.(*MachineDeploymentSpec), b.(*machine.MachineDeploymentSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentSpec)(nil), (*MachineDeploymentSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(a.(*machine.MachineDeploymentSpec), b.(*MachineDeploymentSpec), scope) + 
}); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineDeploymentStatus)(nil), (*machine.MachineDeploymentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(a.(*MachineDeploymentStatus), b.(*machine.MachineDeploymentStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentStatus)(nil), (*MachineDeploymentStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(a.(*machine.MachineDeploymentStatus), b.(*MachineDeploymentStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineDeploymentStrategy)(nil), (*machine.MachineDeploymentStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(a.(*MachineDeploymentStrategy), b.(*machine.MachineDeploymentStrategy), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineDeploymentStrategy)(nil), (*MachineDeploymentStrategy)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(a.(*machine.MachineDeploymentStrategy), b.(*MachineDeploymentStrategy), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineList)(nil), (*machine.MachineList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineList_To_machine_MachineList(a.(*MachineList), b.(*machine.MachineList), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineList)(nil), (*MachineList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return 
Convert_machine_MachineList_To_v1alpha1_MachineList(a.(*machine.MachineList), b.(*MachineList), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineSet)(nil), (*machine.MachineSet)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineSet_To_machine_MachineSet(a.(*MachineSet), b.(*machine.MachineSet), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineSet)(nil), (*MachineSet)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineSet_To_v1alpha1_MachineSet(a.(*machine.MachineSet), b.(*MachineSet), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineSetCondition)(nil), (*machine.MachineSetCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(a.(*MachineSetCondition), b.(*machine.MachineSetCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineSetCondition)(nil), (*MachineSetCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(a.(*machine.MachineSetCondition), b.(*MachineSetCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineSetList)(nil), (*machine.MachineSetList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineSetList_To_machine_MachineSetList(a.(*MachineSetList), b.(*machine.MachineSetList), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineSetList)(nil), (*MachineSetList)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineSetList_To_v1alpha1_MachineSetList(a.(*machine.MachineSetList), b.(*MachineSetList), scope) + }); err != 
nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineSetSpec)(nil), (*machine.MachineSetSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(a.(*MachineSetSpec), b.(*machine.MachineSetSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineSetSpec)(nil), (*MachineSetSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(a.(*machine.MachineSetSpec), b.(*MachineSetSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineSetStatus)(nil), (*machine.MachineSetStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(a.(*MachineSetStatus), b.(*machine.MachineSetStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineSetStatus)(nil), (*MachineSetStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(a.(*machine.MachineSetStatus), b.(*MachineSetStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineSpec)(nil), (*machine.MachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(a.(*MachineSpec), b.(*machine.MachineSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineSpec)(nil), (*MachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(a.(*machine.MachineSpec), b.(*MachineSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineStatus)(nil), (*machine.MachineStatus)(nil), func(a, b 
interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineStatus_To_machine_MachineStatus(a.(*MachineStatus), b.(*machine.MachineStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineStatus)(nil), (*MachineStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineStatus_To_v1alpha1_MachineStatus(a.(*machine.MachineStatus), b.(*MachineStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineSummary)(nil), (*machine.MachineSummary)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineSummary_To_machine_MachineSummary(a.(*MachineSummary), b.(*machine.MachineSummary), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineSummary)(nil), (*MachineSummary)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineSummary_To_v1alpha1_MachineSummary(a.(*machine.MachineSummary), b.(*MachineSummary), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*MachineTemplateSpec)(nil), (*machine.MachineTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(a.(*MachineTemplateSpec), b.(*machine.MachineTemplateSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.MachineTemplateSpec)(nil), (*MachineTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(a.(*machine.MachineTemplateSpec), b.(*MachineTemplateSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*NodeTemplate)(nil), (*machine.NodeTemplate)(nil), func(a, b interface{}, scope conversion.Scope) error { + return 
Convert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(a.(*NodeTemplate), b.(*machine.NodeTemplate), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.NodeTemplate)(nil), (*NodeTemplate)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(a.(*machine.NodeTemplate), b.(*NodeTemplate), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*NodeTemplateSpec)(nil), (*machine.NodeTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(a.(*NodeTemplateSpec), b.(*machine.NodeTemplateSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.NodeTemplateSpec)(nil), (*NodeTemplateSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(a.(*machine.NodeTemplateSpec), b.(*NodeTemplateSpec), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*RollbackConfig)(nil), (*machine.RollbackConfig)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(a.(*RollbackConfig), b.(*machine.RollbackConfig), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.RollbackConfig)(nil), (*RollbackConfig)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(a.(*machine.RollbackConfig), b.(*RollbackConfig), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*RollingUpdateMachineDeployment)(nil), (*machine.RollingUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { + return 
Convert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(a.(*RollingUpdateMachineDeployment), b.(*machine.RollingUpdateMachineDeployment), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.RollingUpdateMachineDeployment)(nil), (*RollingUpdateMachineDeployment)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(a.(*machine.RollingUpdateMachineDeployment), b.(*RollingUpdateMachineDeployment), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*UpdateConfiguration)(nil), (*machine.UpdateConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(a.(*UpdateConfiguration), b.(*machine.UpdateConfiguration), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*machine.UpdateConfiguration)(nil), (*UpdateConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(a.(*machine.UpdateConfiguration), b.(*UpdateConfiguration), scope) + }); err != nil { + return err + } + return nil +} + +func autoConvert_v1alpha1_ClassSpec_To_machine_ClassSpec(in *ClassSpec, out *machine.ClassSpec, s conversion.Scope) error { + out.APIGroup = in.APIGroup + out.Kind = in.Kind + out.Name = in.Name + return nil +} + +// Convert_v1alpha1_ClassSpec_To_machine_ClassSpec is an autogenerated conversion function. 
+func Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(in *ClassSpec, out *machine.ClassSpec, s conversion.Scope) error { + return autoConvert_v1alpha1_ClassSpec_To_machine_ClassSpec(in, out, s) +} + +func autoConvert_machine_ClassSpec_To_v1alpha1_ClassSpec(in *machine.ClassSpec, out *ClassSpec, s conversion.Scope) error { + out.APIGroup = in.APIGroup + out.Kind = in.Kind + out.Name = in.Name + return nil +} + +// Convert_machine_ClassSpec_To_v1alpha1_ClassSpec is an autogenerated conversion function. +func Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(in *machine.ClassSpec, out *ClassSpec, s conversion.Scope) error { + return autoConvert_machine_ClassSpec_To_v1alpha1_ClassSpec(in, out, s) +} + +func autoConvert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(in *CurrentStatus, out *machine.CurrentStatus, s conversion.Scope) error { + out.Phase = machine.MachinePhase(in.Phase) + out.TimeoutActive = in.TimeoutActive + out.LastUpdateTime = in.LastUpdateTime + out.PreserveExpiryTime = (*v1.Time)(unsafe.Pointer(in.PreserveExpiryTime)) + return nil +} + +// Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus is an autogenerated conversion function. +func Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(in *CurrentStatus, out *machine.CurrentStatus, s conversion.Scope) error { + return autoConvert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(in, out, s) +} + +func autoConvert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in *machine.CurrentStatus, out *CurrentStatus, s conversion.Scope) error { + out.Phase = MachinePhase(in.Phase) + out.TimeoutActive = in.TimeoutActive + out.LastUpdateTime = in.LastUpdateTime + out.PreserveExpiryTime = (*v1.Time)(unsafe.Pointer(in.PreserveExpiryTime)) + return nil +} + +// Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus is an autogenerated conversion function. 
+func Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in *machine.CurrentStatus, out *CurrentStatus, s conversion.Scope) error { + return autoConvert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(in, out, s) +} + +func autoConvert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(in *InPlaceUpdateMachineDeployment, out *machine.InPlaceUpdateMachineDeployment, s conversion.Scope) error { + if err := Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { + return err + } + out.OrchestrationType = machine.OrchestrationType(in.OrchestrationType) + return nil +} + +// Convert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment is an autogenerated conversion function. +func Convert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(in *InPlaceUpdateMachineDeployment, out *machine.InPlaceUpdateMachineDeployment, s conversion.Scope) error { + return autoConvert_v1alpha1_InPlaceUpdateMachineDeployment_To_machine_InPlaceUpdateMachineDeployment(in, out, s) +} + +func autoConvert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(in *machine.InPlaceUpdateMachineDeployment, out *InPlaceUpdateMachineDeployment, s conversion.Scope) error { + if err := Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { + return err + } + out.OrchestrationType = OrchestrationType(in.OrchestrationType) + return nil +} + +// Convert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment is an autogenerated conversion function. 
+func Convert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(in *machine.InPlaceUpdateMachineDeployment, out *InPlaceUpdateMachineDeployment, s conversion.Scope) error { + return autoConvert_machine_InPlaceUpdateMachineDeployment_To_v1alpha1_InPlaceUpdateMachineDeployment(in, out, s) +} + +func autoConvert_v1alpha1_LastOperation_To_machine_LastOperation(in *LastOperation, out *machine.LastOperation, s conversion.Scope) error { + out.Description = in.Description + out.ErrorCode = in.ErrorCode + out.LastUpdateTime = in.LastUpdateTime + out.State = machine.MachineState(in.State) + out.Type = machine.MachineOperationType(in.Type) + return nil +} + +// Convert_v1alpha1_LastOperation_To_machine_LastOperation is an autogenerated conversion function. +func Convert_v1alpha1_LastOperation_To_machine_LastOperation(in *LastOperation, out *machine.LastOperation, s conversion.Scope) error { + return autoConvert_v1alpha1_LastOperation_To_machine_LastOperation(in, out, s) +} + +func autoConvert_machine_LastOperation_To_v1alpha1_LastOperation(in *machine.LastOperation, out *LastOperation, s conversion.Scope) error { + out.Description = in.Description + out.ErrorCode = in.ErrorCode + out.LastUpdateTime = in.LastUpdateTime + out.State = MachineState(in.State) + out.Type = MachineOperationType(in.Type) + return nil +} + +// Convert_machine_LastOperation_To_v1alpha1_LastOperation is an autogenerated conversion function. 
+func Convert_machine_LastOperation_To_v1alpha1_LastOperation(in *machine.LastOperation, out *LastOperation, s conversion.Scope) error { + return autoConvert_machine_LastOperation_To_v1alpha1_LastOperation(in, out, s) +} + +func autoConvert_v1alpha1_Machine_To_machine_Machine(in *Machine, out *machine.Machine, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + if err := Convert_v1alpha1_MachineStatus_To_machine_MachineStatus(&in.Status, &out.Status, s); err != nil { + return err + } + return nil +} + +// Convert_v1alpha1_Machine_To_machine_Machine is an autogenerated conversion function. +func Convert_v1alpha1_Machine_To_machine_Machine(in *Machine, out *machine.Machine, s conversion.Scope) error { + return autoConvert_v1alpha1_Machine_To_machine_Machine(in, out, s) +} + +func autoConvert_machine_Machine_To_v1alpha1_Machine(in *machine.Machine, out *Machine, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + if err := Convert_machine_MachineStatus_To_v1alpha1_MachineStatus(&in.Status, &out.Status, s); err != nil { + return err + } + return nil +} + +// Convert_machine_Machine_To_v1alpha1_Machine is an autogenerated conversion function. 
+func Convert_machine_Machine_To_v1alpha1_Machine(in *machine.Machine, out *Machine, s conversion.Scope) error { + return autoConvert_machine_Machine_To_v1alpha1_Machine(in, out, s) +} + +func autoConvert_v1alpha1_MachineClass_To_machine_MachineClass(in *MachineClass, out *machine.MachineClass, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + out.NodeTemplate = (*machine.NodeTemplate)(unsafe.Pointer(in.NodeTemplate)) + out.CredentialsSecretRef = (*corev1.SecretReference)(unsafe.Pointer(in.CredentialsSecretRef)) + out.ProviderSpec = in.ProviderSpec + out.Provider = in.Provider + out.SecretRef = (*corev1.SecretReference)(unsafe.Pointer(in.SecretRef)) + return nil +} + +// Convert_v1alpha1_MachineClass_To_machine_MachineClass is an autogenerated conversion function. +func Convert_v1alpha1_MachineClass_To_machine_MachineClass(in *MachineClass, out *machine.MachineClass, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineClass_To_machine_MachineClass(in, out, s) +} + +func autoConvert_machine_MachineClass_To_v1alpha1_MachineClass(in *machine.MachineClass, out *MachineClass, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + out.NodeTemplate = (*NodeTemplate)(unsafe.Pointer(in.NodeTemplate)) + out.CredentialsSecretRef = (*corev1.SecretReference)(unsafe.Pointer(in.CredentialsSecretRef)) + out.Provider = in.Provider + out.ProviderSpec = in.ProviderSpec + out.SecretRef = (*corev1.SecretReference)(unsafe.Pointer(in.SecretRef)) + return nil +} + +// Convert_machine_MachineClass_To_v1alpha1_MachineClass is an autogenerated conversion function. 
+func Convert_machine_MachineClass_To_v1alpha1_MachineClass(in *machine.MachineClass, out *MachineClass, s conversion.Scope) error { + return autoConvert_machine_MachineClass_To_v1alpha1_MachineClass(in, out, s) +} + +func autoConvert_v1alpha1_MachineClassList_To_machine_MachineClassList(in *MachineClassList, out *machine.MachineClassList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]machine.MachineClass, len(*in)) + for i := range *in { + if err := Convert_v1alpha1_MachineClass_To_machine_MachineClass(&(*in)[i], &(*out)[i], s); err != nil { + return err + } + } + } else { + out.Items = nil + } + return nil +} + +// Convert_v1alpha1_MachineClassList_To_machine_MachineClassList is an autogenerated conversion function. +func Convert_v1alpha1_MachineClassList_To_machine_MachineClassList(in *MachineClassList, out *machine.MachineClassList, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineClassList_To_machine_MachineClassList(in, out, s) +} + +func autoConvert_machine_MachineClassList_To_v1alpha1_MachineClassList(in *machine.MachineClassList, out *MachineClassList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineClass, len(*in)) + for i := range *in { + if err := Convert_machine_MachineClass_To_v1alpha1_MachineClass(&(*in)[i], &(*out)[i], s); err != nil { + return err + } + } + } else { + out.Items = nil + } + return nil +} + +// Convert_machine_MachineClassList_To_v1alpha1_MachineClassList is an autogenerated conversion function. 
+func Convert_machine_MachineClassList_To_v1alpha1_MachineClassList(in *machine.MachineClassList, out *MachineClassList, s conversion.Scope) error { + return autoConvert_machine_MachineClassList_To_v1alpha1_MachineClassList(in, out, s) +} + +func autoConvert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(in *MachineConfiguration, out *machine.MachineConfiguration, s conversion.Scope) error { + out.MachineDrainTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineDrainTimeout)) + out.MachineHealthTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineHealthTimeout)) + out.MachineCreationTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineCreationTimeout)) + out.MachineInPlaceUpdateTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineInPlaceUpdateTimeout)) + out.MachinePreserveTimeout = (*v1.Duration)(unsafe.Pointer(in.MachinePreserveTimeout)) + out.DisableHealthTimeout = (*bool)(unsafe.Pointer(in.DisableHealthTimeout)) + out.MaxEvictRetries = (*int32)(unsafe.Pointer(in.MaxEvictRetries)) + out.NodeConditions = (*string)(unsafe.Pointer(in.NodeConditions)) + return nil +} + +// Convert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(in *MachineConfiguration, out *machine.MachineConfiguration, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineConfiguration_To_machine_MachineConfiguration(in, out, s) +} + +func autoConvert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(in *machine.MachineConfiguration, out *MachineConfiguration, s conversion.Scope) error { + out.MachineDrainTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineDrainTimeout)) + out.MachineHealthTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineHealthTimeout)) + out.MachineCreationTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineCreationTimeout)) + out.MachineInPlaceUpdateTimeout = (*v1.Duration)(unsafe.Pointer(in.MachineInPlaceUpdateTimeout)) + out.MachinePreserveTimeout = (*v1.Duration)(unsafe.Pointer(in.MachinePreserveTimeout)) + out.DisableHealthTimeout = (*bool)(unsafe.Pointer(in.DisableHealthTimeout)) + out.MaxEvictRetries = (*int32)(unsafe.Pointer(in.MaxEvictRetries)) + out.NodeConditions = (*string)(unsafe.Pointer(in.NodeConditions)) + return nil +} + +// Convert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration is an autogenerated conversion function. 
+func Convert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(in *machine.MachineConfiguration, out *MachineConfiguration, s conversion.Scope) error { + return autoConvert_machine_MachineConfiguration_To_v1alpha1_MachineConfiguration(in, out, s) +} + +func autoConvert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(in *MachineDeployment, out *machine.MachineDeployment, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + if err := Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(&in.Status, &out.Status, s); err != nil { + return err + } + return nil +} + +// Convert_v1alpha1_MachineDeployment_To_machine_MachineDeployment is an autogenerated conversion function. +func Convert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(in *MachineDeployment, out *machine.MachineDeployment, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineDeployment_To_machine_MachineDeployment(in, out, s) +} + +func autoConvert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(in *machine.MachineDeployment, out *MachineDeployment, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + if err := Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(&in.Status, &out.Status, s); err != nil { + return err + } + return nil +} + +// Convert_machine_MachineDeployment_To_v1alpha1_MachineDeployment is an autogenerated conversion function. 
+func Convert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(in *machine.MachineDeployment, out *MachineDeployment, s conversion.Scope) error { + return autoConvert_machine_MachineDeployment_To_v1alpha1_MachineDeployment(in, out, s) +} + +func autoConvert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(in *MachineDeploymentCondition, out *machine.MachineDeploymentCondition, s conversion.Scope) error { + out.Type = machine.MachineDeploymentConditionType(in.Type) + out.Status = machine.ConditionStatus(in.Status) + out.LastUpdateTime = in.LastUpdateTime + out.LastTransitionTime = in.LastTransitionTime + out.Reason = in.Reason + out.Message = in.Message + return nil +} + +// Convert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition is an autogenerated conversion function. +func Convert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(in *MachineDeploymentCondition, out *machine.MachineDeploymentCondition, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineDeploymentCondition_To_machine_MachineDeploymentCondition(in, out, s) +} + +func autoConvert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(in *machine.MachineDeploymentCondition, out *MachineDeploymentCondition, s conversion.Scope) error { + out.Type = MachineDeploymentConditionType(in.Type) + out.Status = ConditionStatus(in.Status) + out.LastUpdateTime = in.LastUpdateTime + out.LastTransitionTime = in.LastTransitionTime + out.Reason = in.Reason + out.Message = in.Message + return nil +} + +// Convert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition is an autogenerated conversion function. 
+func Convert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(in *machine.MachineDeploymentCondition, out *MachineDeploymentCondition, s conversion.Scope) error { + return autoConvert_machine_MachineDeploymentCondition_To_v1alpha1_MachineDeploymentCondition(in, out, s) +} + +func autoConvert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(in *MachineDeploymentList, out *machine.MachineDeploymentList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + out.Items = *(*[]machine.MachineDeployment)(unsafe.Pointer(&in.Items)) + return nil +} + +// Convert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList is an autogenerated conversion function. +func Convert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(in *MachineDeploymentList, out *machine.MachineDeploymentList, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineDeploymentList_To_machine_MachineDeploymentList(in, out, s) +} + +func autoConvert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(in *machine.MachineDeploymentList, out *MachineDeploymentList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + out.Items = *(*[]MachineDeployment)(unsafe.Pointer(&in.Items)) + return nil +} + +// Convert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList is an autogenerated conversion function. 
+func Convert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(in *machine.MachineDeploymentList, out *MachineDeploymentList, s conversion.Scope) error { + return autoConvert_machine_MachineDeploymentList_To_v1alpha1_MachineDeploymentList(in, out, s) +} + +func autoConvert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(in *MachineDeploymentSpec, out *machine.MachineDeploymentSpec, s conversion.Scope) error { + out.Replicas = in.Replicas + out.Selector = (*v1.LabelSelector)(unsafe.Pointer(in.Selector)) + if err := Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { + return err + } + if err := Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(&in.Strategy, &out.Strategy, s); err != nil { + return err + } + out.MinReadySeconds = in.MinReadySeconds + out.RevisionHistoryLimit = (*int32)(unsafe.Pointer(in.RevisionHistoryLimit)) + out.Paused = in.Paused + out.RollbackTo = (*machine.RollbackConfig)(unsafe.Pointer(in.RollbackTo)) + out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) + out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + return nil +} + +// Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(in *MachineDeploymentSpec, out *machine.MachineDeploymentSpec, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec(in, out, s) +} + +func autoConvert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(in *machine.MachineDeploymentSpec, out *MachineDeploymentSpec, s conversion.Scope) error { + out.Replicas = in.Replicas + out.Selector = (*v1.LabelSelector)(unsafe.Pointer(in.Selector)) + if err := Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { + return err + } + if err := Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(&in.Strategy, &out.Strategy, s); err != nil { + return err + } + out.MinReadySeconds = in.MinReadySeconds + out.RevisionHistoryLimit = (*int32)(unsafe.Pointer(in.RevisionHistoryLimit)) + out.Paused = in.Paused + out.RollbackTo = (*RollbackConfig)(unsafe.Pointer(in.RollbackTo)) + out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) + out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + return nil +} + +// Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec is an autogenerated conversion function. 
+func Convert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(in *machine.MachineDeploymentSpec, out *MachineDeploymentSpec, s conversion.Scope) error { + return autoConvert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec(in, out, s) +} + +func autoConvert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(in *MachineDeploymentStatus, out *machine.MachineDeploymentStatus, s conversion.Scope) error { + out.ObservedGeneration = in.ObservedGeneration + out.Replicas = in.Replicas + out.UpdatedReplicas = in.UpdatedReplicas + out.ReadyReplicas = in.ReadyReplicas + out.AvailableReplicas = in.AvailableReplicas + out.UnavailableReplicas = in.UnavailableReplicas + out.Conditions = *(*[]machine.MachineDeploymentCondition)(unsafe.Pointer(&in.Conditions)) + out.CollisionCount = (*int32)(unsafe.Pointer(in.CollisionCount)) + out.FailedMachines = *(*[]*machine.MachineSummary)(unsafe.Pointer(&in.FailedMachines)) + return nil +} + +// Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(in *MachineDeploymentStatus, out *machine.MachineDeploymentStatus, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineDeploymentStatus_To_machine_MachineDeploymentStatus(in, out, s) +} + +func autoConvert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(in *machine.MachineDeploymentStatus, out *MachineDeploymentStatus, s conversion.Scope) error { + out.ObservedGeneration = in.ObservedGeneration + out.Replicas = in.Replicas + out.UpdatedReplicas = in.UpdatedReplicas + out.ReadyReplicas = in.ReadyReplicas + out.AvailableReplicas = in.AvailableReplicas + out.UnavailableReplicas = in.UnavailableReplicas + out.Conditions = *(*[]MachineDeploymentCondition)(unsafe.Pointer(&in.Conditions)) + out.CollisionCount = (*int32)(unsafe.Pointer(in.CollisionCount)) + out.FailedMachines = *(*[]*MachineSummary)(unsafe.Pointer(&in.FailedMachines)) + return nil +} + +// Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus is an autogenerated conversion function. +func Convert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(in *machine.MachineDeploymentStatus, out *MachineDeploymentStatus, s conversion.Scope) error { + return autoConvert_machine_MachineDeploymentStatus_To_v1alpha1_MachineDeploymentStatus(in, out, s) +} + +func autoConvert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(in *MachineDeploymentStrategy, out *machine.MachineDeploymentStrategy, s conversion.Scope) error { + out.Type = machine.MachineDeploymentStrategyType(in.Type) + out.RollingUpdate = (*machine.RollingUpdateMachineDeployment)(unsafe.Pointer(in.RollingUpdate)) + out.InPlaceUpdate = (*machine.InPlaceUpdateMachineDeployment)(unsafe.Pointer(in.InPlaceUpdate)) + return nil +} + +// Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(in *MachineDeploymentStrategy, out *machine.MachineDeploymentStrategy, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineDeploymentStrategy_To_machine_MachineDeploymentStrategy(in, out, s) +} + +func autoConvert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(in *machine.MachineDeploymentStrategy, out *MachineDeploymentStrategy, s conversion.Scope) error { + out.Type = MachineDeploymentStrategyType(in.Type) + out.RollingUpdate = (*RollingUpdateMachineDeployment)(unsafe.Pointer(in.RollingUpdate)) + out.InPlaceUpdate = (*InPlaceUpdateMachineDeployment)(unsafe.Pointer(in.InPlaceUpdate)) + return nil +} + +// Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy is an autogenerated conversion function. +func Convert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(in *machine.MachineDeploymentStrategy, out *MachineDeploymentStrategy, s conversion.Scope) error { + return autoConvert_machine_MachineDeploymentStrategy_To_v1alpha1_MachineDeploymentStrategy(in, out, s) +} + +func autoConvert_v1alpha1_MachineList_To_machine_MachineList(in *MachineList, out *machine.MachineList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + out.Items = *(*[]machine.Machine)(unsafe.Pointer(&in.Items)) + return nil +} + +// Convert_v1alpha1_MachineList_To_machine_MachineList is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineList_To_machine_MachineList(in *MachineList, out *machine.MachineList, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineList_To_machine_MachineList(in, out, s) +} + +func autoConvert_machine_MachineList_To_v1alpha1_MachineList(in *machine.MachineList, out *MachineList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + out.Items = *(*[]Machine)(unsafe.Pointer(&in.Items)) + return nil +} + +// Convert_machine_MachineList_To_v1alpha1_MachineList is an autogenerated conversion function. +func Convert_machine_MachineList_To_v1alpha1_MachineList(in *machine.MachineList, out *MachineList, s conversion.Scope) error { + return autoConvert_machine_MachineList_To_v1alpha1_MachineList(in, out, s) +} + +func autoConvert_v1alpha1_MachineSet_To_machine_MachineSet(in *MachineSet, out *machine.MachineSet, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + if err := Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(&in.Status, &out.Status, s); err != nil { + return err + } + return nil +} + +// Convert_v1alpha1_MachineSet_To_machine_MachineSet is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineSet_To_machine_MachineSet(in *MachineSet, out *machine.MachineSet, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineSet_To_machine_MachineSet(in, out, s) +} + +func autoConvert_machine_MachineSet_To_v1alpha1_MachineSet(in *machine.MachineSet, out *MachineSet, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + if err := Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(&in.Status, &out.Status, s); err != nil { + return err + } + return nil +} + +// Convert_machine_MachineSet_To_v1alpha1_MachineSet is an autogenerated conversion function. +func Convert_machine_MachineSet_To_v1alpha1_MachineSet(in *machine.MachineSet, out *MachineSet, s conversion.Scope) error { + return autoConvert_machine_MachineSet_To_v1alpha1_MachineSet(in, out, s) +} + +func autoConvert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(in *MachineSetCondition, out *machine.MachineSetCondition, s conversion.Scope) error { + out.Type = machine.MachineSetConditionType(in.Type) + out.Status = machine.ConditionStatus(in.Status) + out.LastTransitionTime = in.LastTransitionTime + out.Reason = in.Reason + out.Message = in.Message + return nil +} + +// Convert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(in *MachineSetCondition, out *machine.MachineSetCondition, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineSetCondition_To_machine_MachineSetCondition(in, out, s) +} + +func autoConvert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(in *machine.MachineSetCondition, out *MachineSetCondition, s conversion.Scope) error { + out.Type = MachineSetConditionType(in.Type) + out.Status = ConditionStatus(in.Status) + out.LastTransitionTime = in.LastTransitionTime + out.Reason = in.Reason + out.Message = in.Message + return nil +} + +// Convert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition is an autogenerated conversion function. +func Convert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(in *machine.MachineSetCondition, out *MachineSetCondition, s conversion.Scope) error { + return autoConvert_machine_MachineSetCondition_To_v1alpha1_MachineSetCondition(in, out, s) +} + +func autoConvert_v1alpha1_MachineSetList_To_machine_MachineSetList(in *MachineSetList, out *machine.MachineSetList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + out.Items = *(*[]machine.MachineSet)(unsafe.Pointer(&in.Items)) + return nil +} + +// Convert_v1alpha1_MachineSetList_To_machine_MachineSetList is an autogenerated conversion function. +func Convert_v1alpha1_MachineSetList_To_machine_MachineSetList(in *MachineSetList, out *machine.MachineSetList, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineSetList_To_machine_MachineSetList(in, out, s) +} + +func autoConvert_machine_MachineSetList_To_v1alpha1_MachineSetList(in *machine.MachineSetList, out *MachineSetList, s conversion.Scope) error { + out.ListMeta = in.ListMeta + out.Items = *(*[]MachineSet)(unsafe.Pointer(&in.Items)) + return nil +} + +// Convert_machine_MachineSetList_To_v1alpha1_MachineSetList is an autogenerated conversion function. 
+func Convert_machine_MachineSetList_To_v1alpha1_MachineSetList(in *machine.MachineSetList, out *MachineSetList, s conversion.Scope) error { + return autoConvert_machine_MachineSetList_To_v1alpha1_MachineSetList(in, out, s) +} + +func autoConvert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in *MachineSetSpec, out *machine.MachineSetSpec, s conversion.Scope) error { + out.Replicas = in.Replicas + out.Selector = (*v1.LabelSelector)(unsafe.Pointer(in.Selector)) + if err := Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(&in.MachineClass, &out.MachineClass, s); err != nil { + return err + } + if err := Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { + return err + } + out.MinReadySeconds = in.MinReadySeconds + out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + return nil +} + +// Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in *MachineSetSpec, out *machine.MachineSetSpec, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in, out, s) +} + +func autoConvert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in *machine.MachineSetSpec, out *MachineSetSpec, s conversion.Scope) error { + out.Replicas = in.Replicas + out.Selector = (*v1.LabelSelector)(unsafe.Pointer(in.Selector)) + if err := Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(&in.MachineClass, &out.MachineClass, s); err != nil { + return err + } + if err := Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(&in.Template, &out.Template, s); err != nil { + return err + } + out.MinReadySeconds = in.MinReadySeconds + out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + return nil +} + +// Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec is an autogenerated conversion function. 
+func Convert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in *machine.MachineSetSpec, out *MachineSetSpec, s conversion.Scope) error { + return autoConvert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in, out, s) +} + +func autoConvert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in *MachineSetStatus, out *machine.MachineSetStatus, s conversion.Scope) error { + out.Replicas = in.Replicas + out.FullyLabeledReplicas = in.FullyLabeledReplicas + out.ReadyReplicas = in.ReadyReplicas + out.AvailableReplicas = in.AvailableReplicas + out.ObservedGeneration = in.ObservedGeneration + out.Conditions = *(*[]machine.MachineSetCondition)(unsafe.Pointer(&in.Conditions)) + if err := Convert_v1alpha1_LastOperation_To_machine_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { + return err + } + out.FailedMachines = (*[]machine.MachineSummary)(unsafe.Pointer(in.FailedMachines)) + out.AutoPreserveFailedMachineCount = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineCount)) + return nil +} + +// Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus is an autogenerated conversion function. 
+func Convert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in *MachineSetStatus, out *machine.MachineSetStatus, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in, out, s) +} + +func autoConvert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in *machine.MachineSetStatus, out *MachineSetStatus, s conversion.Scope) error { + out.Replicas = in.Replicas + out.FullyLabeledReplicas = in.FullyLabeledReplicas + out.ReadyReplicas = in.ReadyReplicas + out.AvailableReplicas = in.AvailableReplicas + out.ObservedGeneration = in.ObservedGeneration + out.Conditions = *(*[]MachineSetCondition)(unsafe.Pointer(&in.Conditions)) + if err := Convert_machine_LastOperation_To_v1alpha1_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { + return err + } + out.FailedMachines = (*[]MachineSummary)(unsafe.Pointer(in.FailedMachines)) + out.AutoPreserveFailedMachineCount = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineCount)) + return nil +} + +// Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus is an autogenerated conversion function. 
+func Convert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in *machine.MachineSetStatus, out *MachineSetStatus, s conversion.Scope) error { + return autoConvert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in, out, s) +} + +func autoConvert_v1alpha1_MachineSpec_To_machine_MachineSpec(in *MachineSpec, out *machine.MachineSpec, s conversion.Scope) error { + if err := Convert_v1alpha1_ClassSpec_To_machine_ClassSpec(&in.Class, &out.Class, s); err != nil { + return err + } + out.ProviderID = in.ProviderID + if err := Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(&in.NodeTemplateSpec, &out.NodeTemplateSpec, s); err != nil { + return err + } + out.MachineConfiguration = (*machine.MachineConfiguration)(unsafe.Pointer(in.MachineConfiguration)) + return nil +} + +// Convert_v1alpha1_MachineSpec_To_machine_MachineSpec is an autogenerated conversion function. +func Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(in *MachineSpec, out *machine.MachineSpec, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineSpec_To_machine_MachineSpec(in, out, s) +} + +func autoConvert_machine_MachineSpec_To_v1alpha1_MachineSpec(in *machine.MachineSpec, out *MachineSpec, s conversion.Scope) error { + if err := Convert_machine_ClassSpec_To_v1alpha1_ClassSpec(&in.Class, &out.Class, s); err != nil { + return err + } + out.ProviderID = in.ProviderID + if err := Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(&in.NodeTemplateSpec, &out.NodeTemplateSpec, s); err != nil { + return err + } + out.MachineConfiguration = (*MachineConfiguration)(unsafe.Pointer(in.MachineConfiguration)) + return nil +} + +// Convert_machine_MachineSpec_To_v1alpha1_MachineSpec is an autogenerated conversion function. 
+func Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(in *machine.MachineSpec, out *MachineSpec, s conversion.Scope) error { + return autoConvert_machine_MachineSpec_To_v1alpha1_MachineSpec(in, out, s) +} + +func autoConvert_v1alpha1_MachineStatus_To_machine_MachineStatus(in *MachineStatus, out *machine.MachineStatus, s conversion.Scope) error { + out.Addresses = *(*[]corev1.NodeAddress)(unsafe.Pointer(&in.Addresses)) + out.Conditions = *(*[]corev1.NodeCondition)(unsafe.Pointer(&in.Conditions)) + if err := Convert_v1alpha1_LastOperation_To_machine_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { + return err + } + if err := Convert_v1alpha1_CurrentStatus_To_machine_CurrentStatus(&in.CurrentStatus, &out.CurrentStatus, s); err != nil { + return err + } + out.LastKnownState = in.LastKnownState + return nil +} + +// Convert_v1alpha1_MachineStatus_To_machine_MachineStatus is an autogenerated conversion function. +func Convert_v1alpha1_MachineStatus_To_machine_MachineStatus(in *MachineStatus, out *machine.MachineStatus, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineStatus_To_machine_MachineStatus(in, out, s) +} + +func autoConvert_machine_MachineStatus_To_v1alpha1_MachineStatus(in *machine.MachineStatus, out *MachineStatus, s conversion.Scope) error { + out.Addresses = *(*[]corev1.NodeAddress)(unsafe.Pointer(&in.Addresses)) + out.Conditions = *(*[]corev1.NodeCondition)(unsafe.Pointer(&in.Conditions)) + if err := Convert_machine_LastOperation_To_v1alpha1_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { + return err + } + if err := Convert_machine_CurrentStatus_To_v1alpha1_CurrentStatus(&in.CurrentStatus, &out.CurrentStatus, s); err != nil { + return err + } + out.LastKnownState = in.LastKnownState + return nil +} + +// Convert_machine_MachineStatus_To_v1alpha1_MachineStatus is an autogenerated conversion function. 
+func Convert_machine_MachineStatus_To_v1alpha1_MachineStatus(in *machine.MachineStatus, out *MachineStatus, s conversion.Scope) error { + return autoConvert_machine_MachineStatus_To_v1alpha1_MachineStatus(in, out, s) +} + +func autoConvert_v1alpha1_MachineSummary_To_machine_MachineSummary(in *MachineSummary, out *machine.MachineSummary, s conversion.Scope) error { + out.Name = in.Name + out.ProviderID = in.ProviderID + if err := Convert_v1alpha1_LastOperation_To_machine_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { + return err + } + out.OwnerRef = in.OwnerRef + return nil +} + +// Convert_v1alpha1_MachineSummary_To_machine_MachineSummary is an autogenerated conversion function. +func Convert_v1alpha1_MachineSummary_To_machine_MachineSummary(in *MachineSummary, out *machine.MachineSummary, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineSummary_To_machine_MachineSummary(in, out, s) +} + +func autoConvert_machine_MachineSummary_To_v1alpha1_MachineSummary(in *machine.MachineSummary, out *MachineSummary, s conversion.Scope) error { + out.Name = in.Name + out.ProviderID = in.ProviderID + if err := Convert_machine_LastOperation_To_v1alpha1_LastOperation(&in.LastOperation, &out.LastOperation, s); err != nil { + return err + } + out.OwnerRef = in.OwnerRef + return nil +} + +// Convert_machine_MachineSummary_To_v1alpha1_MachineSummary is an autogenerated conversion function. 
+func Convert_machine_MachineSummary_To_v1alpha1_MachineSummary(in *machine.MachineSummary, out *MachineSummary, s conversion.Scope) error { + return autoConvert_machine_MachineSummary_To_v1alpha1_MachineSummary(in, out, s) +} + +func autoConvert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(in *MachineTemplateSpec, out *machine.MachineTemplateSpec, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_v1alpha1_MachineSpec_To_machine_MachineSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + return nil +} + +// Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec is an autogenerated conversion function. +func Convert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(in *MachineTemplateSpec, out *machine.MachineTemplateSpec, s conversion.Scope) error { + return autoConvert_v1alpha1_MachineTemplateSpec_To_machine_MachineTemplateSpec(in, out, s) +} + +func autoConvert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(in *machine.MachineTemplateSpec, out *MachineTemplateSpec, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + if err := Convert_machine_MachineSpec_To_v1alpha1_MachineSpec(&in.Spec, &out.Spec, s); err != nil { + return err + } + return nil +} + +// Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec is an autogenerated conversion function. 
+func Convert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(in *machine.MachineTemplateSpec, out *MachineTemplateSpec, s conversion.Scope) error { + return autoConvert_machine_MachineTemplateSpec_To_v1alpha1_MachineTemplateSpec(in, out, s) +} + +func autoConvert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(in *NodeTemplate, out *machine.NodeTemplate, s conversion.Scope) error { + out.Capacity = *(*corev1.ResourceList)(unsafe.Pointer(&in.Capacity)) + out.VirtualCapacity = *(*corev1.ResourceList)(unsafe.Pointer(&in.VirtualCapacity)) + out.InstanceType = in.InstanceType + out.Region = in.Region + out.Zone = in.Zone + out.Architecture = (*string)(unsafe.Pointer(in.Architecture)) + return nil +} + +// Convert_v1alpha1_NodeTemplate_To_machine_NodeTemplate is an autogenerated conversion function. +func Convert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(in *NodeTemplate, out *machine.NodeTemplate, s conversion.Scope) error { + return autoConvert_v1alpha1_NodeTemplate_To_machine_NodeTemplate(in, out, s) +} + +func autoConvert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(in *machine.NodeTemplate, out *NodeTemplate, s conversion.Scope) error { + out.Capacity = *(*corev1.ResourceList)(unsafe.Pointer(&in.Capacity)) + out.VirtualCapacity = *(*corev1.ResourceList)(unsafe.Pointer(&in.VirtualCapacity)) + out.InstanceType = in.InstanceType + out.Region = in.Region + out.Zone = in.Zone + out.Architecture = (*string)(unsafe.Pointer(in.Architecture)) + return nil +} + +// Convert_machine_NodeTemplate_To_v1alpha1_NodeTemplate is an autogenerated conversion function. 
+func Convert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(in *machine.NodeTemplate, out *NodeTemplate, s conversion.Scope) error { + return autoConvert_machine_NodeTemplate_To_v1alpha1_NodeTemplate(in, out, s) +} + +func autoConvert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(in *NodeTemplateSpec, out *machine.NodeTemplateSpec, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + out.Spec = in.Spec + return nil +} + +// Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec is an autogenerated conversion function. +func Convert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(in *NodeTemplateSpec, out *machine.NodeTemplateSpec, s conversion.Scope) error { + return autoConvert_v1alpha1_NodeTemplateSpec_To_machine_NodeTemplateSpec(in, out, s) +} + +func autoConvert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(in *machine.NodeTemplateSpec, out *NodeTemplateSpec, s conversion.Scope) error { + out.ObjectMeta = in.ObjectMeta + out.Spec = in.Spec + return nil +} + +// Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec is an autogenerated conversion function. +func Convert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(in *machine.NodeTemplateSpec, out *NodeTemplateSpec, s conversion.Scope) error { + return autoConvert_machine_NodeTemplateSpec_To_v1alpha1_NodeTemplateSpec(in, out, s) +} + +func autoConvert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(in *RollbackConfig, out *machine.RollbackConfig, s conversion.Scope) error { + out.Revision = in.Revision + return nil +} + +// Convert_v1alpha1_RollbackConfig_To_machine_RollbackConfig is an autogenerated conversion function. 
+func Convert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(in *RollbackConfig, out *machine.RollbackConfig, s conversion.Scope) error { + return autoConvert_v1alpha1_RollbackConfig_To_machine_RollbackConfig(in, out, s) +} + +func autoConvert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(in *machine.RollbackConfig, out *RollbackConfig, s conversion.Scope) error { + out.Revision = in.Revision + return nil +} + +// Convert_machine_RollbackConfig_To_v1alpha1_RollbackConfig is an autogenerated conversion function. +func Convert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(in *machine.RollbackConfig, out *RollbackConfig, s conversion.Scope) error { + return autoConvert_machine_RollbackConfig_To_v1alpha1_RollbackConfig(in, out, s) +} + +func autoConvert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(in *RollingUpdateMachineDeployment, out *machine.RollingUpdateMachineDeployment, s conversion.Scope) error { + if err := Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { + return err + } + return nil +} + +// Convert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment is an autogenerated conversion function. 
+func Convert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(in *RollingUpdateMachineDeployment, out *machine.RollingUpdateMachineDeployment, s conversion.Scope) error { + return autoConvert_v1alpha1_RollingUpdateMachineDeployment_To_machine_RollingUpdateMachineDeployment(in, out, s) +} + +func autoConvert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(in *machine.RollingUpdateMachineDeployment, out *RollingUpdateMachineDeployment, s conversion.Scope) error { + if err := Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(&in.UpdateConfiguration, &out.UpdateConfiguration, s); err != nil { + return err + } + return nil +} + +// Convert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment is an autogenerated conversion function. +func Convert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(in *machine.RollingUpdateMachineDeployment, out *RollingUpdateMachineDeployment, s conversion.Scope) error { + return autoConvert_machine_RollingUpdateMachineDeployment_To_v1alpha1_RollingUpdateMachineDeployment(in, out, s) +} + +func autoConvert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(in *UpdateConfiguration, out *machine.UpdateConfiguration, s conversion.Scope) error { + out.MaxUnavailable = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnavailable)) + out.MaxSurge = (*intstr.IntOrString)(unsafe.Pointer(in.MaxSurge)) + return nil +} + +// Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration is an autogenerated conversion function. 
+func Convert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(in *UpdateConfiguration, out *machine.UpdateConfiguration, s conversion.Scope) error { + return autoConvert_v1alpha1_UpdateConfiguration_To_machine_UpdateConfiguration(in, out, s) +} + +func autoConvert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(in *machine.UpdateConfiguration, out *UpdateConfiguration, s conversion.Scope) error { + out.MaxUnavailable = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnavailable)) + out.MaxSurge = (*intstr.IntOrString)(unsafe.Pointer(in.MaxSurge)) + return nil +} + +// Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration is an autogenerated conversion function. +func Convert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(in *machine.UpdateConfiguration, out *UpdateConfiguration, s conversion.Scope) error { + return autoConvert_machine_UpdateConfiguration_To_v1alpha1_UpdateConfiguration(in, out, s) +} diff --git a/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 000000000..f169b3811 --- /dev/null +++ b/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,813 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors +// +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + intstr "k8s.io/apimachinery/pkg/util/intstr" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClassSpec) DeepCopyInto(out *ClassSpec) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClassSpec. 
+func (in *ClassSpec) DeepCopy() *ClassSpec { + if in == nil { + return nil + } + out := new(ClassSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CurrentStatus) DeepCopyInto(out *CurrentStatus) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + if in.PreserveExpiryTime != nil { + in, out := &in.PreserveExpiryTime, &out.PreserveExpiryTime + *out = (*in).DeepCopy() + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CurrentStatus. +func (in *CurrentStatus) DeepCopy() *CurrentStatus { + if in == nil { + return nil + } + out := new(CurrentStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InPlaceUpdateMachineDeployment) DeepCopyInto(out *InPlaceUpdateMachineDeployment) { + *out = *in + in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InPlaceUpdateMachineDeployment. +func (in *InPlaceUpdateMachineDeployment) DeepCopy() *InPlaceUpdateMachineDeployment { + if in == nil { + return nil + } + out := new(InPlaceUpdateMachineDeployment) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LastOperation) DeepCopyInto(out *LastOperation) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LastOperation. 
+func (in *LastOperation) DeepCopy() *LastOperation { + if in == nil { + return nil + } + out := new(LastOperation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Machine) DeepCopyInto(out *Machine) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.TypeMeta = in.TypeMeta + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Machine. +func (in *Machine) DeepCopy() *Machine { + if in == nil { + return nil + } + out := new(Machine) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Machine) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineClass) DeepCopyInto(out *MachineClass) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.NodeTemplate != nil { + in, out := &in.NodeTemplate, &out.NodeTemplate + *out = new(NodeTemplate) + (*in).DeepCopyInto(*out) + } + if in.CredentialsSecretRef != nil { + in, out := &in.CredentialsSecretRef, &out.CredentialsSecretRef + *out = new(v1.SecretReference) + **out = **in + } + in.ProviderSpec.DeepCopyInto(&out.ProviderSpec) + if in.SecretRef != nil { + in, out := &in.SecretRef, &out.SecretRef + *out = new(v1.SecretReference) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClass. 
+func (in *MachineClass) DeepCopy() *MachineClass { + if in == nil { + return nil + } + out := new(MachineClass) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineClass) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineClassList) DeepCopyInto(out *MachineClassList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineClass, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClassList. +func (in *MachineClassList) DeepCopy() *MachineClassList { + if in == nil { + return nil + } + out := new(MachineClassList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineClassList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineConfiguration) DeepCopyInto(out *MachineConfiguration) { + *out = *in + if in.MachineDrainTimeout != nil { + in, out := &in.MachineDrainTimeout, &out.MachineDrainTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachineHealthTimeout != nil { + in, out := &in.MachineHealthTimeout, &out.MachineHealthTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachineCreationTimeout != nil { + in, out := &in.MachineCreationTimeout, &out.MachineCreationTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachineInPlaceUpdateTimeout != nil { + in, out := &in.MachineInPlaceUpdateTimeout, &out.MachineInPlaceUpdateTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachinePreserveTimeout != nil { + in, out := &in.MachinePreserveTimeout, &out.MachinePreserveTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.DisableHealthTimeout != nil { + in, out := &in.DisableHealthTimeout, &out.DisableHealthTimeout + *out = new(bool) + **out = **in + } + if in.MaxEvictRetries != nil { + in, out := &in.MaxEvictRetries, &out.MaxEvictRetries + *out = new(int32) + **out = **in + } + if in.NodeConditions != nil { + in, out := &in.NodeConditions, &out.NodeConditions + *out = new(string) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineConfiguration. +func (in *MachineConfiguration) DeepCopy() *MachineConfiguration { + if in == nil { + return nil + } + out := new(MachineConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineDeployment) DeepCopyInto(out *MachineDeployment) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeployment. +func (in *MachineDeployment) DeepCopy() *MachineDeployment { + if in == nil { + return nil + } + out := new(MachineDeployment) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineDeployment) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentCondition) DeepCopyInto(out *MachineDeploymentCondition) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentCondition. +func (in *MachineDeploymentCondition) DeepCopy() *MachineDeploymentCondition { + if in == nil { + return nil + } + out := new(MachineDeploymentCondition) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentList) DeepCopyInto(out *MachineDeploymentList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineDeployment, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentList. 
+func (in *MachineDeploymentList) DeepCopy() *MachineDeploymentList { + if in == nil { + return nil + } + out := new(MachineDeploymentList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineDeploymentList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentSpec) DeepCopyInto(out *MachineDeploymentSpec) { + *out = *in + if in.Selector != nil { + in, out := &in.Selector, &out.Selector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } + in.Template.DeepCopyInto(&out.Template) + in.Strategy.DeepCopyInto(&out.Strategy) + if in.RevisionHistoryLimit != nil { + in, out := &in.RevisionHistoryLimit, &out.RevisionHistoryLimit + *out = new(int32) + **out = **in + } + if in.RollbackTo != nil { + in, out := &in.RollbackTo, &out.RollbackTo + *out = new(RollbackConfig) + **out = **in + } + if in.ProgressDeadlineSeconds != nil { + in, out := &in.ProgressDeadlineSeconds, &out.ProgressDeadlineSeconds + *out = new(int32) + **out = **in + } + if in.AutoPreserveFailedMachineMax != nil { + in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentSpec. +func (in *MachineDeploymentSpec) DeepCopy() *MachineDeploymentSpec { + if in == nil { + return nil + } + out := new(MachineDeploymentSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineDeploymentStatus) DeepCopyInto(out *MachineDeploymentStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]MachineDeploymentCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.CollisionCount != nil { + in, out := &in.CollisionCount, &out.CollisionCount + *out = new(int32) + **out = **in + } + if in.FailedMachines != nil { + in, out := &in.FailedMachines, &out.FailedMachines + *out = make([]*MachineSummary, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(MachineSummary) + (*in).DeepCopyInto(*out) + } + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStatus. +func (in *MachineDeploymentStatus) DeepCopy() *MachineDeploymentStatus { + if in == nil { + return nil + } + out := new(MachineDeploymentStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentStrategy) DeepCopyInto(out *MachineDeploymentStrategy) { + *out = *in + if in.RollingUpdate != nil { + in, out := &in.RollingUpdate, &out.RollingUpdate + *out = new(RollingUpdateMachineDeployment) + (*in).DeepCopyInto(*out) + } + if in.InPlaceUpdate != nil { + in, out := &in.InPlaceUpdate, &out.InPlaceUpdate + *out = new(InPlaceUpdateMachineDeployment) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStrategy. +func (in *MachineDeploymentStrategy) DeepCopy() *MachineDeploymentStrategy { + if in == nil { + return nil + } + out := new(MachineDeploymentStrategy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineList) DeepCopyInto(out *MachineList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Machine, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineList. +func (in *MachineList) DeepCopy() *MachineList { + if in == nil { + return nil + } + out := new(MachineList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSet) DeepCopyInto(out *MachineSet) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.TypeMeta = in.TypeMeta + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSet. +func (in *MachineSet) DeepCopy() *MachineSet { + if in == nil { + return nil + } + out := new(MachineSet) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineSet) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineSetCondition) DeepCopyInto(out *MachineSetCondition) { + *out = *in + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetCondition. +func (in *MachineSetCondition) DeepCopy() *MachineSetCondition { + if in == nil { + return nil + } + out := new(MachineSetCondition) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSetList) DeepCopyInto(out *MachineSetList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineSet, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetList. +func (in *MachineSetList) DeepCopy() *MachineSetList { + if in == nil { + return nil + } + out := new(MachineSetList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineSetList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineSetSpec) DeepCopyInto(out *MachineSetSpec) { + *out = *in + if in.Selector != nil { + in, out := &in.Selector, &out.Selector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } + out.MachineClass = in.MachineClass + in.Template.DeepCopyInto(&out.Template) + if in.AutoPreserveFailedMachineMax != nil { + in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetSpec. +func (in *MachineSetSpec) DeepCopy() *MachineSetSpec { + if in == nil { + return nil + } + out := new(MachineSetSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSetStatus) DeepCopyInto(out *MachineSetStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]MachineSetCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.LastOperation.DeepCopyInto(&out.LastOperation) + if in.FailedMachines != nil { + in, out := &in.FailedMachines, &out.FailedMachines + *out = new([]MachineSummary) + if **in != nil { + in, out := *in, *out + *out = make([]MachineSummary, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + } + if in.AutoPreserveFailedMachineCount != nil { + in, out := &in.AutoPreserveFailedMachineCount, &out.AutoPreserveFailedMachineCount + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetStatus. +func (in *MachineSetStatus) DeepCopy() *MachineSetStatus { + if in == nil { + return nil + } + out := new(MachineSetStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *MachineSpec) DeepCopyInto(out *MachineSpec) { + *out = *in + out.Class = in.Class + in.NodeTemplateSpec.DeepCopyInto(&out.NodeTemplateSpec) + if in.MachineConfiguration != nil { + in, out := &in.MachineConfiguration, &out.MachineConfiguration + *out = new(MachineConfiguration) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSpec. +func (in *MachineSpec) DeepCopy() *MachineSpec { + if in == nil { + return nil + } + out := new(MachineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineStatus) DeepCopyInto(out *MachineStatus) { + *out = *in + if in.Addresses != nil { + in, out := &in.Addresses, &out.Addresses + *out = make([]v1.NodeAddress, len(*in)) + copy(*out, *in) + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.NodeCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.LastOperation.DeepCopyInto(&out.LastOperation) + in.CurrentStatus.DeepCopyInto(&out.CurrentStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineStatus. +func (in *MachineStatus) DeepCopy() *MachineStatus { + if in == nil { + return nil + } + out := new(MachineStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSummary) DeepCopyInto(out *MachineSummary) { + *out = *in + in.LastOperation.DeepCopyInto(&out.LastOperation) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSummary. 
+func (in *MachineSummary) DeepCopy() *MachineSummary { + if in == nil { + return nil + } + out := new(MachineSummary) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineTemplateSpec) DeepCopyInto(out *MachineTemplateSpec) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplateSpec. +func (in *MachineTemplateSpec) DeepCopy() *MachineTemplateSpec { + if in == nil { + return nil + } + out := new(MachineTemplateSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeTemplate) DeepCopyInto(out *NodeTemplate) { + *out = *in + if in.Capacity != nil { + in, out := &in.Capacity, &out.Capacity + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } + if in.VirtualCapacity != nil { + in, out := &in.VirtualCapacity, &out.VirtualCapacity + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } + if in.Architecture != nil { + in, out := &in.Architecture, &out.Architecture + *out = new(string) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplate. +func (in *NodeTemplate) DeepCopy() *NodeTemplate { + if in == nil { + return nil + } + out := new(NodeTemplate) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *NodeTemplateSpec) DeepCopyInto(out *NodeTemplateSpec) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplateSpec. +func (in *NodeTemplateSpec) DeepCopy() *NodeTemplateSpec { + if in == nil { + return nil + } + out := new(NodeTemplateSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RollbackConfig) DeepCopyInto(out *RollbackConfig) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollbackConfig. +func (in *RollbackConfig) DeepCopy() *RollbackConfig { + if in == nil { + return nil + } + out := new(RollbackConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RollingUpdateMachineDeployment) DeepCopyInto(out *RollingUpdateMachineDeployment) { + *out = *in + in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollingUpdateMachineDeployment. +func (in *RollingUpdateMachineDeployment) DeepCopy() *RollingUpdateMachineDeployment { + if in == nil { + return nil + } + out := new(RollingUpdateMachineDeployment) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *UpdateConfiguration) DeepCopyInto(out *UpdateConfiguration) { + *out = *in + if in.MaxUnavailable != nil { + in, out := &in.MaxUnavailable, &out.MaxUnavailable + *out = new(intstr.IntOrString) + **out = **in + } + if in.MaxSurge != nil { + in, out := &in.MaxSurge, &out.MaxSurge + *out = new(intstr.IntOrString) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpdateConfiguration. +func (in *UpdateConfiguration) DeepCopy() *UpdateConfiguration { + if in == nil { + return nil + } + out := new(UpdateConfiguration) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/apis/machine/v1alpha1/zz_generated.defaults.go b/pkg/apis/machine/v1alpha1/zz_generated.defaults.go new file mode 100644 index 000000000..dce68e638 --- /dev/null +++ b/pkg/apis/machine/v1alpha1/zz_generated.defaults.go @@ -0,0 +1,21 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors +// +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by defaulter-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// RegisterDefaults adds defaulters functions to the given scheme. +// Public to allow building arbitrary schemes. +// All generated defaulters are covering - they call all nested defaulters. +func RegisterDefaults(scheme *runtime.Scheme) error { + return nil +} diff --git a/pkg/apis/machine/zz_generated.deepcopy.go b/pkg/apis/machine/zz_generated.deepcopy.go new file mode 100644 index 000000000..2f1d11e2f --- /dev/null +++ b/pkg/apis/machine/zz_generated.deepcopy.go @@ -0,0 +1,906 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors +// +// SPDX-License-Identifier: Apache-2.0 + +// Code generated by deepcopy-gen. DO NOT EDIT. 
+ +package machine + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + intstr "k8s.io/apimachinery/pkg/util/intstr" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClassSpec) DeepCopyInto(out *ClassSpec) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClassSpec. +func (in *ClassSpec) DeepCopy() *ClassSpec { + if in == nil { + return nil + } + out := new(ClassSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CurrentStatus) DeepCopyInto(out *CurrentStatus) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + if in.PreserveExpiryTime != nil { + in, out := &in.PreserveExpiryTime, &out.PreserveExpiryTime + *out = (*in).DeepCopy() + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CurrentStatus. +func (in *CurrentStatus) DeepCopy() *CurrentStatus { + if in == nil { + return nil + } + out := new(CurrentStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InPlaceUpdateMachineDeployment) DeepCopyInto(out *InPlaceUpdateMachineDeployment) { + *out = *in + in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InPlaceUpdateMachineDeployment. 
+func (in *InPlaceUpdateMachineDeployment) DeepCopy() *InPlaceUpdateMachineDeployment { + if in == nil { + return nil + } + out := new(InPlaceUpdateMachineDeployment) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LastOperation) DeepCopyInto(out *LastOperation) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LastOperation. +func (in *LastOperation) DeepCopy() *LastOperation { + if in == nil { + return nil + } + out := new(LastOperation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Machine) DeepCopyInto(out *Machine) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.TypeMeta = in.TypeMeta + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Machine. +func (in *Machine) DeepCopy() *Machine { + if in == nil { + return nil + } + out := new(Machine) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Machine) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineClass) DeepCopyInto(out *MachineClass) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + if in.NodeTemplate != nil { + in, out := &in.NodeTemplate, &out.NodeTemplate + *out = new(NodeTemplate) + (*in).DeepCopyInto(*out) + } + if in.CredentialsSecretRef != nil { + in, out := &in.CredentialsSecretRef, &out.CredentialsSecretRef + *out = new(v1.SecretReference) + **out = **in + } + in.ProviderSpec.DeepCopyInto(&out.ProviderSpec) + if in.SecretRef != nil { + in, out := &in.SecretRef, &out.SecretRef + *out = new(v1.SecretReference) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClass. +func (in *MachineClass) DeepCopy() *MachineClass { + if in == nil { + return nil + } + out := new(MachineClass) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineClass) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineClassList) DeepCopyInto(out *MachineClassList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineClass, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineClassList. +func (in *MachineClassList) DeepCopy() *MachineClassList { + if in == nil { + return nil + } + out := new(MachineClassList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *MachineClassList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineConfiguration) DeepCopyInto(out *MachineConfiguration) { + *out = *in + if in.MachineDrainTimeout != nil { + in, out := &in.MachineDrainTimeout, &out.MachineDrainTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachineHealthTimeout != nil { + in, out := &in.MachineHealthTimeout, &out.MachineHealthTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachineCreationTimeout != nil { + in, out := &in.MachineCreationTimeout, &out.MachineCreationTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachineInPlaceUpdateTimeout != nil { + in, out := &in.MachineInPlaceUpdateTimeout, &out.MachineInPlaceUpdateTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.MachinePreserveTimeout != nil { + in, out := &in.MachinePreserveTimeout, &out.MachinePreserveTimeout + *out = new(metav1.Duration) + **out = **in + } + if in.DisableHealthTimeout != nil { + in, out := &in.DisableHealthTimeout, &out.DisableHealthTimeout + *out = new(bool) + **out = **in + } + if in.MaxEvictRetries != nil { + in, out := &in.MaxEvictRetries, &out.MaxEvictRetries + *out = new(int32) + **out = **in + } + if in.NodeConditions != nil { + in, out := &in.NodeConditions, &out.NodeConditions + *out = new(string) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineConfiguration. +func (in *MachineConfiguration) DeepCopy() *MachineConfiguration { + if in == nil { + return nil + } + out := new(MachineConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineDeployment) DeepCopyInto(out *MachineDeployment) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeployment. +func (in *MachineDeployment) DeepCopy() *MachineDeployment { + if in == nil { + return nil + } + out := new(MachineDeployment) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineDeployment) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentCondition) DeepCopyInto(out *MachineDeploymentCondition) { + *out = *in + in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentCondition. +func (in *MachineDeploymentCondition) DeepCopy() *MachineDeploymentCondition { + if in == nil { + return nil + } + out := new(MachineDeploymentCondition) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentList) DeepCopyInto(out *MachineDeploymentList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineDeployment, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentList. 
+func (in *MachineDeploymentList) DeepCopy() *MachineDeploymentList { + if in == nil { + return nil + } + out := new(MachineDeploymentList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineDeploymentList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentRollback) DeepCopyInto(out *MachineDeploymentRollback) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.UpdatedAnnotations != nil { + in, out := &in.UpdatedAnnotations, &out.UpdatedAnnotations + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + out.RollbackTo = in.RollbackTo + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentRollback. +func (in *MachineDeploymentRollback) DeepCopy() *MachineDeploymentRollback { + if in == nil { + return nil + } + out := new(MachineDeploymentRollback) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineDeploymentRollback) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineDeploymentSpec) DeepCopyInto(out *MachineDeploymentSpec) { + *out = *in + if in.Selector != nil { + in, out := &in.Selector, &out.Selector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } + in.Template.DeepCopyInto(&out.Template) + in.Strategy.DeepCopyInto(&out.Strategy) + if in.RevisionHistoryLimit != nil { + in, out := &in.RevisionHistoryLimit, &out.RevisionHistoryLimit + *out = new(int32) + **out = **in + } + if in.RollbackTo != nil { + in, out := &in.RollbackTo, &out.RollbackTo + *out = new(RollbackConfig) + **out = **in + } + if in.ProgressDeadlineSeconds != nil { + in, out := &in.ProgressDeadlineSeconds, &out.ProgressDeadlineSeconds + *out = new(int32) + **out = **in + } + if in.AutoPreserveFailedMachineMax != nil { + in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentSpec. +func (in *MachineDeploymentSpec) DeepCopy() *MachineDeploymentSpec { + if in == nil { + return nil + } + out := new(MachineDeploymentSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineDeploymentStatus) DeepCopyInto(out *MachineDeploymentStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]MachineDeploymentCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.CollisionCount != nil { + in, out := &in.CollisionCount, &out.CollisionCount + *out = new(int32) + **out = **in + } + if in.FailedMachines != nil { + in, out := &in.FailedMachines, &out.FailedMachines + *out = make([]*MachineSummary, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(MachineSummary) + (*in).DeepCopyInto(*out) + } + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStatus. +func (in *MachineDeploymentStatus) DeepCopy() *MachineDeploymentStatus { + if in == nil { + return nil + } + out := new(MachineDeploymentStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineDeploymentStrategy) DeepCopyInto(out *MachineDeploymentStrategy) { + *out = *in + if in.RollingUpdate != nil { + in, out := &in.RollingUpdate, &out.RollingUpdate + *out = new(RollingUpdateMachineDeployment) + (*in).DeepCopyInto(*out) + } + if in.InPlaceUpdate != nil { + in, out := &in.InPlaceUpdate, &out.InPlaceUpdate + *out = new(InPlaceUpdateMachineDeployment) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStrategy. +func (in *MachineDeploymentStrategy) DeepCopy() *MachineDeploymentStrategy { + if in == nil { + return nil + } + out := new(MachineDeploymentStrategy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineList) DeepCopyInto(out *MachineList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Machine, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineList. +func (in *MachineList) DeepCopy() *MachineList { + if in == nil { + return nil + } + out := new(MachineList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSet) DeepCopyInto(out *MachineSet) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.TypeMeta = in.TypeMeta + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSet. +func (in *MachineSet) DeepCopy() *MachineSet { + if in == nil { + return nil + } + out := new(MachineSet) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineSet) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineSetCondition) DeepCopyInto(out *MachineSetCondition) { + *out = *in + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetCondition. +func (in *MachineSetCondition) DeepCopy() *MachineSetCondition { + if in == nil { + return nil + } + out := new(MachineSetCondition) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSetList) DeepCopyInto(out *MachineSetList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineSet, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetList. +func (in *MachineSetList) DeepCopy() *MachineSetList { + if in == nil { + return nil + } + out := new(MachineSetList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineSetList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MachineSetSpec) DeepCopyInto(out *MachineSetSpec) { + *out = *in + if in.Selector != nil { + in, out := &in.Selector, &out.Selector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } + out.MachineClass = in.MachineClass + in.Template.DeepCopyInto(&out.Template) + if in.AutoPreserveFailedMachineMax != nil { + in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetSpec. +func (in *MachineSetSpec) DeepCopy() *MachineSetSpec { + if in == nil { + return nil + } + out := new(MachineSetSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSetStatus) DeepCopyInto(out *MachineSetStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]MachineSetCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.LastOperation.DeepCopyInto(&out.LastOperation) + if in.FailedMachines != nil { + in, out := &in.FailedMachines, &out.FailedMachines + *out = new([]MachineSummary) + if **in != nil { + in, out := *in, *out + *out = make([]MachineSummary, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + } + if in.AutoPreserveFailedMachineCount != nil { + in, out := &in.AutoPreserveFailedMachineCount, &out.AutoPreserveFailedMachineCount + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSetStatus. +func (in *MachineSetStatus) DeepCopy() *MachineSetStatus { + if in == nil { + return nil + } + out := new(MachineSetStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. +func (in *MachineSpec) DeepCopyInto(out *MachineSpec) { + *out = *in + out.Class = in.Class + in.NodeTemplateSpec.DeepCopyInto(&out.NodeTemplateSpec) + if in.MachineConfiguration != nil { + in, out := &in.MachineConfiguration, &out.MachineConfiguration + *out = new(MachineConfiguration) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSpec. +func (in *MachineSpec) DeepCopy() *MachineSpec { + if in == nil { + return nil + } + out := new(MachineSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineStatus) DeepCopyInto(out *MachineStatus) { + *out = *in + if in.Addresses != nil { + in, out := &in.Addresses, &out.Addresses + *out = make([]v1.NodeAddress, len(*in)) + copy(*out, *in) + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.NodeCondition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.LastOperation.DeepCopyInto(&out.LastOperation) + in.CurrentStatus.DeepCopyInto(&out.CurrentStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineStatus. +func (in *MachineStatus) DeepCopy() *MachineStatus { + if in == nil { + return nil + } + out := new(MachineStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineSummary) DeepCopyInto(out *MachineSummary) { + *out = *in + in.LastOperation.DeepCopyInto(&out.LastOperation) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSummary. 
+func (in *MachineSummary) DeepCopy() *MachineSummary { + if in == nil { + return nil + } + out := new(MachineSummary) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineTemplate) DeepCopyInto(out *MachineTemplate) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Template.DeepCopyInto(&out.Template) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplate. +func (in *MachineTemplate) DeepCopy() *MachineTemplate { + if in == nil { + return nil + } + out := new(MachineTemplate) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MachineTemplate) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineTemplateList) DeepCopyInto(out *MachineTemplateList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MachineTemplate, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplateList. +func (in *MachineTemplateList) DeepCopy() *MachineTemplateList { + if in == nil { + return nil + } + out := new(MachineTemplateList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *MachineTemplateList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachineTemplateSpec) DeepCopyInto(out *MachineTemplateSpec) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineTemplateSpec. +func (in *MachineTemplateSpec) DeepCopy() *MachineTemplateSpec { + if in == nil { + return nil + } + out := new(MachineTemplateSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeTemplate) DeepCopyInto(out *NodeTemplate) { + *out = *in + if in.Capacity != nil { + in, out := &in.Capacity, &out.Capacity + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } + if in.VirtualCapacity != nil { + in, out := &in.VirtualCapacity, &out.VirtualCapacity + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } + if in.Architecture != nil { + in, out := &in.Architecture, &out.Architecture + *out = new(string) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplate. +func (in *NodeTemplate) DeepCopy() *NodeTemplate { + if in == nil { + return nil + } + out := new(NodeTemplate) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *NodeTemplateSpec) DeepCopyInto(out *NodeTemplateSpec) { + *out = *in + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTemplateSpec. +func (in *NodeTemplateSpec) DeepCopy() *NodeTemplateSpec { + if in == nil { + return nil + } + out := new(NodeTemplateSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RollbackConfig) DeepCopyInto(out *RollbackConfig) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollbackConfig. +func (in *RollbackConfig) DeepCopy() *RollbackConfig { + if in == nil { + return nil + } + out := new(RollbackConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RollingUpdateMachineDeployment) DeepCopyInto(out *RollingUpdateMachineDeployment) { + *out = *in + in.UpdateConfiguration.DeepCopyInto(&out.UpdateConfiguration) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollingUpdateMachineDeployment. +func (in *RollingUpdateMachineDeployment) DeepCopy() *RollingUpdateMachineDeployment { + if in == nil { + return nil + } + out := new(RollingUpdateMachineDeployment) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *UpdateConfiguration) DeepCopyInto(out *UpdateConfiguration) { + *out = *in + if in.MaxUnavailable != nil { + in, out := &in.MaxUnavailable, &out.MaxUnavailable + *out = new(intstr.IntOrString) + **out = **in + } + if in.MaxSurge != nil { + in, out := &in.MaxSurge, &out.MaxSurge + *out = new(intstr.IntOrString) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpdateConfiguration. +func (in *UpdateConfiguration) DeepCopy() *UpdateConfiguration { + if in == nil { + return nil + } + out := new(UpdateConfiguration) + in.DeepCopyInto(out) + return out +} From 2f4fa29b09c8e83ad521f1f96f0b25d50aaf97d3 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 8 Jan 2026 14:19:11 +0530 Subject: [PATCH 43/79] Address review comments - part 6: Replace function preserveExpiryTimeSet to nil check --- pkg/controller/machineset.go | 6 +++--- pkg/util/provider/machinecontroller/machine.go | 5 ++--- pkg/util/provider/machinecontroller/machine_util.go | 7 +++---- pkg/util/provider/machineutils/utils.go | 5 ----- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 9e9b03f62..861b3bd96 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -484,7 +484,7 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 // or if it is a candidate for auto-preservation // TODO@thiyyakat: find more suitable name for function func (c *controller) isMachineCandidateForPreservation(ctx context.Context, machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { - if machineutils.IsPreserveExpiryTimeSet(machine) && !machineutils.HasPreservationTimedOut(machine) { + if machine.Status.CurrentStatus.PreserveExpiryTime != nil && !machineutils.HasPreservationTimedOut(machine) { klog.V(3).Infof("Machine %q is preserved until %v, not adding to stale machines", machine.Name, 
machine.Status.CurrentStatus.PreserveExpiryTime) return true, nil } @@ -497,7 +497,7 @@ func (c *controller) isMachineCandidateForPreservation(ctx context.Context, mach return false, nil } } - if *machineSet.Status.AutoPreserveFailedMachineCount < *machineSet.Spec.AutoPreserveFailedMachineMax { + if machineSet.Status.AutoPreserveFailedMachineCount != nil && machineSet.Spec.AutoPreserveFailedMachineMax != nil && *machineSet.Status.AutoPreserveFailedMachineCount < *machineSet.Spec.AutoPreserveFailedMachineMax { err := c.annotateMachineForAutoPreservation(ctx, machine) if err != nil { return true, err @@ -719,7 +719,7 @@ func prioritisePreservedMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machi preservedMachines := make([]*v1alpha1.Machine, 0, len(machines)) otherMachines := make([]*v1alpha1.Machine, 0, len(machines)) for _, mc := range machines { - if machineutils.IsPreserveExpiryTimeSet(mc) { + if mc.Status.CurrentStatus.PreserveExpiryTime != nil { preservedMachines = append(preservedMachines, mc) } else { otherMachines = append(otherMachines, mc) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 93fc4e548..598539b2e 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -759,8 +759,7 @@ func (c *controller) handlePreserveAnnotationsChange(oldAnnotations, newAnnotati return existsInOld != existsInNew || valueOld != valueNew } // Special case: annotation changed from 'now' to 'when-failed' - isPreserved := machineutils.IsPreserveExpiryTimeSet(machine) - if !isPreserved { + if machine.Status.CurrentStatus.PreserveExpiryTime == nil { return true } if machineutils.IsMachineFailed(machine) { @@ -809,7 +808,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if !isPreserveAnnotationValueValid(preserveValue) { klog.Warningf("Preserve annotation value %q on machine %s is invalid", preserveValue, machine.Name) 
return - } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (machineutils.IsPreserveExpiryTimeSet(clone) && machineutils.HasPreservationTimedOut(clone)) { + } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && machineutils.HasPreservationTimedOut(clone)) { err = c.stopMachinePreservation(ctx, clone) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index fe8bdaa97..7080fcd3c 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2181,7 +2181,7 @@ func (c *controller) canMarkMachineFailed(machineDeployName, machineName, namesp if machine.Status.CurrentStatus.Phase != v1alpha1.MachineUnknown && machine.Status.CurrentStatus.Phase != v1alpha1.MachineRunning { // since Preserved Failed machines are not replaced immediately, // they need not be considered towards inProgress - if !machineutils.IsPreserveExpiryTimeSet(machine) { + if machine.Status.CurrentStatus.PreserveExpiryTime == nil { inProgress++ } switch machine.Status.CurrentStatus.Phase { @@ -2370,9 +2370,8 @@ Utility Functions for Machine Preservation // preserveMachine contains logic to start the preservation of a machine and node. 
func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { nodeName := machine.Labels[v1alpha1.NodeLabelKey] - isExpirySet := machineutils.IsPreserveExpiryTimeSet(machine) updatedMachine := machine.DeepCopy() - if !isExpirySet { + if machine.Status.CurrentStatus.PreserveExpiryTime == nil { klog.V(4).Infof("Starting preservation flow for machine %q.", machine.Name) // Step 1: Add preserveExpiryTime to machine status updatedMachine, err := c.setPreserveExpiryTimeOnMachine(ctx, updatedMachine) @@ -2526,7 +2525,7 @@ func (c *controller) shouldNodeBeDrained(machine *v1alpha1.Machine, existingCond func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) error { // removal of preserveExpiryTime is the last step of stopping preservation // if preserveExpiryTime is not set, preservation is already stopped - if !machineutils.IsPreserveExpiryTimeSet(machine) { + if machine.Status.CurrentStatus.PreserveExpiryTime == nil { return nil } nodeName := machine.Labels[v1alpha1.NodeLabelKey] diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 0e0ec144d..ea968fb76 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -152,11 +152,6 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { return m.Annotations[MachinePriority] == "1" } -// IsPreserveExpiryTimeSet checks if machine is preserved by MCM -func IsPreserveExpiryTimeSet(m *v1alpha1.Machine) bool { - return !m.Status.CurrentStatus.PreserveExpiryTime.IsZero() -} - // HasPreservationTimedOut checks if the Status.CurrentStatus.PreserveExpiryTime has not yet passed func HasPreservationTimedOut(m *v1alpha1.Machine) bool { return !m.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) From 652f09417b83565ced4d13c8805b4e313a9c969a Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 13 Jan 2026 12:53:18 +0530 Subject: [PATCH 44/79] 
Address review comments - part 7: * additionally, add tests for isFailedMachineCandidateForPreservation() --- pkg/apis/machine/types.go | 4 +- .../v1alpha1/machinedeployment_types.go | 2 +- pkg/apis/machine/v1alpha1/machineset_types.go | 2 +- pkg/controller/deployment_machineset_util.go | 4 +- pkg/controller/machineset.go | 25 +- pkg/controller/machineset_test.go | 216 ++++++++++++++++++ .../provider/machinecontroller/machine.go | 19 +- .../machinecontroller/machine_test.go | 2 +- .../machinecontroller/machine_util.go | 15 +- 9 files changed, 253 insertions(+), 36 deletions(-) diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index d9ae90a19..093896770 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -417,7 +417,7 @@ type MachineSetStatus struct { // FailedMachines has summary of machines on which lastOperation Failed FailedMachines *[]MachineSummary - // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved + // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that are currently auto-preserved AutoPreserveFailedMachineCount *int32 } @@ -498,7 +498,7 @@ type MachineDeploymentSpec struct { // by default. ProgressDeadlineSeconds *int32 - // The maximum number of machines in the machine deployment that will be auto-preserved. + // The maximum number of machines in the machine deployment that can be auto-preserved. 
// In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments AutoPreserveFailedMachineMax *int32 } diff --git a/pkg/apis/machine/v1alpha1/machinedeployment_types.go b/pkg/apis/machine/v1alpha1/machinedeployment_types.go index 3fc6487f7..e16914373 100644 --- a/pkg/apis/machine/v1alpha1/machinedeployment_types.go +++ b/pkg/apis/machine/v1alpha1/machinedeployment_types.go @@ -92,7 +92,7 @@ type MachineDeploymentSpec struct { // +optional ProgressDeadlineSeconds *int32 `json:"progressDeadlineSeconds,omitempty"` - // The maximum number of machines in the machine deployment that will be auto-preserved. + // The maximum number of machines in the machine deployment that can be auto-preserved. // In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments // +optional AutoPreserveFailedMachineMax *int32 `json:"autoPreserveFailedMachineMax,omitempty"` diff --git a/pkg/apis/machine/v1alpha1/machineset_types.go b/pkg/apis/machine/v1alpha1/machineset_types.go index 2e6d24bd7..55675efdb 100644 --- a/pkg/apis/machine/v1alpha1/machineset_types.go +++ b/pkg/apis/machine/v1alpha1/machineset_types.go @@ -139,7 +139,7 @@ type MachineSetStatus struct { // +optional FailedMachines *[]MachineSummary `json:"failedMachines,omitempty"` - // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved + // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that are currently auto-preserved // +optional AutoPreserveFailedMachineCount *int32 `json:"autoPreserveFailedMachineCount,omitempty"` } diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index 6d352ca48..efabaed06 100644 --- a/pkg/controller/deployment_machineset_util.go +++ 
b/pkg/controller/deployment_machineset_util.go @@ -50,7 +50,7 @@ func updateMachineSetStatus(ctx context.Context, machineClient machineapi.Machin is.Generation == is.Status.ObservedGeneration && reflect.DeepEqual(is.Status.Conditions, newStatus.Conditions) && reflect.DeepEqual(is.Status.FailedMachines, newStatus.FailedMachines) && - is.Status.AutoPreserveFailedMachineCount == newStatus.AutoPreserveFailedMachineCount { + *is.Status.AutoPreserveFailedMachineCount == *newStatus.AutoPreserveFailedMachineCount { return is, nil } @@ -68,7 +68,7 @@ func updateMachineSetStatus(ctx context.Context, machineClient machineapi.Machin fmt.Sprintf("fullyLabeledReplicas %d->%d, ", is.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas)+ fmt.Sprintf("readyReplicas %d->%d, ", is.Status.ReadyReplicas, newStatus.ReadyReplicas)+ fmt.Sprintf("availableReplicas %d->%d, ", is.Status.AvailableReplicas, newStatus.AvailableReplicas)+ - fmt.Sprintf("sequence No: %v->%v", is.Status.ObservedGeneration, newStatus.ObservedGeneration)+ + fmt.Sprintf("sequence No: %v->%v,", is.Status.ObservedGeneration, newStatus.ObservedGeneration)+ fmt.Sprintf("autoPreserveFailedMachineCount %v->%v", is.Status.AutoPreserveFailedMachineCount, newStatus.AutoPreserveFailedMachineCount)) is.Status = newStatus diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 861b3bd96..d0a97ac27 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -480,10 +480,10 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 return nil } -// isMachineCandidateForPreservation checks if the machine is already preserved, in the process of being preserved +// isFailedMachineCandidateForPreservation checks if the machine is already preserved, in the process of being preserved // or if it is a candidate for auto-preservation // TODO@thiyyakat: find more suitable name for function -func (c *controller) isMachineCandidateForPreservation(ctx context.Context, 
machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { +func (c *controller) isFailedMachineCandidateForPreservation(ctx context.Context, machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { if machine.Status.CurrentStatus.PreserveExpiryTime != nil && !machineutils.HasPreservationTimedOut(machine) { klog.V(3).Infof("Machine %q is preserved until %v, not adding to stale machines", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return true, nil @@ -497,6 +497,17 @@ func (c *controller) isMachineCandidateForPreservation(ctx context.Context, mach return false, nil } } + // check if backing node is annotated with preserve=false, if yes, do not consider for preservation + if machine.Labels[v1alpha1.NodeLabelKey] != "" { + // check if backing node has preserve=false annotation, if yes, do not auto-preserve + node, err := c.nodeLister.Get(machine.Labels[v1alpha1.NodeLabelKey]) + if err != nil { + return false, err // propagate the node lookup error; the machine is not reported as a preservation candidate + } + if val, exists = node.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueFalse { + return false, nil + } + } if machineSet.Status.AutoPreserveFailedMachineCount != nil && machineSet.Spec.AutoPreserveFailedMachineMax != nil && *machineSet.Status.AutoPreserveFailedMachineCount < *machineSet.Spec.AutoPreserveFailedMachineMax { err := c.annotateMachineForAutoPreservation(ctx, machine) if err != nil { @@ -949,16 +960,6 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. 
} func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { - if m.Labels[v1alpha1.NodeLabelKey] != "" { - // check if backing node has preserve=false annotation, if yes, do not auto-preserve - node, err := dc.nodeLister.Get(m.Labels[v1alpha1.NodeLabelKey]) - if err != nil { - return err - } - if val, exists := node.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueFalse { - return nil - } - } _, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { if clone.Annotations == nil { clone.Annotations = make(map[string]string) diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 88d847017..689426124 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -8,6 +8,7 @@ import ( "context" "errors" "fmt" + corev1 "k8s.io/api/core/v1" "sync" "time" @@ -1049,6 +1050,221 @@ var _ = Describe("machineset", func() { }) }) + Describe("#isFailedMachineCandidateForPreservation", func() { + + type setup struct { + autoPreserveFailedMachineMax int32 + machineIsPreserved bool + machinePreserveAnnotationValue string + backingNode *corev1.Node + } + type expect struct { + result bool + err error + } + type testCase struct { + setup setup + expect expect + } + + DescribeTable("isFailedMachineCandidateForPreservation test cases", func(tc testCase) { + stop := make(chan struct{}) + defer close(stop) + + objects := []runtime.Object{} + + testMachineSet := &machinev1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "MachineSet-test", + Namespace: testNamespace, + Labels: map[string]string{ + "test-label": "test-label", + }, + UID: "1234567", + }, + TypeMeta: metav1.TypeMeta{ + Kind: "MachineSet", + APIVersion: "machine.sapcloud.io/v1alpha1", + }, + Spec: machinev1.MachineSetSpec{ + Replicas: 2, + 
Template: machinev1.MachineTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "test-label": "test-label", + }, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "test-label": "test-label", + }, + }, + AutoPreserveFailedMachineMax: ptr.To(tc.setup.autoPreserveFailedMachineMax), + }, + Status: machinev1.MachineSetStatus{ + AutoPreserveFailedMachineCount: ptr.To(int32(0)), + }, + } + + testMachine := &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + UID: "1234568", + Labels: map[string]string{ + "test-label": "test-label", + }, + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: tc.setup.machinePreserveAnnotationValue, + }, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", + APIVersion: "machine.sapcloud.io/v1alpha1", + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: MachineFailed, + }, + }, + } + if tc.setup.backingNode != nil { + testMachine.Labels[machinev1.NodeLabelKey] = "node-1" + } + if tc.setup.machineIsPreserved { + testMachine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{ + Time: time.Now().Add(1 * time.Hour), + } + } + + objects = append(objects, testMachineSet, testMachine) + var targetCoreObjects []runtime.Object + if tc.setup.backingNode != nil { + targetCoreObjects = append(targetCoreObjects, tc.setup.backingNode) + } + c, trackers := createController(stop, testNamespace, objects, nil, targetCoreObjects) + defer trackers.Stop() + waitForCacheSync(stop, c) + result, err := c.isFailedMachineCandidateForPreservation(context.TODO(), testMachineSet, testMachine) + Expect(result).To(Equal(tc.expect.result)) + if tc.expect.err != nil { + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal(tc.expect.err.Error())) + return + } + Expect(err).ToNot(HaveOccurred()) + }, + Entry("should return false for un-preserved machine, when autoPreserveFailedMachineMax is 0", 
testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 0, + machineIsPreserved: false, + backingNode: nil, + }, + expect: expect{ + result: false, + err: nil, + }, + }), + Entry("should return true for un-preserved machine, when autoPreserveFailedMachineMax is 1", testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 1, + machineIsPreserved: false, + backingNode: nil, + }, + expect: expect{ + result: true, + err: nil, + }, + }), + Entry("should return true for machine annotated with preserve=now", testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 1, + machineIsPreserved: true, + machinePreserveAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + backingNode: nil, + }, + expect: expect{ + result: true, + err: nil, + }, + }), + Entry("should return true for machine annotated with preserve=when-failed, but not yet preserved", testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 1, + machineIsPreserved: false, + machinePreserveAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + backingNode: nil, + }, + expect: expect{ + result: true, + err: nil, + }, + }), + Entry("should return false for machine annotated with preserve=false", testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 1, + machineIsPreserved: false, + machinePreserveAnnotationValue: machineutils.PreserveMachineAnnotationValueFalse, + backingNode: nil, + }, + expect: expect{ + result: false, + err: nil, + }, + }), + Entry("should return false when backing node is annotated with preserve=false", testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 1, + machineIsPreserved: false, + backingNode: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueFalse, + }, + }, + }, + }, + expect: expect{ + result: false, + err: nil, + }, + }), + Entry("should return true when backing node has no preserve-annotation, and 
autoPreserveFailedMachineMax is 1", testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 1, + machineIsPreserved: false, + backingNode: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{}, + }, + }, + }, + expect: expect{ + result: true, + err: nil, + }, + }), + Entry("should return error when backing node is not found", testCase{ + setup: setup{ + autoPreserveFailedMachineMax: 1, + machineIsPreserved: false, + backingNode: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "invalid", + Annotations: map[string]string{}, + }, + }, + }, + expect: expect{ + result: false, + err: errors.New("node \"node-1\" not found"), + }, + }), + ) + }) // TODO: This method has dependency on generic-machineclass. Implement later. Describe("#reconcileClusterMachineSet", func() { var ( diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 598539b2e..6d0b65e17 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -783,8 +783,9 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if err != nil { if apierrors.IsConflict(err) { retry = machineutils.ConflictRetry + } else { + retry = machineutils.ShortRetry } - retry = machineutils.ShortRetry } else { retry = machineutils.LongRetry } @@ -812,8 +813,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a err = c.stopMachinePreservation(ctx, clone) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { - machineFailed := machineutils.IsMachineFailed(clone) - if machineFailed { + if machineutils.IsMachineFailed(clone) { err = c.preserveMachine(ctx, clone, preserveValue) } // Here, if the preserve value is when-failed, but the machine is in running, there could be 2 possibilities: @@ -887,16 +887,3 @@ func isPreserveAnnotationValueValid(preserveValue string) bool { _, 
exists := machineutils.AllowedPreserveAnnotationValues[preserveValue] return exists } - -// isPreservedNodeConditionStatusTrue check if all the steps in the preservation logic have been completed for the machine -// if the machine has no backing node, only PreserveExpiryTime needs to be set -// if the machine has a backing node, the NodePreserved condition on the node needs to be true -func (c *controller) isPreservedNodeConditionStatusTrue(cond *corev1.NodeCondition) bool { - if cond == nil { - return false - } - if cond.Status == corev1.ConditionTrue { - return true - } - return false -} diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 62a6b7ed1..e3b72fd2c 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -4286,7 +4286,7 @@ var _ = Describe("machine", func() { expect expect } - DescribeTable("manageMachinePreservation behavior", + DescribeTable("manageMachinePreservation behavior scenarios", func(tc testCase) { stop := make(chan struct{}) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 7080fcd3c..f52ad0147 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2451,6 +2451,19 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine return updatedMachine, nil } +// isPreservedNodeConditionStatusTrue check if all the steps in the preservation logic have been completed for the machine +// if the machine has no backing node, only PreserveExpiryTime needs to be set +// if the machine has a backing node, the NodePreserved condition on the node needs to be true +func (c *controller) isPreservedNodeConditionStatusTrue(cond *v1.NodeCondition) bool { + if cond == nil { + return false + } + if cond.Status == v1.ConditionTrue { + return true + } + 
return false +} + // addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, error) { // Check if annotation already exists with correct value @@ -2562,7 +2575,7 @@ func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alp } } } - // Step 3: update machine status to set preserve expiry time to metav1.Time{} + // Step 3: update machine status to set preserve expiry time to nil clone := machine.DeepCopy() clone.Status.CurrentStatus.PreserveExpiryTime = nil clone.Status.CurrentStatus.LastUpdateTime = metav1.Now() From a6faa511c038f948e324394db129985dd5aeb422 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 13 Jan 2026 13:14:04 +0530 Subject: [PATCH 45/79] Address review comments - part 7: * additionally, add tests for isFailedMachineCandidateForPreservation() --- Makefile | 6 +- docs/documents/apis.md | 266 +++--------------- ...achine.sapcloud.io_machinedeployments.yaml | 2 +- .../crds/machine.sapcloud.io_machinesets.yaml | 2 +- pkg/apis/machine/types.go | 8 +- .../v1alpha1/machinedeployment_types.go | 4 +- pkg/apis/machine/v1alpha1/machineset_types.go | 4 +- .../v1alpha1/zz_generated.conversion.go | 12 +- .../machine/v1alpha1/zz_generated.deepcopy.go | 15 - pkg/apis/machine/zz_generated.deepcopy.go | 15 - pkg/controller/deployment_machineset_util.go | 5 +- pkg/controller/machineset.go | 8 +- pkg/controller/machineset_test.go | 4 +- pkg/openapi/openapi_generated.go | 4 +- .../provider/machinecontroller/machine.go | 10 +- 15 files changed, 80 insertions(+), 285 deletions(-) diff --git a/Makefile b/Makefile index aba0236ac..1b05c478b 100644 --- a/Makefile +++ b/Makefile @@ -172,9 +172,9 @@ test-clean: .PHONY: generate generate: $(VGOPATH) $(DEEPCOPY_GEN) $(DEFAULTER_GEN) $(CONVERSION_GEN) $(OPENAPI_GEN) $(CONTROLLER_GEN) $(GEN_CRD_API_REFERENCE_DOCS) - $(CONTROLLER_GEN) crd 
paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout - @./hack/generate-code - @./hack/api-reference/generate-spec-doc.sh + @GOFLAGS="-buildvcs=false" $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout + @GOFLAGS="-buildvcs=false" ./hack/generate-code + @GOFLAGS="-buildvcs=false" ./hack/api-reference/generate-spec-doc.sh .PHONY: add-license-headers add-license-headers: $(GO_ADD_LICENSE) diff --git a/docs/documents/apis.md b/docs/documents/apis.md index 0918a46fb..7e9c072b2 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -46,15 +46,11 @@ string - -Kubernetes meta/v1.ObjectMeta - +invalid type

ObjectMeta for machine object

-Refer to the Kubernetes API documentation for the fields of the -metadata field. @@ -205,15 +201,11 @@ string - -Kubernetes meta/v1.ObjectMeta - +invalid type (Optional) -Refer to the Kubernetes API documentation for the fields of the -metadata field. @@ -238,9 +230,7 @@ NodeTemplate - -Kubernetes core/v1.SecretReference - +invalid type @@ -254,9 +244,7 @@ This might be useful if multiple machine classes with the same credentials but d - -k8s.io/apimachinery/pkg/runtime.RawExtension - +invalid type @@ -282,9 +270,7 @@ string - -Kubernetes core/v1.SecretReference - +invalid type @@ -339,16 +325,12 @@ string - -Kubernetes meta/v1.ObjectMeta - +invalid type (Optional)

Standard object metadata.

-Refer to the Kubernetes API documentation for the fields of the -metadata field. @@ -389,9 +371,7 @@ zero and not specified. Defaults to 0.

- -Kubernetes meta/v1.LabelSelector - +invalid type @@ -519,12 +499,12 @@ by default, which is treated as infinite deadline.

-*int32 +int32 (Optional) -

The maximum number of machines in the machine deployment that will be auto-preserved. +

The maximum number of machines in the machine deployment that can be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker’s machine deployments

@@ -595,15 +575,11 @@ string - -Kubernetes meta/v1.ObjectMeta - +invalid type (Optional) -Refer to the Kubernetes API documentation for the fields of the -metadata field. @@ -641,9 +617,7 @@ int32 - -Kubernetes meta/v1.LabelSelector - +invalid type @@ -699,7 +673,7 @@ int32 -*int32 +int32 @@ -852,9 +826,7 @@ bool - -Kubernetes meta/v1.Time - +invalid type @@ -867,9 +839,7 @@ Kubernetes meta/v1.Time - -Kubernetes meta/v1.Time - +invalid type @@ -987,9 +957,7 @@ string - -Kubernetes meta/v1.Time - +invalid type @@ -1054,9 +1022,7 @@ MachineOperationType - -Kubernetes meta/v1.Duration - +invalid type @@ -1070,9 +1036,7 @@ Kubernetes meta/v1.Duration - -Kubernetes meta/v1.Duration - +invalid type @@ -1086,9 +1050,7 @@ Kubernetes meta/v1.Duration - -Kubernetes meta/v1.Duration - +invalid type @@ -1102,9 +1064,7 @@ Kubernetes meta/v1.Duration - -Kubernetes meta/v1.Duration - +invalid type @@ -1118,9 +1078,7 @@ Kubernetes meta/v1.Duration - -Kubernetes meta/v1.Duration - +invalid type @@ -1229,9 +1187,7 @@ ConditionStatus - -Kubernetes meta/v1.Time - +invalid type @@ -1244,9 +1200,7 @@ Kubernetes meta/v1.Time - -Kubernetes meta/v1.Time - +invalid type @@ -1333,9 +1287,7 @@ zero and not specified. Defaults to 0.

- -Kubernetes meta/v1.LabelSelector - +invalid type @@ -1463,12 +1415,12 @@ by default, which is treated as infinite deadline.

-*int32 +int32 (Optional) -

The maximum number of machines in the machine deployment that will be auto-preserved. +

The maximum number of machines in the machine deployment that can be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker’s machine deployments

@@ -1617,8 +1569,8 @@ newest MachineSet.

- -[]*github.com/thiyyakat/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary + +[]*github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary @@ -1792,9 +1744,7 @@ ConditionStatus - -Kubernetes meta/v1.Time - +invalid type @@ -1882,9 +1832,7 @@ int32 - -Kubernetes meta/v1.LabelSelector - +invalid type @@ -1940,7 +1888,7 @@ int32 -*int32 +int32 @@ -2075,8 +2023,8 @@ LastOperation - -[]github.com/thiyyakat/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary + +[]github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary @@ -2091,12 +2039,12 @@ LastOperation -*int32 +int32 (Optional) -

AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved

+

AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that are currently auto-preserved

@@ -2226,9 +2174,7 @@ MachineConfiguration - -[]Kubernetes core/v1.NodeAddress - +[]invalid type @@ -2243,9 +2189,7 @@ be used by clients to determine how to connect to the machine, instead of the - -[]Kubernetes core/v1.NodeCondition - +[]invalid type @@ -2398,17 +2342,13 @@ string - -Kubernetes meta/v1.ObjectMeta - +invalid type (Optional)

Standard object’s metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata

-Refer to the Kubernetes API documentation for the fields of the -metadata field. @@ -2525,9 +2465,7 @@ MachineConfiguration - -Kubernetes core/v1.ResourceList - +invalid type @@ -2540,9 +2478,7 @@ Kubernetes core/v1.ResourceList - -Kubernetes core/v1.ResourceList - +invalid type @@ -2631,15 +2567,11 @@ string - -Kubernetes meta/v1.ObjectMeta - +invalid type (Optional) -Refer to the Kubernetes API documentation for the fields of the -metadata field. @@ -2648,9 +2580,7 @@ Refer to the Kubernetes API documentation for the fields of the - -Kubernetes core/v1.NodeSpec - +invalid type @@ -2659,112 +2589,6 @@ Kubernetes core/v1.NodeSpec

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-podCIDR - - -string - - -(Optional) -

PodCIDR represents the pod IP range assigned to the node.

-
-podCIDRs - - -[]string - - -(Optional) -

podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this -field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for -each of IPv4 and IPv6.

-
-providerID - - -string - - -(Optional) -

ID of the node assigned by the cloud provider in the format: <ProviderName>://<ProviderSpecificNodeID>

-
-unschedulable - - -bool - - -(Optional) -

Unschedulable controls node schedulability of new pods. By default, node is schedulable. -More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration

-
-taints - - - -[]Kubernetes core/v1.Taint - - - -(Optional) -

If specified, the node’s taints.

-
-configSource - - - -Kubernetes core/v1.NodeConfigSource - - - -(Optional) -

Deprecated: Previously used to specify the source of the node’s configuration for the DynamicKubeletConfig feature. This feature is removed.

-
-externalID - - -string - - -(Optional) -

Deprecated. Not all kubelets will set this field. Remove field after 1.13. -see: https://issues.k8s.io/61966

-
@@ -2883,9 +2707,7 @@ UpdateConfiguration - -k8s.io/apimachinery/pkg/util/intstr.IntOrString - +invalid type @@ -2907,9 +2729,7 @@ least 70% of desired machines.

- -k8s.io/apimachinery/pkg/util/intstr.IntOrString - +invalid type diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml index abb36d1c4..81ddc6da3 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml @@ -69,7 +69,7 @@ spec: properties: autoPreserveFailedMachineMax: description: |- - The maximum number of machines in the machine deployment that will be auto-preserved. + The maximum number of machines in the machine deployment that can be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments format: int32 type: integer diff --git a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml index 46445131f..ba176b8df 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinesets.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinesets.yaml @@ -321,7 +321,7 @@ spec: properties: autoPreserveFailedMachineCount: description: AutoPreserveFailedMachineCount has a count of the number - of failed machines in the machineset that have been auto-preserved + of failed machines in the machineset that are currently auto-preserved format: int32 type: integer availableReplicas: diff --git a/pkg/apis/machine/types.go b/pkg/apis/machine/types.go index 093896770..6772c2100 100644 --- a/pkg/apis/machine/types.go +++ b/pkg/apis/machine/types.go @@ -357,7 +357,7 @@ type MachineSetSpec struct { MinReadySeconds int32 - AutoPreserveFailedMachineMax *int32 + AutoPreserveFailedMachineMax int32 } // MachineSetConditionType is the condition on machineset object @@ -418,7 +418,7 @@ type MachineSetStatus struct { FailedMachines *[]MachineSummary // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that are currently 
auto-preserved - AutoPreserveFailedMachineCount *int32 + AutoPreserveFailedMachineCount int32 } // MachineSummary store the summary of machine. @@ -498,9 +498,9 @@ type MachineDeploymentSpec struct { // by default. ProgressDeadlineSeconds *int32 - // The maximum number of machines in the machine deployment that can be auto-preserved. + // The maximum number of failed machines in the machine deployment that can be auto-preserved. // In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments - AutoPreserveFailedMachineMax *int32 + AutoPreserveFailedMachineMax int32 } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/machine/v1alpha1/machinedeployment_types.go b/pkg/apis/machine/v1alpha1/machinedeployment_types.go index e16914373..2e1e6da0e 100644 --- a/pkg/apis/machine/v1alpha1/machinedeployment_types.go +++ b/pkg/apis/machine/v1alpha1/machinedeployment_types.go @@ -92,10 +92,10 @@ type MachineDeploymentSpec struct { // +optional ProgressDeadlineSeconds *int32 `json:"progressDeadlineSeconds,omitempty"` - // The maximum number of machines in the machine deployment that can be auto-preserved. + // The maximum number of failed machines in the machine deployment that can be auto-preserved. 
// In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments // +optional - AutoPreserveFailedMachineMax *int32 `json:"autoPreserveFailedMachineMax,omitempty"` + AutoPreserveFailedMachineMax int32 `json:"autoPreserveFailedMachineMax,omitempty"` } const ( diff --git a/pkg/apis/machine/v1alpha1/machineset_types.go b/pkg/apis/machine/v1alpha1/machineset_types.go index 55675efdb..82fbb2dbf 100644 --- a/pkg/apis/machine/v1alpha1/machineset_types.go +++ b/pkg/apis/machine/v1alpha1/machineset_types.go @@ -70,7 +70,7 @@ type MachineSetSpec struct { MinReadySeconds int32 `json:"minReadySeconds,omitempty"` // +optional - AutoPreserveFailedMachineMax *int32 `json:"autoPreserveFailedMachineMax,omitempty"` + AutoPreserveFailedMachineMax int32 `json:"autoPreserveFailedMachineMax,omitempty"` } // MachineSetConditionType is the condition on machineset object @@ -141,5 +141,5 @@ type MachineSetStatus struct { // AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that are currently auto-preserved // +optional - AutoPreserveFailedMachineCount *int32 `json:"autoPreserveFailedMachineCount,omitempty"` + AutoPreserveFailedMachineCount int32 `json:"autoPreserveFailedMachineCount,omitempty"` } diff --git a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go index d503e95ae..b636363da 100644 --- a/pkg/apis/machine/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/machine/v1alpha1/zz_generated.conversion.go @@ -660,7 +660,7 @@ func autoConvert_v1alpha1_MachineDeploymentSpec_To_machine_MachineDeploymentSpec out.Paused = in.Paused out.RollbackTo = (*machine.RollbackConfig)(unsafe.Pointer(in.RollbackTo)) out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) - out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + 
out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -683,7 +683,7 @@ func autoConvert_machine_MachineDeploymentSpec_To_v1alpha1_MachineDeploymentSpec out.Paused = in.Paused out.RollbackTo = (*RollbackConfig)(unsafe.Pointer(in.RollbackTo)) out.ProgressDeadlineSeconds = (*int32)(unsafe.Pointer(in.ProgressDeadlineSeconds)) - out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -866,7 +866,7 @@ func autoConvert_v1alpha1_MachineSetSpec_To_machine_MachineSetSpec(in *MachineSe return err } out.MinReadySeconds = in.MinReadySeconds - out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -885,7 +885,7 @@ func autoConvert_machine_MachineSetSpec_To_v1alpha1_MachineSetSpec(in *machine.M return err } out.MinReadySeconds = in.MinReadySeconds - out.AutoPreserveFailedMachineMax = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineMax)) + out.AutoPreserveFailedMachineMax = in.AutoPreserveFailedMachineMax return nil } @@ -905,7 +905,7 @@ func autoConvert_v1alpha1_MachineSetStatus_To_machine_MachineSetStatus(in *Machi return err } out.FailedMachines = (*[]machine.MachineSummary)(unsafe.Pointer(in.FailedMachines)) - out.AutoPreserveFailedMachineCount = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineCount)) + out.AutoPreserveFailedMachineCount = in.AutoPreserveFailedMachineCount return nil } @@ -925,7 +925,7 @@ func autoConvert_machine_MachineSetStatus_To_v1alpha1_MachineSetStatus(in *machi return err } out.FailedMachines = (*[]MachineSummary)(unsafe.Pointer(in.FailedMachines)) - out.AutoPreserveFailedMachineCount = (*int32)(unsafe.Pointer(in.AutoPreserveFailedMachineCount)) + out.AutoPreserveFailedMachineCount = in.AutoPreserveFailedMachineCount return nil } diff --git 
a/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go index f169b3811..788f6cbee 100644 --- a/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go @@ -350,11 +350,6 @@ func (in *MachineDeploymentSpec) DeepCopyInto(out *MachineDeploymentSpec) { *out = new(int32) **out = **in } - if in.AutoPreserveFailedMachineMax != nil { - in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax - *out = new(int32) - **out = **in - } return } @@ -554,11 +549,6 @@ func (in *MachineSetSpec) DeepCopyInto(out *MachineSetSpec) { } out.MachineClass = in.MachineClass in.Template.DeepCopyInto(&out.Template) - if in.AutoPreserveFailedMachineMax != nil { - in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax - *out = new(int32) - **out = **in - } return } @@ -594,11 +584,6 @@ func (in *MachineSetStatus) DeepCopyInto(out *MachineSetStatus) { } } } - if in.AutoPreserveFailedMachineCount != nil { - in, out := &in.AutoPreserveFailedMachineCount, &out.AutoPreserveFailedMachineCount - *out = new(int32) - **out = **in - } return } diff --git a/pkg/apis/machine/zz_generated.deepcopy.go b/pkg/apis/machine/zz_generated.deepcopy.go index 2f1d11e2f..d22685750 100644 --- a/pkg/apis/machine/zz_generated.deepcopy.go +++ b/pkg/apis/machine/zz_generated.deepcopy.go @@ -383,11 +383,6 @@ func (in *MachineDeploymentSpec) DeepCopyInto(out *MachineDeploymentSpec) { *out = new(int32) **out = **in } - if in.AutoPreserveFailedMachineMax != nil { - in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax - *out = new(int32) - **out = **in - } return } @@ -587,11 +582,6 @@ func (in *MachineSetSpec) DeepCopyInto(out *MachineSetSpec) { } out.MachineClass = in.MachineClass in.Template.DeepCopyInto(&out.Template) - if in.AutoPreserveFailedMachineMax != nil { - in, out := &in.AutoPreserveFailedMachineMax, &out.AutoPreserveFailedMachineMax - *out = 
new(int32) - **out = **in - } return } @@ -627,11 +617,6 @@ func (in *MachineSetStatus) DeepCopyInto(out *MachineSetStatus) { } } } - if in.AutoPreserveFailedMachineCount != nil { - in, out := &in.AutoPreserveFailedMachineCount, &out.AutoPreserveFailedMachineCount - *out = new(int32) - **out = **in - } return } diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index efabaed06..1bd2348ea 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -25,7 +25,6 @@ package controller import ( "context" "fmt" - "k8s.io/utils/ptr" "reflect" "k8s.io/klog/v2" @@ -50,7 +49,7 @@ func updateMachineSetStatus(ctx context.Context, machineClient machineapi.Machin is.Generation == is.Status.ObservedGeneration && reflect.DeepEqual(is.Status.Conditions, newStatus.Conditions) && reflect.DeepEqual(is.Status.FailedMachines, newStatus.FailedMachines) && - *is.Status.AutoPreserveFailedMachineCount == *newStatus.AutoPreserveFailedMachineCount { + is.Status.AutoPreserveFailedMachineCount == newStatus.AutoPreserveFailedMachineCount { return is, nil } @@ -161,7 +160,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al newStatus.ReadyReplicas = int32(readyReplicasCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 newStatus.AvailableReplicas = int32(availableReplicasCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 newStatus.LastOperation.LastUpdateTime = metav1.Now() - newStatus.AutoPreserveFailedMachineCount = ptr.To(int32(autoPreserveFailedMachineCount)) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 + newStatus.AutoPreserveFailedMachineCount = int32(autoPreserveFailedMachineCount) // #nosec G115 (CWE-190) -- number of machines will not exceed MaxInt32 return newStatus } diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index d0a97ac27..b20f93fc5 
100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -484,8 +484,8 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 // or if it is a candidate for auto-preservation // TODO@thiyyakat: find more suitable name for function func (c *controller) isFailedMachineCandidateForPreservation(ctx context.Context, machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { - if machine.Status.CurrentStatus.PreserveExpiryTime != nil && !machineutils.HasPreservationTimedOut(machine) { - klog.V(3).Infof("Machine %q is preserved until %v, not adding to stale machines", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + if machine.Status.CurrentStatus.PreserveExpiryTime != nil && machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { + klog.V(3).Infof("Failed machine %q is preserved until %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return true, nil } val, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] @@ -508,7 +508,7 @@ func (c *controller) isFailedMachineCandidateForPreservation(ctx context.Context return false, nil } } - if machineSet.Status.AutoPreserveFailedMachineCount != nil && machineSet.Spec.AutoPreserveFailedMachineMax != nil && *machineSet.Status.AutoPreserveFailedMachineCount < *machineSet.Spec.AutoPreserveFailedMachineMax { + if machineSet.Status.AutoPreserveFailedMachineCount < machineSet.Spec.AutoPreserveFailedMachineMax { err := c.annotateMachineForAutoPreservation(ctx, machine) if err != nil { return true, err @@ -970,7 +970,7 @@ func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m if err != nil { return err } - klog.V(2).Infof("Updated machine %q with auto-preserved annotation.", m.Name) + klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) return nil } diff --git 
a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 689426124..2890c08b5 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -1100,10 +1100,10 @@ var _ = Describe("machineset", func() { "test-label": "test-label", }, }, - AutoPreserveFailedMachineMax: ptr.To(tc.setup.autoPreserveFailedMachineMax), + AutoPreserveFailedMachineMax: tc.setup.autoPreserveFailedMachineMax, }, Status: machinev1.MachineSetStatus{ - AutoPreserveFailedMachineCount: ptr.To(int32(0)), + AutoPreserveFailedMachineCount: int32(0), }, } diff --git a/pkg/openapi/openapi_generated.go b/pkg/openapi/openapi_generated.go index 0db94f5b6..f744632e7 100644 --- a/pkg/openapi/openapi_generated.go +++ b/pkg/openapi/openapi_generated.go @@ -957,7 +957,7 @@ func schema_pkg_apis_machine_v1alpha1_MachineDeploymentSpec(ref common.Reference }, "autoPreserveFailedMachineMax": { SchemaProps: spec.SchemaProps{ - Description: "The maximum number of machines in the machine deployment that will be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments", + Description: "The maximum number of machines in the machine deployment that can be auto-preserved. 
In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments", Type: []string{"integer"}, Format: "int32", }, @@ -1429,7 +1429,7 @@ func schema_pkg_apis_machine_v1alpha1_MachineSetStatus(ref common.ReferenceCallb }, "autoPreserveFailedMachineCount": { SchemaProps: spec.SchemaProps{ - Description: "AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that have been auto-preserved", + Description: "AutoPreserveFailedMachineCount has a count of the number of failed machines in the machineset that are currently auto-preserved", Type: []string{"integer"}, Format: "int32", }, diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 6d0b65e17..202d3ab67 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -752,11 +752,17 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // handlePreserveAnnotationsChange returns true if there is a change in preserve annotations // it also handles the special case where the annotation is changed from 'now' to 'when-failed' // in which case it stops the preservation if expiry time is already set +// when a machine is annotated with "now", the machine is preserved even when Running +// if the annotation has been changed to 'when-failed', we need to stop preservation if the machine is not in Failed Phase func (c *controller) handlePreserveAnnotationsChange(oldAnnotations, newAnnotations map[string]string, machine *v1alpha1.Machine) bool { valueNew, existsInNew := newAnnotations[machineutils.PreserveMachineAnnotationKey] valueOld, existsInOld := oldAnnotations[machineutils.PreserveMachineAnnotationKey] + if existsInNew != existsInOld { + return true + } if valueNew != machineutils.PreserveMachineAnnotationValueWhenFailed || valueOld != 
machineutils.PreserveMachineAnnotationValueNow { - return existsInOld != existsInNew || valueOld != valueNew + changed := valueOld != valueNew + return changed } // Special case: annotation changed from 'now' to 'when-failed' if machine.Status.CurrentStatus.PreserveExpiryTime == nil { @@ -809,7 +815,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if !isPreserveAnnotationValueValid(preserveValue) { klog.Warningf("Preserve annotation value %q on machine %s is invalid", preserveValue, machine.Name) return - } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && machineutils.HasPreservationTimedOut(clone)) { + } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { err = c.stopMachinePreservation(ctx, clone) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { From 3242d4a4e2234253481f3dce10ccd0a2be7cccbb Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 14 Jan 2026 09:31:10 +0530 Subject: [PATCH 46/79] Fix apis.md --- docs/documents/apis.md | 246 +++++++++++++++++++++++++++++++++++------ 1 file changed, 213 insertions(+), 33 deletions(-) diff --git a/docs/documents/apis.md b/docs/documents/apis.md index 7e9c072b2..cb5c4ea05 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -46,11 +46,15 @@ string -invalid type + +Kubernetes meta/v1.ObjectMeta +

ObjectMeta for machine object

+Refer to the Kubernetes API documentation for the fields of the +metadata field. @@ -201,11 +205,15 @@ string -invalid type + +Kubernetes meta/v1.ObjectMeta + (Optional) +Refer to the Kubernetes API documentation for the fields of the +metadata field. @@ -230,7 +238,9 @@ NodeTemplate -invalid type + +Kubernetes core/v1.SecretReference + @@ -244,7 +254,9 @@ This might be useful if multiple machine classes with the same credentials but d -invalid type + +k8s.io/apimachinery/pkg/runtime.RawExtension + @@ -270,7 +282,9 @@ string -invalid type + +Kubernetes core/v1.SecretReference + @@ -325,12 +339,16 @@ string -invalid type + +Kubernetes meta/v1.ObjectMeta + (Optional)

Standard object metadata.

+Refer to the Kubernetes API documentation for the fields of the +metadata field. @@ -371,7 +389,9 @@ zero and not specified. Defaults to 0.

-invalid type + +Kubernetes meta/v1.LabelSelector + @@ -504,7 +524,7 @@ int32 (Optional) -

The maximum number of machines in the machine deployment that can be auto-preserved. +

The maximum number of failed machines in the machine deployment that can be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker’s machine deployments

@@ -575,11 +595,15 @@ string -invalid type + +Kubernetes meta/v1.ObjectMeta + (Optional) +Refer to the Kubernetes API documentation for the fields of the +metadata field. @@ -617,7 +641,9 @@ int32 -invalid type + +Kubernetes meta/v1.LabelSelector + @@ -826,7 +852,9 @@ bool -invalid type + +Kubernetes meta/v1.Time + @@ -839,7 +867,9 @@ invalid type -invalid type + +Kubernetes meta/v1.Time + @@ -957,7 +987,9 @@ string -invalid type + +Kubernetes meta/v1.Time + @@ -1022,7 +1054,9 @@ MachineOperationType -invalid type + +Kubernetes meta/v1.Duration + @@ -1036,7 +1070,9 @@ invalid type -invalid type + +Kubernetes meta/v1.Duration + @@ -1050,7 +1086,9 @@ invalid type -invalid type + +Kubernetes meta/v1.Duration + @@ -1064,7 +1102,9 @@ invalid type -invalid type + +Kubernetes meta/v1.Duration + @@ -1078,7 +1118,9 @@ invalid type -invalid type + +Kubernetes meta/v1.Duration + @@ -1187,7 +1229,9 @@ ConditionStatus -invalid type + +Kubernetes meta/v1.Time + @@ -1200,7 +1244,9 @@ invalid type -invalid type + +Kubernetes meta/v1.Time + @@ -1287,7 +1333,9 @@ zero and not specified. Defaults to 0.

-invalid type + +Kubernetes meta/v1.LabelSelector + @@ -1420,7 +1468,7 @@ int32 (Optional) -

The maximum number of machines in the machine deployment that can be auto-preserved. +

The maximum number of failed machines in the machine deployment that can be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker’s machine deployments

@@ -1744,7 +1792,9 @@ ConditionStatus -invalid type + +Kubernetes meta/v1.Time + @@ -1832,7 +1882,9 @@ int32 -invalid type + +Kubernetes meta/v1.LabelSelector + @@ -2174,7 +2226,9 @@ MachineConfiguration -[]invalid type + +[]Kubernetes core/v1.NodeAddress + @@ -2189,7 +2243,9 @@ be used by clients to determine how to connect to the machine, instead of the -[]invalid type + +[]Kubernetes core/v1.NodeCondition + @@ -2342,13 +2398,17 @@ string -invalid type + +Kubernetes meta/v1.ObjectMeta + (Optional)

Standard object’s metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata

+Refer to the Kubernetes API documentation for the fields of the +metadata field. @@ -2465,7 +2525,9 @@ MachineConfiguration -invalid type + +Kubernetes core/v1.ResourceList + @@ -2478,7 +2540,9 @@ invalid type -invalid type + +Kubernetes core/v1.ResourceList + @@ -2567,11 +2631,15 @@ string -invalid type + +Kubernetes meta/v1.ObjectMeta + (Optional) +Refer to the Kubernetes API documentation for the fields of the +metadata field. @@ -2580,7 +2648,9 @@ invalid type -invalid type + +Kubernetes core/v1.NodeSpec + @@ -2589,6 +2659,112 @@ invalid type

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+podCIDR + + +string + + +(Optional) +

PodCIDR represents the pod IP range assigned to the node.

+
+podCIDRs + + +[]string + + +(Optional) +

podCIDRs represents the IP ranges assigned to the node for usage by Pods on that node. If this +field is specified, the 0th entry must match the podCIDR field. It may contain at most 1 value for +each of IPv4 and IPv6.

+
+providerID + + +string + + +(Optional) +

ID of the node assigned by the cloud provider in the format: &lt;ProviderName&gt;://&lt;ProviderSpecificNodeID&gt;

+
+unschedulable + + +bool + + +(Optional) +

Unschedulable controls node schedulability of new pods. By default, node is schedulable. +More info: https://kubernetes.io/docs/concepts/nodes/node/#manual-node-administration

+
+taints + + + +[]Kubernetes core/v1.Taint + + + +(Optional) +

If specified, the node’s taints.

+
+configSource + + + +Kubernetes core/v1.NodeConfigSource + + + +(Optional) +

Deprecated: Previously used to specify the source of the node’s configuration for the DynamicKubeletConfig feature. This feature is removed.

+
+externalID + + +string + + +(Optional) +

Deprecated. Not all kubelets will set this field. Remove field after 1.13. +see: https://issues.k8s.io/61966

+
@@ -2707,7 +2883,9 @@ UpdateConfiguration -invalid type + +k8s.io/apimachinery/pkg/util/intstr.IntOrString + @@ -2729,7 +2907,9 @@ least 70% of desired machines.

-invalid type + +k8s.io/apimachinery/pkg/util/intstr.IntOrString + From 9cab0bc7b25866682c9e99ae1444fef3311f6815 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 14 Jan 2026 09:32:50 +0530 Subject: [PATCH 47/79] Fix apis.md and address review comments --- docs/documents/_index.md | 5 ----- docs/documents/apis.md | 8 ++++---- .../crds/machine.sapcloud.io_machinedeployments.yaml | 2 +- pkg/openapi/openapi_generated.go | 2 +- pkg/util/provider/machinecontroller/machine.go | 1 - 5 files changed, 6 insertions(+), 12 deletions(-) delete mode 100644 docs/documents/_index.md diff --git a/docs/documents/_index.md b/docs/documents/_index.md deleted file mode 100644 index bdfec1968..000000000 --- a/docs/documents/_index.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -title: Documents -weight: 2 -persona: Developers ---- \ No newline at end of file diff --git a/docs/documents/apis.md b/docs/documents/apis.md index cb5c4ea05..8aa70fc4b 100644 --- a/docs/documents/apis.md +++ b/docs/documents/apis.md @@ -1617,8 +1617,8 @@ newest MachineSet.

- -[]*github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary + +[]*../../pkg/apis/machine/v1alpha1.MachineSummary @@ -2075,8 +2075,8 @@ LastOperation - -[]github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1.MachineSummary + +[]../../pkg/apis/machine/v1alpha1.MachineSummary diff --git a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml index 81ddc6da3..f55235fc0 100644 --- a/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml +++ b/kubernetes/crds/machine.sapcloud.io_machinedeployments.yaml @@ -69,7 +69,7 @@ spec: properties: autoPreserveFailedMachineMax: description: |- - The maximum number of machines in the machine deployment that can be auto-preserved. + The maximum number of failed machines in the machine deployment that can be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments format: int32 type: integer diff --git a/pkg/openapi/openapi_generated.go b/pkg/openapi/openapi_generated.go index f744632e7..862a74f98 100644 --- a/pkg/openapi/openapi_generated.go +++ b/pkg/openapi/openapi_generated.go @@ -957,7 +957,7 @@ func schema_pkg_apis_machine_v1alpha1_MachineDeploymentSpec(ref common.Reference }, "autoPreserveFailedMachineMax": { SchemaProps: spec.SchemaProps{ - Description: "The maximum number of machines in the machine deployment that can be auto-preserved. In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments", + Description: "The maximum number of failed machines in the machine deployment that can be auto-preserved. 
In the gardener context, this number is derived from the AutoPreserveFailedMachineMax set at the worker level, distributed amongst the worker's machine deployments", Type: []string{"integer"}, Format: "int32", }, diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 202d3ab67..53efea6c9 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -460,7 +460,6 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque uninitializedMachine = true klog.Infof("VM instance associated with machine %s was created but not initialized.", machine.Name) //clean me up. I'm dirty. - //TODO@thiyyakat add a pointer to a boolean variable indicating whether initialization has happened successfully. nodeName = getMachineStatusResponse.NodeName providerID = getMachineStatusResponse.ProviderID From e3b5dd6afe28eea399acf44978e85fc6590358f7 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 16 Jan 2026 09:55:25 +0530 Subject: [PATCH 48/79] Modify nodeops.AddOrUpdateConditionsOnNode() to return updated node --- docs/documents/_index.md | 5 +++++ pkg/util/nodeops/conditions.go | 17 ++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 docs/documents/_index.md diff --git a/docs/documents/_index.md b/docs/documents/_index.md new file mode 100644 index 000000000..bdfec1968 --- /dev/null +++ b/docs/documents/_index.md @@ -0,0 +1,5 @@ +--- +title: Documents +weight: 2 +persona: Developers +--- \ No newline at end of file diff --git a/pkg/util/nodeops/conditions.go b/pkg/util/nodeops/conditions.go index 81b4b38c3..f3bafad77 100644 --- a/pkg/util/nodeops/conditions.go +++ b/pkg/util/nodeops/conditions.go @@ -66,9 +66,10 @@ func GetNodeCondition(ctx context.Context, c clientset.Interface, nodeName strin } // AddOrUpdateConditionsOnNode adds a condition to the node's status -func AddOrUpdateConditionsOnNode(ctx 
context.Context, c clientset.Interface, nodeName string, condition v1.NodeCondition) error { +func AddOrUpdateConditionsOnNode(ctx context.Context, c clientset.Interface, nodeName string, condition v1.NodeCondition) (*v1.Node, error) { firstTry := true - return clientretry.RetryOnConflict(Backoff, func() error { + var updatedNode *v1.Node + err := clientretry.RetryOnConflict(Backoff, func() error { var err error var oldNode *v1.Node // First we try getting node from the API server cache, as it's cheaper. If it fails @@ -87,18 +88,20 @@ func AddOrUpdateConditionsOnNode(ctx context.Context, c clientset.Interface, nod var newNode *v1.Node oldNodeCopy := oldNode newNode = AddOrUpdateCondition(oldNodeCopy, condition) - return UpdateNodeConditions(ctx, c, nodeName, oldNode, newNode) + updatedNode, err = UpdateNodeConditions(ctx, c, nodeName, oldNode, newNode) + return err }) + return updatedNode, err } // UpdateNodeConditions is for updating the node conditions from oldNode to the newNode // using the node's UpdateStatus() method -func UpdateNodeConditions(ctx context.Context, c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error { +func UpdateNodeConditions(ctx context.Context, c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) (*v1.Node, error) { newNodeClone := oldNode.DeepCopy() newNodeClone.Status.Conditions = newNode.Status.Conditions - _, err := c.CoreV1().Nodes().UpdateStatus(ctx, newNodeClone, metav1.UpdateOptions{}) + updatedNode, err := c.CoreV1().Nodes().UpdateStatus(ctx, newNodeClone, metav1.UpdateOptions{}) if err != nil { - return fmt.Errorf("failed to create/update conditions on node %q: %v", nodeName, err) + return nil, fmt.Errorf("failed to create/update conditions on node %q: %v", nodeName, err) } - return nil + return updatedNode, nil } From 2ff8ec9b72031da5cca59dee81a6feffd6b18f14 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 16 Jan 2026 09:56:59 +0530 Subject: [PATCH 49/79] Address review 
comments - part 8: * remove auto preservation logic from manageReplicas() * rename constants * simplify preserved Running machine switch from preserve=now to preserve=when-failed * update tests --- pkg/apis/machine/v1alpha1/machine_types.go | 12 +- pkg/controller/deployment_machineset_util.go | 2 +- pkg/controller/machineset.go | 76 ++---- pkg/controller/machineset_test.go | 216 ------------------ pkg/controller/machineset_util.go | 45 ++++ .../provider/machinecontroller/machine.go | 71 +++--- .../machinecontroller/machine_test.go | 130 ----------- .../machinecontroller/machine_util.go | 150 +++++++----- .../machinecontroller/machine_util_test.go | 47 ++-- pkg/util/provider/machinecontroller/node.go | 5 +- pkg/util/provider/machineutils/utils.go | 39 +++- 11 files changed, 254 insertions(+), 539 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 3744534f3..25577ccef 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -248,14 +248,14 @@ const ( // NodePreserved is a node condition type for preservation of machines to allow end-user to know that a node is preserved NodePreserved corev1.NodeConditionType = "NodePreserved" - // NodePreservedByMCM is a node condition reason for preservation of machines to indicate that the node is auto-preserved by MCM - NodePreservedByMCM string = "PreservedByMCM" + // PreservedByMCM is a node condition reason for preservation of machines to indicate that the node is auto-preserved by MCM + PreservedByMCM string = "PreservedByMCM" - // NodePreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user - NodePreservedByUser string = "PreservedByUser" + // PreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user + PreservedByUser string = "PreservedByUser" - // 
NodePreservationStopped is a node condition reason to indicate that a machine/node preservation has been stopped due to annotation update or timeout - NodePreservationStopped string = "PreservationStopped" + // PreservationStopped is a node condition reason to indicate that a machine/node preservation has been stopped due to annotation update or timeout + PreservationStopped string = "PreservationStopped" // PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful PreservedNodeDrainSuccessful string = "Preserved Node drained successfully" diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index 1bd2348ea..d7628b734 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -128,7 +128,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al failedMachines = append(failedMachines, machineSummary) } cond := getMachineCondition(machine, v1alpha1.NodePreserved) - if cond != nil && cond.Reason == v1alpha1.NodePreservedByMCM { + if cond != nil && cond.Reason == v1alpha1.PreservedByMCM { autoPreserveFailedMachineCount++ } } diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index b20f93fc5..6ff1b8f9b 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -341,6 +341,19 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 if m.Labels[v1alpha1.LabelKeyNodeUpdateResult] != v1alpha1.LabelValueNodeUpdateSuccessful { machinesWithoutUpdateSuccessfulLabel = append(machinesWithoutUpdateSuccessfulLabel, m) } + if machineutils.IsMachineTriggeredForDeletion(m) { + staleMachines = append(staleMachines, m) + } else if machineutils.IsMachineFailed(m) { + // if machine is preserved or in the process of being preserved, the machine should be considered an active machine and not be added to stale machines 
+ preserve := machineutils.IsFailedMachineCandidateForPreservation(m) + if preserve { + activeMachines = append(activeMachines, m) + } else { + staleMachines = append(staleMachines, m) + } + } else if machineutils.IsMachineActive(m) { + activeMachines = append(activeMachines, m) + } } allMachinesDiff := len(allMachines) - int(machineSet.Spec.Replicas) machinesWithoutUpdateSuccessfulLabelDiff := len(machinesWithoutUpdateSuccessfulLabel) - int(machineSet.Spec.Replicas) @@ -480,44 +493,6 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 return nil } -// isFailedMachineCandidateForPreservation checks if the machine is already preserved, in the process of being preserved -// or if it is a candidate for auto-preservation -// TODO@thiyyakat: find more suitable name for function -func (c *controller) isFailedMachineCandidateForPreservation(ctx context.Context, machineSet *v1alpha1.MachineSet, machine *v1alpha1.Machine) (bool, error) { - if machine.Status.CurrentStatus.PreserveExpiryTime != nil && machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { - klog.V(3).Infof("Failed machine %q is preserved until %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) - return true, nil - } - val, exists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] - if exists { - switch val { - case machineutils.PreserveMachineAnnotationValueWhenFailed, machineutils.PreserveMachineAnnotationValueNow: // this is in case preservation process is not complete yet - return true, nil - case machineutils.PreserveMachineAnnotationValueFalse: - return false, nil - } - } - // check if backing node is annotated with preserve=false, if yes, do not consider for preservation - if machine.Labels[v1alpha1.NodeLabelKey] != "" { - // check if backing node has preserve=false annotation, if yes, do not auto-preserve - node, err := c.nodeLister.Get(machine.Labels[v1alpha1.NodeLabelKey]) - if err != nil { - return false, err // we return 
true here to avoid losing the machine in case of any error fetching the node - } - if val, exists = node.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && val == machineutils.PreserveMachineAnnotationValueFalse { - return false, nil - } - } - if machineSet.Status.AutoPreserveFailedMachineCount < machineSet.Spec.AutoPreserveFailedMachineMax { - err := c.annotateMachineForAutoPreservation(ctx, machine) - if err != nil { - return true, err - } - return true, nil - } - return false, nil -} - // syncMachineSet will sync the MachineSet with the given key if it has had its expectations fulfilled, // meaning it did not expect to see any more of its machines created or deleted. This function is not meant to be // invoked concurrently with the same key. @@ -605,6 +580,12 @@ func (c *controller) reconcileClusterMachineSet(key string) error { return err } + // triggerAutoPreservationOfFailedMachines adds the PreserveMachineAnnotationValuePreservedByMCM annotation + // to Failed machines to trigger auto-preservation, if applicable. + // We do not update machineSet.Status.AutoPreserveFailedMachineCount in this function, as it will be calculated + // and updated in the succeeding calls to calculateMachineSetStatus() and updateMachineSetStatus() + c.triggerAutoPreservationOfFailedMachines(ctx, filteredMachines, machineSet) + // TODO: Fix working of expectations to reflect correct behaviour // machineSetNeedsSync := c.expectations.SatisfiedExpectations(key) var manageReplicasErr error @@ -719,8 +700,7 @@ func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1al // in the earlier stages whenever possible. 
sort.Sort(ActiveMachines(filteredMachines)) // machines in Preserved stage will be the last ones to be deleted - // at all times, replica count will be upheld, even if it means deletion of a pending machine - // TODO@thiyyakat: write unit test for this scenario + // At all times, replica count will be upheld, even if it requires the deletion of a preserved machine filteredMachines = prioritisePreservedMachines(filteredMachines) } return filteredMachines[:diff] @@ -958,19 +938,3 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. return machine, retryErr } - -func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { - _, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { - if clone.Annotations == nil { - clone.Annotations = make(map[string]string) - } - clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM - return nil - }) - if err != nil { - return err - } - klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) - return nil - -} diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 2890c08b5..88d847017 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - corev1 "k8s.io/api/core/v1" "sync" "time" @@ -1050,221 +1049,6 @@ var _ = Describe("machineset", func() { }) }) - Describe("#isFailedMachineCandidateForPreservation", func() { - - type setup struct { - autoPreserveFailedMachineMax int32 - machineIsPreserved bool - machinePreserveAnnotationValue string - backingNode *corev1.Node - } - type expect struct { - result bool - err error - } - type testCase struct { - setup setup - expect expect 
- } - - DescribeTable("isFailedMachineCandidateForPreservation test cases", func(tc testCase) { - stop := make(chan struct{}) - defer close(stop) - - objects := []runtime.Object{} - - testMachineSet := &machinev1.MachineSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "MachineSet-test", - Namespace: testNamespace, - Labels: map[string]string{ - "test-label": "test-label", - }, - UID: "1234567", - }, - TypeMeta: metav1.TypeMeta{ - Kind: "MachineSet", - APIVersion: "machine.sapcloud.io/v1alpha1", - }, - Spec: machinev1.MachineSetSpec{ - Replicas: 2, - Template: machinev1.MachineTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "test-label": "test-label", - }, - }, - }, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "test-label": "test-label", - }, - }, - AutoPreserveFailedMachineMax: tc.setup.autoPreserveFailedMachineMax, - }, - Status: machinev1.MachineSetStatus{ - AutoPreserveFailedMachineCount: int32(0), - }, - } - - testMachine := &machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - UID: "1234568", - Labels: map[string]string{ - "test-label": "test-label", - }, - Annotations: map[string]string{ - machineutils.PreserveMachineAnnotationKey: tc.setup.machinePreserveAnnotationValue, - }, - }, - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - APIVersion: "machine.sapcloud.io/v1alpha1", - }, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: MachineFailed, - }, - }, - } - if tc.setup.backingNode != nil { - testMachine.Labels[machinev1.NodeLabelKey] = "node-1" - } - if tc.setup.machineIsPreserved { - testMachine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{ - Time: time.Now().Add(1 * time.Hour), - } - } - - objects = append(objects, testMachineSet, testMachine) - var targetCoreObjects []runtime.Object - if tc.setup.backingNode != nil { - targetCoreObjects = append(targetCoreObjects, tc.setup.backingNode) - } - c, trackers := 
createController(stop, testNamespace, objects, nil, targetCoreObjects) - defer trackers.Stop() - waitForCacheSync(stop, c) - result, err := c.isFailedMachineCandidateForPreservation(context.TODO(), testMachineSet, testMachine) - Expect(result).To(Equal(tc.expect.result)) - if tc.expect.err != nil { - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(Equal(tc.expect.err.Error())) - return - } - Expect(err).ToNot(HaveOccurred()) - }, - Entry("should return false for un-preserved machine, when autoPreserveFailedMachineMax is 0", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 0, - machineIsPreserved: false, - backingNode: nil, - }, - expect: expect{ - result: false, - err: nil, - }, - }), - Entry("should return true for un-preserved machine, when autoPreserveFailedMachineMax is 1", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 1, - machineIsPreserved: false, - backingNode: nil, - }, - expect: expect{ - result: true, - err: nil, - }, - }), - Entry("should return true for machine annotated with preserve=now", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 1, - machineIsPreserved: true, - machinePreserveAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, - backingNode: nil, - }, - expect: expect{ - result: true, - err: nil, - }, - }), - Entry("should return true for machine annotated with preserve=when-failed, but not yet preserved", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 1, - machineIsPreserved: false, - machinePreserveAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, - backingNode: nil, - }, - expect: expect{ - result: true, - err: nil, - }, - }), - Entry("should return false for machine annotated with preserve=false", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 1, - machineIsPreserved: false, - machinePreserveAnnotationValue: machineutils.PreserveMachineAnnotationValueFalse, - backingNode: nil, - }, - expect: expect{ - result: false, - err: nil, - }, - }), - 
Entry("should return false when backing node is annotated with preserve=false", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 1, - machineIsPreserved: false, - backingNode: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{ - machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueFalse, - }, - }, - }, - }, - expect: expect{ - result: false, - err: nil, - }, - }), - Entry("should return true when backing node has no preserve-annotation, and autoPreserveFailedMachineMax is 1", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 1, - machineIsPreserved: false, - backingNode: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - Annotations: map[string]string{}, - }, - }, - }, - expect: expect{ - result: true, - err: nil, - }, - }), - Entry("should return error when backing node is not found", testCase{ - setup: setup{ - autoPreserveFailedMachineMax: 1, - machineIsPreserved: false, - backingNode: &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "invalid", - Annotations: map[string]string{}, - }, - }, - }, - expect: expect{ - result: false, - err: errors.New("node \"node-1\" not found"), - }, - }), - ) - }) // TODO: This method has dependency on generic-machineclass. Implement later. Describe("#reconcileClusterMachineSet", func() { var ( diff --git a/pkg/controller/machineset_util.go b/pkg/controller/machineset_util.go index 78ce9d803..cf804fef7 100644 --- a/pkg/controller/machineset_util.go +++ b/pkg/controller/machineset_util.go @@ -216,3 +216,48 @@ func logMachinesToDelete(machines []*v1alpha1.Machine) { klog.V(3).Infof("Machine %q needs to be deleted", m.Name) } } + +// triggerAutoPreservationOfFailedMachines annotates failed machines with the auto-preservation annotation +// to trigger preservation of the machines by the machine controller, up to the limit defined in the +// MachineSet's AutoPreserveFailedMachineMax field. 
+func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) { + autoPreservationCapacityRemaining := machineSet.Spec.AutoPreserveFailedMachineMax - machineSet.Status.AutoPreserveFailedMachineCount + if autoPreservationCapacityRemaining <= 0 { + // no capacity remaining, nothing to do + return + } + for _, m := range machines { + if machineutils.IsMachineFailed(m) { + // check if machine is annotated with preserve=false, if yes, do not consider for preservation + if m.Annotations != nil && m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { + continue + } + if autoPreservationCapacityRemaining > 0 { + err := c.annotateMachineForAutoPreservation(ctx, m) + if err != nil { + klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) + // since annotateMachineForAutoPreservation uses retries internally, we can continue with other machines + continue + } + autoPreservationCapacityRemaining = autoPreservationCapacityRemaining - 1 + } + } + } +} + +// annotateMachineForAutoPreservation annotates the given machine with the auto-preservation annotation to trigger +// preservation of the machine by the machine controller. 
+func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { + _, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { + if clone.Annotations == nil { + clone.Annotations = make(map[string]string) + } + clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM + return nil + }) + if err != nil { + return err + } + klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) + return nil +} diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 53efea6c9..58665f28b 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -8,8 +8,6 @@ package controller import ( "context" "fmt" - "github.com/gardener/machine-controller-manager/pkg/util/nodeops" - clientretry "k8s.io/client-go/util/retry" "maps" "slices" "strings" @@ -60,11 +58,13 @@ func (c *controller) updateMachine(oldObj, newObj any) { klog.Errorf("couldn't convert to machine resource from object") return } - if c.handlePreserveAnnotationsChange(oldMachine.Annotations, newMachine.Annotations, newMachine) { + // to reconcile on change in annotations related to preservation + if machineutils.PreserveAnnotationsChanged(oldMachine.Annotations, newMachine.Annotations) { c.enqueueMachine(newObj, "handling machine object preservation related UPDATE event") return } + // this check enqueues a machine carrying the preserve annotation when its phase has just changed to Failed (NOTE(review): only the new phase is tested for Failed; confirm whether recovery from Failed to Running must also be enqueued here) if _, exists := newMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && newMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed && oldMachine.Status.CurrentStatus.Phase != 
newMachine.Status.CurrentStatus.Phase { c.enqueueMachine(newObj, "handling preserved machine phase update") } @@ -748,40 +748,6 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { Machine Preservation operations */ -// handlePreserveAnnotationsChange returns true if there is a change in preserve annotations -// it also handles the special case where the annotation is changed from 'now' to 'when-failed' -// in which case it stops the preservation if expiry time is already set -// when a machine is annotated with "now", the machine is preserved even when Running -// if the annotation has been changed to 'when-failed', we need to stop preservation if the machine is not in Failed Phase -func (c *controller) handlePreserveAnnotationsChange(oldAnnotations, newAnnotations map[string]string, machine *v1alpha1.Machine) bool { - valueNew, existsInNew := newAnnotations[machineutils.PreserveMachineAnnotationKey] - valueOld, existsInOld := oldAnnotations[machineutils.PreserveMachineAnnotationKey] - if existsInNew != existsInOld { - return true - } - if valueNew != machineutils.PreserveMachineAnnotationValueWhenFailed || valueOld != machineutils.PreserveMachineAnnotationValueNow { - changed := valueOld != valueNew - return changed - } - // Special case: annotation changed from 'now' to 'when-failed' - if machine.Status.CurrentStatus.PreserveExpiryTime == nil { - return true - } - if machineutils.IsMachineFailed(machine) { - // If machine is already in failed state, no need to stop preservation - return true - } - ctx := context.Background() - err := clientretry.RetryOnConflict(nodeops.Backoff, func() error { - klog.V(3).Infof("Stopping preservation for machine %q as preserve annotation changed from 'now' to 'when-failed'.", machine.Name) - return c.stopMachinePreservation(ctx, machine) - }) - if err != nil { - klog.Errorf("error while stopping preservation for machine %q: %v. 
Use preserve=false to stop preservation.", machine.Name, err) - } - return true -} - // manageMachinePreservation checks if any preservation-related operations need to be performed on the machine and node objects func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { defer func() { @@ -815,21 +781,36 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a klog.Warningf("Preserve annotation value %q on machine %s is invalid", preserveValue, machine.Name) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { - err = c.stopMachinePreservation(ctx, clone) + err = c.stopMachinePreservation(ctx, clone, true) return } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { if machineutils.IsMachineFailed(clone) { err = c.preserveMachine(ctx, clone, preserveValue) + } else { + // Here, if the preserve value is when-failed, the preserveExpiry is set, but the machine is not Failed, there are 2 scenarios that need to be handled: + // 1. The machine was initially annotated with preserve=now and has been preserved, but later the annotation was changed to when-failed. + // 2. The machine was initially annotated with preserve=when-failed, was preserved on failure and has recovered from Failed to Running. + // In both cases, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. 
+ err = c.stopMachinePreservation(ctx, clone, false) + if err != nil { + return + } + // If the machine is running and has a backing node, uncordon the node if cordoned + // this is to handle the scenario where a preserved machine recovers from Failed to Running + if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { + err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) + } } - // Here, if the preserve value is when-failed, but the machine is in running, there could be 2 possibilities: - // 1. The machine was initially annotated with preserve=now and has been preserved, but later the annotation was changed to when-failed. In this case, - // we want to stop preservation. This case is already being handled in updateMachine and updateNodeToMachine functions. - // 2. The machine was initially annotated with preserve=when-failed and has recovered from Failed to Running. In this case, - // we want to continue preservation until the annotation is changed to false or the preservation times out, so that CA does not - // scale down the node before pods are assigned to it. 
- return } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { err = c.preserveMachine(ctx, clone, preserveValue) + if err != nil { + return + } + // If the machine is running and has a backing node, uncordon the node if cordoned + // this is to handle the scenario where a preserved machine recovers from Failed to Running + if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { + err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) + } return } return diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index e3b72fd2c..af1bf0781 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -3999,136 +3999,6 @@ var _ = Describe("machine", func() { ) }) - Describe("#handlePreserveAnnotationsChange", func() { - type setup struct { - oldPreserveValue string - newPreserveValue string - phase v1alpha1.MachinePhase - } - - type expect struct { - change bool - } - type testCase struct { - setup setup - expect expect - } - DescribeTable("##handlePreserveAnnotationsChange scenarios", func(tc testCase) { - stop := make(chan struct{}) - defer close(stop) - - // Build machine object - machine := &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: "node-1", - }, - }, Status: v1alpha1.MachineStatus{ - CurrentStatus: v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - LastUpdateTime: metav1.Now(), - }, - }, - } - if tc.setup.phase != "" { - machine.Status.CurrentStatus.Phase = tc.setup.phase - } - if tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueNow || tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - 
machine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)} - } else if tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && machineutils.IsMachineFailed(machine) { - machine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)} - } - - controlMachineObjects := []runtime.Object{machine} - - // Build node object - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node-1", - }, - } - targetCoreObjects := []runtime.Object{node} - - c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) - defer trackers.Stop() - - waitForCacheSync(stop, c) - result := c.handlePreserveAnnotationsChange(map[string]string{machineutils.PreserveMachineAnnotationKey: tc.setup.oldPreserveValue}, map[string]string{machineutils.PreserveMachineAnnotationKey: tc.setup.newPreserveValue}, machine) - Expect(result).To(Equal(tc.expect.change)) - if tc.setup.newPreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed && tc.setup.oldPreserveValue == machineutils.PreserveMachineAnnotationValueNow { - updatedMachine, err := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - if tc.setup.phase == v1alpha1.MachineFailed { - Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeFalse()) // machine preservation should be active - } else { - Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) // machine preservation should have been stopped - } - } - }, - Entry("no change in preserve annotations", testCase{ - setup: setup{ - oldPreserveValue: "someValue", - newPreserveValue: "someValue", - }, - expect: expect{ - change: false, - }, - }), - Entry("preserve annotation newly added on machine", testCase{ - setup: setup{ - newPreserveValue: "newValue", - }, - expect: expect{ - 
change: true, - }, - }), - Entry("preserve annotation removed", testCase{ - setup: setup{ - oldPreserveValue: "someValue", - newPreserveValue: "", - }, - expect: expect{ - change: true, - }, - }), - Entry("preserve annotation value changed", testCase{ - setup: setup{ - oldPreserveValue: "oldValue", - newPreserveValue: "newValue"}, - expect: expect{ - change: true, - }, - }), - Entry("both annotations are nil", testCase{ - setup: setup{}, - expect: expect{ - change: false, - }, - }), - Entry("preserve annotation changed from now to when-failed on Running machine", testCase{ - setup: setup{ - oldPreserveValue: machineutils.PreserveMachineAnnotationValueNow, - newPreserveValue: machineutils.PreserveMachineAnnotationValueWhenFailed, - }, - expect: expect{ - change: true, - }, - }), - Entry("preserve annotation changed from now to when-failed on Failed machine", testCase{ - setup: setup{ - oldPreserveValue: machineutils.PreserveMachineAnnotationValueNow, - newPreserveValue: machineutils.PreserveMachineAnnotationValueWhenFailed, - phase: v1alpha1.MachineFailed, - }, - expect: expect{ - change: true, - }, - }), - ) - }) - Describe("#computeEffectivePreserveAnnotationValue", func() { type setup struct { machinePreserveAnnotation string diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index f52ad0147..09ed61cea 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -405,7 +405,7 @@ func (c *controller) inPlaceUpdate(ctx context.Context, machine *v1alpha1.Machin cond.Reason = v1alpha1.ReadyForUpdate cond.LastTransitionTime = metav1.Now() cond.Message = "Node is ready for in-place update" - if err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), *cond); err != nil { + if _, err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), *cond); err != nil { return 
machineutils.ShortRetry, err } // give machine time for update to get applied @@ -480,7 +480,7 @@ func (c *controller) updateMachineStatusAndNodeCondition(ctx context.Context, ma cond.Reason = v1alpha1.DrainSuccessful cond.Message = "Node draining successful" - if err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), *cond); err != nil { + if _, err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, getNodeName(machine), *cond); err != nil { return machineutils.ShortRetry, err } @@ -982,15 +982,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph klog.Warning(err) } } else { - // if machine was preserved and in Failed phase, uncordon node so that pods can be scheduled on it again - if cond := nodeops.GetCondition(node, v1alpha1.NodePreserved); cond != nil && machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { - nodeCopy := node.DeepCopy() - nodeCopy.Spec.Unschedulable = false - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - return machineutils.ShortRetry, err - } - } // Machine rejoined the cluster after a health-check description = fmt.Sprintf("Machine %s successfully re-joined the cluster", clone.Name) lastOperationType = v1alpha1.MachineOperationHealthCheck @@ -2121,7 +2112,7 @@ func (c *controller) UpdateNodeTerminationCondition(ctx context.Context, machine setTerminationReasonByPhase(machine.Status.CurrentStatus.Phase, &terminationCondition) } - err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, terminationCondition) + _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, terminationCondition) if apierrors.IsNotFound(err) { return nil } @@ -2369,12 +2360,13 @@ Utility Functions for Machine Preservation // preserveMachine contains logic to start the preservation of a machine and node. 
func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { + var err error nodeName := machine.Labels[v1alpha1.NodeLabelKey] updatedMachine := machine.DeepCopy() if machine.Status.CurrentStatus.PreserveExpiryTime == nil { klog.V(4).Infof("Starting preservation flow for machine %q.", machine.Name) // Step 1: Add preserveExpiryTime to machine status - updatedMachine, err := c.setPreserveExpiryTimeOnMachine(ctx, updatedMachine) + updatedMachine, err = c.setPreserveExpiryTimeOnMachine(ctx, updatedMachine) if err != nil { return err } @@ -2398,20 +2390,19 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach // Preservation incomplete - either the flow is just starting or in progress // Step 2: Add annotations to prevent scale down of node by CA - _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) + updatedNode, err := c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { return err } - drainSuccessful := false - if c.shouldNodeBeDrained(updatedMachine, existingNodePreservedCondition) { + if c.shouldPreservedNodeBeDrained(updatedMachine, existingNodePreservedCondition) { // Step 3: If machine is in Failed Phase, drain the backing node err = c.drainPreservedNode(ctx, machine) if err != nil { newCond, needsUpdate := c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) if needsUpdate { // Step 4a: Update NodePreserved Condition on Node, with drain unsuccessful status - _ = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, *newCond) + _, _ = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) return err } return err @@ -2421,7 +2412,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach // Step 4b: Update NodePreserved Condition on Node with drain successful status newCond, needsUpdate := 
c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) if needsUpdate { - err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, nodeName, *newCond) + _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) if err != nil { klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", nodeName, machine.Name, err) return err @@ -2431,6 +2422,47 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return nil } +// stopMachinePreservation stops the preservation of the machine and node +func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine, removeCAScaleDownDisabledAnnotation bool) error { + // removal of preserveExpiryTime is the last step of stopping preservation + // if preserveExpiryTime is not set, preservation is already stopped + if machine.Status.CurrentStatus.PreserveExpiryTime == nil { + return nil + } + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + if nodeName != "" { + // Machine has a backing node + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) + return err + } + // prepare NodeCondition to set preservation as stopped + preservedConditionFalse := v1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: v1.ConditionFalse, + LastTransitionTime: metav1.Now(), + Reason: v1alpha1.PreservationStopped, + } + // Step 1: change node condition to reflect that preservation has stopped + updatedNode, err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, node.Name, preservedConditionFalse) + if err != nil { + return err + } + // Step 2: remove CA scale down disabled annotation from node + // only remove if removeCAScaleDownDisabledAnnotation is not "when-failed" since in that case, + // scale down should remain disabled even after preservation is stopped + if removeCAScaleDownDisabledAnnotation { + err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, updatedNode) + if err != nil { + return err + } + } + } + // Step 3: update machine status to set preserve expiry time to nil + return c.clearMachinePreserveExpiryTime(ctx, machine) +} + // setPreserveExpiryTimeOnMachine sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { @@ -2485,7 +2517,38 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, return updatedNode, nil } -// getNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation +// removeCAScaleDownDisabledAnnotationOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node +func (c *controller) removeCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) error { + // Check if annotation already absent + if node.Annotations == nil || + node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == "" { + return nil 
+ } + nodeCopy := node.DeepCopy() + delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) + _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("node UPDATE failed for node %q. Retrying, error: %s", node.Name, err) + return err + } + return nil +} + +func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string) error { + node, err := c.nodeLister.Get(nodeName) + if err != nil { + return err + } + if !node.Spec.Unschedulable { + return nil + } + clonedNode := node.DeepCopy() + clonedNode.Spec.Unschedulable = false + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, clonedNode, metav1.UpdateOptions{}) + return err +} + +// computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation func (c *controller) computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { var newNodePreservedCondition *v1.NodeCondition var needsUpdate bool @@ -2516,15 +2579,15 @@ func (c *controller) computeNewNodePreservedCondition(machinePhase v1alpha1.Mach needsUpdate = true } if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - newNodePreservedCondition.Reason = v1alpha1.NodePreservedByMCM + newNodePreservedCondition.Reason = v1alpha1.PreservedByMCM } else { - newNodePreservedCondition.Reason = v1alpha1.NodePreservedByUser + newNodePreservedCondition.Reason = v1alpha1.PreservedByUser } return newNodePreservedCondition, needsUpdate } -// shouldNodeBeDrained returns true if the machine's backing node must be drained, else false -func (c *controller) shouldNodeBeDrained(machine *v1alpha1.Machine, existingCondition *v1.NodeCondition) bool { +// shouldPreservedNodeBeDrained returns true if the machine's backing node must be drained, else 
false +func (c *controller) shouldPreservedNodeBeDrained(machine *v1alpha1.Machine, existingCondition *v1.NodeCondition) bool { if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { if existingCondition == nil { return true @@ -2534,48 +2597,11 @@ func (c *controller) shouldNodeBeDrained(machine *v1alpha1.Machine, existingCond return false } -// stopMachinePreservation stops the preservation of the machine and node -func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) error { - // removal of preserveExpiryTime is the last step of stopping preservation - // if preserveExpiryTime is not set, preservation is already stopped +// clearMachinePreserveExpiryTime clears the PreserveExpiryTime on the machine object's Status.CurrentStatus +func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine *v1alpha1.Machine) error { if machine.Status.CurrentStatus.PreserveExpiryTime == nil { return nil } - nodeName := machine.Labels[v1alpha1.NodeLabelKey] - if nodeName != "" { - // Machine has a backing node - node, err := c.nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) - return err - } - // prepare NodeCondition to set preservation as stopped - preservedConditionFalse := v1.NodeCondition{ - Type: v1alpha1.NodePreserved, - Status: v1.ConditionFalse, - LastTransitionTime: metav1.Now(), - Reason: v1alpha1.NodePreservationStopped, - } - // Step 1: change node condition to reflect that preservation has stopped - updatedNode := nodeops.AddOrUpdateCondition(node, preservedConditionFalse) - updatedNode, err = c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, updatedNode, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %s", nodeName, machine.Name, err) - return err - } - // Step 2: remove CA's scale-down disabled annotations to allow CA to scale down node if needed - // fetch latest node object since cache may be not be up-to-date with node updated earlier - if updatedNode.Annotations != nil && updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] != "" { - nodeCopy := updatedNode.DeepCopy() - delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("node UPDATE failed for node %q of machine %q. 
Retrying, error: %s", nodeName, machine.Name, err) - return err - } - } - } - // Step 3: update machine status to set preserve expiry time to nil clone := machine.DeepCopy() clone.Status.CurrentStatus.PreserveExpiryTime = nil clone.Status.CurrentStatus.LastUpdateTime = metav1.Now() diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 7e25d5ed2..6dfcd6e53 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4077,7 +4077,7 @@ var _ = Describe("machine_util", func() { preserveNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, }, }, }), @@ -4094,7 +4094,7 @@ var _ = Describe("machine_util", func() { preserveNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, Message: machinev1.PreservedNodeDrainSuccessful, }, }, @@ -4112,7 +4112,7 @@ var _ = Describe("machine_util", func() { preserveNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, Message: machinev1.PreservedNodeDrainSuccessful, }, }, @@ -4131,7 +4131,7 @@ var _ = Describe("machine_util", func() { preserveNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, Message: machinev1.PreservedNodeDrainSuccessful, }, }, @@ -4145,7 +4145,7 @@ var _ = Describe("machine_util", func() { preservedNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, Message: 
machinev1.PreservedNodeDrainUnsuccessful, }, }, @@ -4156,7 +4156,7 @@ var _ = Describe("machine_util", func() { preserveNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, Message: machinev1.PreservedNodeDrainSuccessful, }, }, @@ -4174,7 +4174,7 @@ var _ = Describe("machine_util", func() { preserveNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, Message: machinev1.PreservedNodeDrainSuccessful, }, }, @@ -4192,7 +4192,7 @@ var _ = Describe("machine_util", func() { preserveNodeCondition: corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByMCM, + Reason: machinev1.PreservedByMCM, Message: machinev1.PreservedNodeDrainSuccessful, }, }, @@ -4214,7 +4214,8 @@ var _ = Describe("machine_util", func() { }) Describe("#stopMachinePreservation", func() { type setup struct { - nodeName string + nodeName string + removeCAAnnotation bool } type expect struct { @@ -4261,7 +4262,7 @@ var _ = Describe("machine_util", func() { { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, - Reason: machinev1.NodePreservedByUser, + Reason: machinev1.PreservedByUser, }, }, }, @@ -4277,7 +4278,7 @@ var _ = Describe("machine_util", func() { c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.stopMachinePreservation(context.TODO(), machine) + err := c.stopMachinePreservation(context.TODO(), machine, tc.setup.removeCAAnnotation) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(tc.expect.err.Error())) @@ -4293,15 +4294,21 @@ var _ = Describe("machine_util", func() { } updatedNode, getErr := 
c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) Expect(getErr).To(BeNil()) - Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("")) + if tc.setup.removeCAAnnotation { + Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("")) + } else { + Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("true")) + } + updatedNodeCondition := nodeops.GetCondition(updatedNode, machinev1.NodePreserved) Expect(updatedNodeCondition).ToNot(BeNil()) Expect(updatedNodeCondition.Status).To(Equal(corev1.ConditionFalse)) - Expect(updatedNodeCondition.Reason).To(Equal(machinev1.NodePreservationStopped)) + Expect(updatedNodeCondition.Reason).To(Equal(machinev1.PreservationStopped)) }, Entry("when stopping preservation on a preserved machine with backing node", &testCase{ setup: setup{ - nodeName: "node-1", + nodeName: "node-1", + removeCAAnnotation: true, }, expect: expect{ err: nil, @@ -4309,7 +4316,8 @@ var _ = Describe("machine_util", func() { }), Entry("when stopping preservation on a preserved machine with no backing node", &testCase{ setup: setup{ - nodeName: "", + nodeName: "", + removeCAAnnotation: true, }, expect: expect{ err: nil, @@ -4323,6 +4331,15 @@ var _ = Describe("machine_util", func() { err: fmt.Errorf("node \"no-backing-node\" not found"), }, }), + Entry("when stopping preservation on a preserved machine, but retaining CA annotation", &testCase{ + setup: setup{ + nodeName: "node-1", + removeCAAnnotation: false, + }, + expect: expect{ + err: nil, + }, + }), ) }) }) diff --git a/pkg/util/provider/machinecontroller/node.go b/pkg/util/provider/machinecontroller/node.go index ae28505d7..7cdf53e73 100644 --- a/pkg/util/provider/machinecontroller/node.go +++ b/pkg/util/provider/machinecontroller/node.go @@ -100,14 +100,11 @@ func (c *controller) updateNode(oldObj, newObj any) { if 
nodeConditionsHaveChanged && !(isMachineCrashLooping || isMachineTerminating) { c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. Conditions of node %q differ from machine status", node.Name)) } - // to reconcile on change in annotations related to preservation - if c.handlePreserveAnnotationsChange(oldNode.Annotations, node.Annotations, machine) { - klog.V(3).Infof("Node %q for machine %q is annotated for preservation with value %q.", node.Name, machine.Name, node.Annotations[machineutils.PreserveMachineAnnotationKey]) + if machineutils.PreserveAnnotationsChanged(oldNode.Annotations, node.Annotations) { c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. Preserve annotations added or updated for node %q", getNodeName(machine))) return } - } func (c *controller) deleteNode(obj any) { diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index ea968fb76..89fef24a2 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -6,6 +6,7 @@ package machineutils import ( + "k8s.io/klog/v2" "time" v1 "k8s.io/api/core/v1" @@ -94,7 +95,8 @@ const ( PreserveMachineAnnotationValueWhenFailed = "when-failed" // PreserveMachineAnnotationValuePreservedByMCM is the annotation value used to explicitly request that - // a Machine be preserved if and when in it enters Failed phase + // a Machine be preserved if and when in it enters Failed phase. + // The AutoPreserveFailedMachineMax, set on the MCD, is enforced based on the number of machines annotated with this value. 
PreserveMachineAnnotationValuePreservedByMCM = "auto-preserved" //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that @@ -152,7 +154,36 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { return m.Annotations[MachinePriority] == "1" } -// HasPreservationTimedOut checks if the Status.CurrentStatus.PreserveExpiryTime has not yet passed -func HasPreservationTimedOut(m *v1alpha1.Machine) bool { - return !m.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) +// PreserveAnnotationsChanged returns true if there is a change in preserve annotations +func PreserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string) bool { + valueNew, existsInNew := newAnnotations[PreserveMachineAnnotationKey] + valueOld, existsInOld := oldAnnotations[PreserveMachineAnnotationKey] + if existsInNew != existsInOld { + return true + } + if valueNew != valueOld { + return true + } + return false +} + +// IsFailedMachineCandidateForPreservation checks if the failed machine is already preserved, in the process of being preserved +// or if it is a candidate for auto-preservation +func IsFailedMachineCandidateForPreservation(machine *v1alpha1.Machine) bool { + // if preserve expiry time is set and is in the future, machine is already preserved + if machine.Status.CurrentStatus.PreserveExpiryTime != nil && machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { + klog.V(3).Infof("Failed machine %q is preserved until %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + return true + } + val, exists := machine.Annotations[PreserveMachineAnnotationKey] + // if the machine preservation is not complete yet even though the machine is annotated, consider it as a candidate for preservation + if exists { + switch val { + case PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValuePreservedByMCM: // this is in case preservation process is not complete yet 
+ return true + case PreserveMachineAnnotationValueFalse: + return false + } + } + return false } From e796a930746042776e46984ef7e1d9371ada4e9f Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 16 Jan 2026 11:02:51 +0530 Subject: [PATCH 50/79] Handle auto-preserved case similar to when-failed case --- pkg/util/provider/machinecontroller/machine.go | 9 +++++++-- pkg/util/provider/machinecontroller/machine_test.go | 2 +- pkg/util/provider/machinecontroller/machine_util.go | 4 ++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 58665f28b..314d6c842 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -783,7 +783,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { err = c.stopMachinePreservation(ctx, clone, true) return - } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { + } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { if machineutils.IsMachineFailed(clone) { err = c.preserveMachine(ctx, clone, preserveValue) } else { @@ -791,6 +791,10 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a // 1. The machine was initially annotated with preserve=now and has been preserved, but later the annotation was changed to when-failed. // 2. The machine was initially annotated with preserve=when-failed, was preserved on failure and has recovered from Failed to Running. // In both cases, we need to clear preserveExpiryTime and update Node condition if applicable. 
However, the CA annotation needs to be retained. + + // If the preserve value is auto-preserved, and the machine is Running, it would mean the machine has recovered from Failure to Running. + // In this case, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. + // If the machine fails again, since preserve annotation is present, it will be preserved again. err = c.stopMachinePreservation(ctx, clone, false) if err != nil { return @@ -801,7 +805,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) } } - } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow { err = c.preserveMachine(ctx, clone, preserveValue) if err != nil { return @@ -811,6 +815,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) } + // since the preserve value is 'now', machine preservation need not be stopped. 
return } return diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index af1bf0781..3b8a87bb8 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -4333,7 +4333,7 @@ var _ = Describe("machine", func() { machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, nodeAnnotationValue: "", nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, + machinePhase: v1alpha1.MachineFailed, }, expect: expect{ preserveExpiryTimeIsSet: true, diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 09ed61cea..9ee12b11a 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2375,6 +2375,10 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) return nil } + } else if nodeName == "" { + // Machine has no backing node, preservation is complete + klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) + return nil } // Machine has a backing node node, err := c.nodeLister.Get(nodeName) From ff5b90a34855e20ae0fbdfe891e510da4995bc99 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 16 Jan 2026 15:42:10 +0530 Subject: [PATCH 51/79] Fix bugs, incorporate design change for when-failed, and add tests --- .../provider/machinecontroller/machine.go | 37 +++- .../machinecontroller/machine_test.go | 90 ++++++--- .../machinecontroller/machine_util.go | 25 +-- .../machinecontroller/machine_util_test.go | 171 ++++++++++++++++- pkg/util/provider/machineutils/utils.go | 24 ++- pkg/util/provider/machineutils/utils_test.go | 174 ++++++++++++++++++ 6 files 
changed, 457 insertions(+), 64 deletions(-) create mode 100644 pkg/util/provider/machineutils/utils_test.go diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 314d6c842..9f5198c96 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -777,30 +777,51 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } } - if !isPreserveAnnotationValueValid(preserveValue) { - klog.Warningf("Preserve annotation value %q on machine %s is invalid", preserveValue, machine.Name) + if !machineutils.AllowedPreserveAnnotationValues.Has(preserveValue) { + klog.Warningf("Preserve annotation value %q on machine %q is invalid", preserveValue, machine.Name) return - } else if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { - err = c.stopMachinePreservation(ctx, clone, true) + } + // if preserve=false or if preservation has expired, stop preservation + if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { + err = c.stopMachinePreservationIfPreserved(ctx, clone, true) return - } else if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + } + if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { if machineutils.IsMachineFailed(clone) { err = c.preserveMachine(ctx, clone, preserveValue) } else { - // Here, if the preserve value is when-failed, the preserveExpiry is set, but the machine is not Failed, there are 2 scenarios that need to be handled: + // 
Here, if the preserve value is when-failed, the preserveExpiry is set, but the machine is not in Failed Phase, there are 2 scenarios that need to be handled: // 1. The machine was initially annotated with preserve=now and has been preserved, but later the annotation was changed to when-failed. // 2. The machine was initially annotated with preserve=when-failed, was preserved on failure and has recovered from Failed to Running. // In both cases, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. - // If the preserve value is auto-preserved, and the machine is Running, it would mean the machine has recovered from Failure to Running. + // If the preserve value is auto-preserved, and the machine is Running, it would mean the machine has recovered from Failed phase to Running phase. // In this case, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. // If the machine fails again, since preserve annotation is present, it will be preserved again. - err = c.stopMachinePreservation(ctx, clone, false) + + // CA scale down disabled annotation is retained on a machine on recovery from Failed to Running, so that + // CA does not scale down the node due to under-utilization immediately after recovery. 
+ // This allows pods to get scheduled onto the recovered node + + if machine.Labels[v1alpha1.NodeLabelKey] != "" { + var node *corev1.Node + node, err = c.nodeLister.Get(machine.Labels[v1alpha1.NodeLabelKey]) + if err != nil { + klog.Errorf("error getting node %q for machine %q: %v", machine.Labels[v1alpha1.NodeLabelKey], machine.Name, err) + return + } + _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) + if err != nil { + return + } + } + err = c.stopMachinePreservationIfPreserved(ctx, clone, false) if err != nil { return } // If the machine is running and has a backing node, uncordon the node if cordoned // this is to handle the scenario where a preserved machine recovers from Failed to Running + // in which case, pods should be allowed to be scheduled onto the node if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) } diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 3b8a87bb8..22df6898c 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -7,6 +7,7 @@ package controller import ( "context" "fmt" + "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" k8stesting "k8s.io/client-go/testing" "math" "time" @@ -4146,10 +4147,11 @@ var _ = Describe("machine", func() { preserveExpiryTime *metav1.Time } type expect struct { - retry machineutils.RetryPeriod - preserveExpiryTimeIsSet bool - err error - nodeCondition *corev1.NodeCondition + retry machineutils.RetryPeriod + preserveExpiryTimeIsSet bool + nodeCondition *corev1.NodeCondition + CAScaleDownDisabledAnnotationPresent bool + err error } type testCase struct { setup setup @@ -4238,6 +4240,14 @@ var _ = Describe("machine", func() { } else { Expect(found).To(BeFalse()) } + if 
tc.expect.CAScaleDownDisabledAnnotationPresent { + val, ok := updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] + Expect(ok).To(BeTrue()) + Expect(val).To(Equal(autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue)) + } else { + _, ok := updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] + Expect(ok).To(BeFalse()) + } } }, Entry("no preserve annotation on machine and node", testCase{ @@ -4245,9 +4255,10 @@ var _ = Describe("machine", func() { nodeName: "node-1", }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: false, }, }), Entry("preserve annotation 'now' added on Running machine", testCase{ @@ -4261,7 +4272,8 @@ var _ = Describe("machine", func() { nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: true, }, }), Entry("preserve annotation 'when-failed' added on Running machine", testCase{ @@ -4271,9 +4283,10 @@ var _ = Describe("machine", func() { machinePhase: v1alpha1.MachineRunning, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: true, }, }), Entry("Failed machine annotated with when-failed", testCase{ @@ -4287,7 +4300,8 @@ var _ = Describe("machine", func() { nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: true, }, }), Entry("preserve annotation 'now' added on Healthy node ", testCase{ @@ -4301,7 +4315,8 @@ var 
_ = Describe("machine", func() { nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: true, }, }), Entry("preserve annotation 'when-failed' added on Healthy node ", testCase{ @@ -4311,9 +4326,10 @@ var _ = Describe("machine", func() { machinePhase: v1alpha1.MachineRunning, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: true, }}), Entry("preserve annotation 'false' added on backing node of preserved machine", testCase{ setup: setup{ @@ -4323,9 +4339,10 @@ var _ = Describe("machine", func() { preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: false, }, }), Entry("machine auto-preserved by MCM", testCase{ @@ -4340,7 +4357,8 @@ var _ = Describe("machine", func() { nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, + retry: machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: true, }, }), Entry("preservation timed out", testCase{ @@ -4352,12 +4370,13 @@ var _ = Describe("machine", func() { preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Minute)}, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, - retry: machineutils.LongRetry, + preserveExpiryTimeIsSet: false, + nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, + retry: 
machineutils.LongRetry, + CAScaleDownDisabledAnnotationPresent: false, }, }), - Entry("invalid preserve annotation on node of unpreserved machine", testCase{ + Entry("invalid preserve annotation on node of un-preserved machine", testCase{ setup: setup{ machineAnnotationValue: "", nodeAnnotationValue: "invalidValue", @@ -4365,10 +4384,11 @@ var _ = Describe("machine", func() { machinePhase: v1alpha1.MachineRunning, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, - err: nil, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + err: nil, + CAScaleDownDisabledAnnotationPresent: false, }, }), Entry("machine annotated with preserve=now, but has no backing node", testCase{ @@ -4399,6 +4419,20 @@ var _ = Describe("machine", func() { err: fmt.Errorf("node %q not found", "invalid"), }, }), + Entry("machine annotated with auto-preserved and in Running phase after recovery from failure", testCase{ + setup: setup{ + machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + nodeAnnotationValue: "", + nodeName: "invalid", + machinePhase: v1alpha1.MachineRunning, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.ShortRetry, + err: fmt.Errorf("node %q not found", "invalid"), + }, + }), ) }) }) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 9ee12b11a..fe8b84dae 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2403,7 +2403,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach // Step 3: If machine is in Failed Phase, drain the backing node err = c.drainPreservedNode(ctx, machine) if err != nil { - newCond, needsUpdate := c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, 
existingNodePreservedCondition) + newCond, needsUpdate := computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) if needsUpdate { // Step 4a: Update NodePreserved Condition on Node, with drain unsuccessful status _, _ = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) @@ -2414,7 +2414,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach drainSuccessful = true } // Step 4b: Update NodePreserved Condition on Node with drain successful status - newCond, needsUpdate := c.computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) + newCond, needsUpdate := computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) if needsUpdate { _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) if err != nil { @@ -2426,10 +2426,10 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return nil } -// stopMachinePreservation stops the preservation of the machine and node -func (c *controller) stopMachinePreservation(ctx context.Context, machine *v1alpha1.Machine, removeCAScaleDownDisabledAnnotation bool) error { +// stopMachinePreservationIfPreserved stops the preservation of the machine and node +func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine, removeCAScaleDownDisabledAnnotation bool) error { // removal of preserveExpiryTime is the last step of stopping preservation - // if preserveExpiryTime is not set, preservation is already stopped + // therefore, if preserveExpiryTime is not set, machine is not preserved if machine.Status.CurrentStatus.PreserveExpiryTime == nil { return nil } @@ -2464,19 +2464,22 @@ func (c *controller) stopMachinePreservation(ctx 
context.Context, machine *v1alp } } // Step 3: update machine status to set preserve expiry time to nil - return c.clearMachinePreserveExpiryTime(ctx, machine) + err := c.clearMachinePreserveExpiryTime(ctx, machine) + if err != nil { + return err + } + klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) + return nil } // setPreserveExpiryTimeOnMachine sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { - preservedCurrentStatus := v1alpha1.CurrentStatus{ Phase: machine.Status.CurrentStatus.Phase, TimeoutActive: machine.Status.CurrentStatus.TimeoutActive, LastUpdateTime: metav1.Now(), PreserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(c.getEffectiveMachinePreserveTimeout(machine).Duration)}, } - machine.Status.CurrentStatus = preservedCurrentStatus updatedMachine, err := c.controlMachineClient.Machines(machine.Namespace).UpdateStatus(ctx, machine, metav1.UpdateOptions{}) if err != nil { @@ -2507,7 +2510,6 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { return node, nil } - CAScaleDownAnnotation := map[string]string{ autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, } @@ -2553,7 +2555,7 @@ func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string } // computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation -func (c *controller) computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { +func 
computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { var newNodePreservedCondition *v1.NodeCondition var needsUpdate bool if existingNodeCondition == nil { @@ -2573,7 +2575,7 @@ func (c *controller) computeNewNodePreservedCondition(machinePhase v1alpha1.Mach newNodePreservedCondition.Status = v1.ConditionTrue needsUpdate = true } - } else if newNodePreservedCondition.Status != v1.ConditionFalse { + } else if newNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainUnsuccessful { newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful newNodePreservedCondition.Status = v1.ConditionFalse needsUpdate = true @@ -2614,7 +2616,6 @@ func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine klog.Errorf("machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) return err } - klog.V(3).Infof("Preservation stopped for machine %q", machine.Name) return nil } diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 6dfcd6e53..828faf122 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4051,7 +4051,6 @@ var _ = Describe("machine_util", func() { Expect(updatedNodeCondition.Reason).To(Equal(tc.expect.preserveNodeCondition.Reason)) Expect(updatedNodeCondition.Message).To(Equal(tc.expect.preserveNodeCondition.Message)) } - }, Entry("when preserve=now and there is no backing node", &testCase{ setup: setup{ @@ -4212,7 +4211,7 @@ var _ = Describe("machine_util", func() { ), ) }) - Describe("#stopMachinePreservation", func() { + Describe("#stopMachinePreservationIfPreserved", func() { type setup struct { nodeName string removeCAAnnotation bool @@ -4278,7 +4277,7 @@ var _ = Describe("machine_util", func() { c, trackers := 
createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.stopMachinePreservation(context.TODO(), machine, tc.setup.removeCAAnnotation) + err := c.stopMachinePreservationIfPreserved(context.TODO(), machine, tc.setup.removeCAAnnotation) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(tc.expect.err.Error())) @@ -4342,4 +4341,170 @@ var _ = Describe("machine_util", func() { }), ) }) + Describe("#computeNewNodePreservedCondition", func() { + type setup struct { + machinePhase machinev1.MachinePhase + preserveValue string + drainSuccess bool + existingNodeCondition *corev1.NodeCondition + } + type expect struct { + newNodeCondition *corev1.NodeCondition + needsUpdate bool + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("##computeNewNodePreservedCondition behaviour scenarios", + func(tc *testCase) { + newNodeCondition, needsUpdate := computeNewNodePreservedCondition( + tc.setup.machinePhase, + tc.setup.preserveValue, + tc.setup.drainSuccess, + tc.setup.existingNodeCondition, + ) + if tc.expect.newNodeCondition == nil { + Expect(newNodeCondition).To(BeNil()) + } else { + Expect(newNodeCondition.Type).To(Equal(tc.expect.newNodeCondition.Type)) + Expect(newNodeCondition.Status).To(Equal(tc.expect.newNodeCondition.Status)) + Expect(newNodeCondition.Reason).To(Equal(tc.expect.newNodeCondition.Reason)) + Expect(newNodeCondition.Message).To(Equal(tc.expect.newNodeCondition.Message)) + } + Expect(needsUpdate).To(Equal(tc.expect.needsUpdate)) + }, + Entry("when preserve=now, machine is Running, no existing condition", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineRunning, + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + existingNodeCondition: nil, + }, + expect: expect{ + newNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + 
Reason: machinev1.PreservedByUser, + }, + needsUpdate: true, + }, + }), + Entry("when preserve=now, machine is Failed, drain successful, no existing condition", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineFailed, + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + drainSuccess: true, + existingNodeCondition: nil, + }, + expect: expect{ + newNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + needsUpdate: true, + }, + }), + Entry("when preserve=now, machine is Failed, drain is unsuccessful, no existing condition", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineFailed, + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + drainSuccess: false, + existingNodeCondition: nil, + }, + expect: expect{ + newNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainUnsuccessful, + }, + needsUpdate: true, + }, + }), + Entry("when machine auto-preserved by MCM, machine is Failed, drain is successful, no existing condition", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineFailed, + preserveValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + drainSuccess: true, + existingNodeCondition: nil, + }, + expect: expect{ + newNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.PreservedByMCM, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + needsUpdate: true, + }, + }), + Entry("when preserve=now, machine is Failed, drain is unsuccessful, existing condition present", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineFailed, + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + drainSuccess: false, + existingNodeCondition: 
&corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.PreservedByUser, + }, + }, + expect: expect{ + newNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainUnsuccessful, + }, + needsUpdate: true, + }, + }), + Entry("when preserve=now, machine is Failed, drain is unsuccessful for the second time, existing condition present", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineFailed, + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + drainSuccess: false, + existingNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainUnsuccessful, + }, + }, + expect: expect{ + newNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainUnsuccessful, + }, + needsUpdate: false, + }, + }), + Entry("when preserve=now, machine is Failed, drain is successful, existing condition present and status is true", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineFailed, + preserveValue: machineutils.PreserveMachineAnnotationValueNow, + drainSuccess: true, + existingNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + }, + expect: expect{ + newNodeCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainSuccessful, + }, + needsUpdate: false, + }, + }), + ) + }) }) diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 
89fef24a2..909c67d05 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -6,12 +6,11 @@ package machineutils import ( + "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" "time" - - v1 "k8s.io/api/core/v1" - - "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" ) const ( @@ -105,12 +104,7 @@ const ( ) // AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation -var AllowedPreserveAnnotationValues = map[string]bool{ - PreserveMachineAnnotationValueNow: true, - PreserveMachineAnnotationValueWhenFailed: true, - PreserveMachineAnnotationValuePreservedByMCM: true, - PreserveMachineAnnotationValueFalse: true, -} +var AllowedPreserveAnnotationValues = sets.New[string](PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) // RetryPeriod is an alias for specifying the retry period type RetryPeriod time.Duration @@ -171,9 +165,13 @@ func PreserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string // or if it is a candidate for auto-preservation func IsFailedMachineCandidateForPreservation(machine *v1alpha1.Machine) bool { // if preserve expiry time is set and is in the future, machine is already preserved - if machine.Status.CurrentStatus.PreserveExpiryTime != nil && machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { - klog.V(3).Infof("Failed machine %q is preserved until %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) - return true + if machine.Status.CurrentStatus.PreserveExpiryTime != nil { + if machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { + klog.V(3).Infof("Failed machine %q is preserved until %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + return true + } + 
klog.V(3).Infof("Preservation of failed machine %q has timed out at %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + return false } val, exists := machine.Annotations[PreserveMachineAnnotationKey] // if the machine preservation is not complete yet even though the machine is annotated, consider it as a candidate for preservation diff --git a/pkg/util/provider/machineutils/utils_test.go b/pkg/util/provider/machineutils/utils_test.go new file mode 100644 index 000000000..eed5019a0 --- /dev/null +++ b/pkg/util/provider/machineutils/utils_test.go @@ -0,0 +1,174 @@ +package machineutils + +import ( + "flag" + machinev1 "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + "testing" + "time" +) + +func TestMachineUtilsSuite(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Machine Utils Suite") +} + +var _ = BeforeSuite(func() { + klog.SetOutput(GinkgoWriter) + //for filtering out warning logs. 
Reflector short watch warning logs won't print now + klog.LogToStderr(false) + flags := &flag.FlagSet{} + klog.InitFlags(flags) + Expect(flags.Set("v", "10")).To(Succeed()) + + DeferCleanup(klog.Flush) +}) + +var _ = Describe("utils.go", func() { + Describe("#isFailedMachineCandidateForPreservation", func() { + + type setup struct { + preserveExpiryTime *metav1.Time + annotationValue string + } + type expect struct { + result bool + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("isFailedMachineCandidateForPreservation test cases", func(tc testCase) { + machine := machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-machine", + Namespace: "default", + Annotations: map[string]string{ + PreserveMachineAnnotationKey: tc.setup.annotationValue, + }, + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + PreserveExpiryTime: tc.setup.preserveExpiryTime, + }, + }, + } + result := IsFailedMachineCandidateForPreservation(&machine) + Expect(result).To(Equal(tc.expect.result)) + }, + Entry("should return true if preserve expiry time is in the future", testCase{ + setup: setup{ + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, + annotationValue: PreserveMachineAnnotationValueNow, + }, + expect: expect{ + result: true, + }, + }), + Entry("should return false if machine is annotated with preserve=false", testCase{ + setup: setup{ + annotationValue: PreserveMachineAnnotationValueFalse, + }, + expect: expect{ + result: false, + }, + }), + Entry("should return true if machine is annotated with preserve=now", testCase{ + setup: setup{ + annotationValue: PreserveMachineAnnotationValueNow, + }, + expect: expect{ + result: true, + }, + }), + Entry("should return true if machine is annotated with preserve=when-failed", testCase{ + setup: setup{ + annotationValue: PreserveMachineAnnotationValueWhenFailed, + }, + expect: expect{ + result: true, + }, + }), + 
Entry("should return false if preservation has timed out", testCase{ + setup: setup{ + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Second)}, + annotationValue: PreserveMachineAnnotationValueNow, + }, + expect: expect{ + result: false, + }, + }), + ) + }) + Describe("#PreserveAnnotationsChanged", func() { + type setup struct { + oldAnnotations map[string]string + newAnnotations map[string]string + } + type expect struct { + result bool + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("PreserveAnnotationsChanged test cases", func(tc testCase) { + + result := PreserveAnnotationsChanged(tc.setup.oldAnnotations, tc.setup.newAnnotations) + Expect(result).To(Equal(tc.expect.result)) + }, + Entry("should return true if preserve annotation added for the first time", testCase{ + setup: setup{ + oldAnnotations: map[string]string{}, + newAnnotations: map[string]string{ + PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, + }, + }, + expect: expect{ + result: true, + }, + }), + Entry("should return true if preserve annotation is removed", testCase{ + setup: setup{ + oldAnnotations: map[string]string{ + PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, + }, + newAnnotations: map[string]string{}, + }, + expect: expect{ + result: true, + }, + }), + Entry("should return true if preserve annotation value is changed", testCase{ + setup: setup{ + oldAnnotations: map[string]string{ + PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, + }, + newAnnotations: map[string]string{ + PreserveMachineAnnotationKey: PreserveMachineAnnotationValueWhenFailed, + }, + }, + expect: expect{ + result: true, + }, + }), + Entry("should return false if preserve annotation is unchanged", testCase{ + setup: setup{ + oldAnnotations: map[string]string{ + PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, + }, + newAnnotations: map[string]string{ + PreserveMachineAnnotationKey: 
PreserveMachineAnnotationValueNow, + }, + }, + expect: expect{ + result: false, + }, + }), + ) + }) +}) From 6c17ccd68367cc01d86cd1ccae84e33a247789cf Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 16 Jan 2026 15:45:23 +0530 Subject: [PATCH 52/79] Revert Makefile changes --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 1b05c478b..aba0236ac 100644 --- a/Makefile +++ b/Makefile @@ -172,9 +172,9 @@ test-clean: .PHONY: generate generate: $(VGOPATH) $(DEEPCOPY_GEN) $(DEFAULTER_GEN) $(CONVERSION_GEN) $(OPENAPI_GEN) $(CONTROLLER_GEN) $(GEN_CRD_API_REFERENCE_DOCS) - @GOFLAGS="-buildvcs=false" $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout - @GOFLAGS="-buildvcs=false" ./hack/generate-code - @GOFLAGS="-buildvcs=false" ./hack/api-reference/generate-spec-doc.sh + $(CONTROLLER_GEN) crd paths=./pkg/apis/machine/v1alpha1/... output:crd:dir=kubernetes/crds output:stdout + @./hack/generate-code + @./hack/api-reference/generate-spec-doc.sh .PHONY: add-license-headers add-license-headers: $(GO_ADD_LICENSE) From 48d8b7d65254c55d1ebb8dc8829f7e39d08d8548 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 19 Jan 2026 12:33:16 +0530 Subject: [PATCH 53/79] Add preservation tests for machineSet controller --- pkg/controller/machineset.go | 48 +++++++- pkg/controller/machineset_test.go | 197 +++++++++++++++++++++++++++++- pkg/controller/machineset_util.go | 45 ------- 3 files changed, 243 insertions(+), 47 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 6ff1b8f9b..cc26a0070 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -710,7 +710,7 @@ func prioritisePreservedMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machi preservedMachines := make([]*v1alpha1.Machine, 0, len(machines)) otherMachines := make([]*v1alpha1.Machine, 0, len(machines)) for _, mc := range machines { - if 
mc.Status.CurrentStatus.PreserveExpiryTime != nil { + if mc.Status.CurrentStatus.PreserveExpiryTime != nil && mc.Status.CurrentStatus.PreserveExpiryTime.After(metav1.Now().Time) { preservedMachines = append(preservedMachines, mc) } else { otherMachines = append(otherMachines, mc) @@ -938,3 +938,49 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. return machine, retryErr } + +// triggerAutoPreservationOfFailedMachines annotates failed machines with the auto-preservation annotation +// to trigger preservation of the machines by the machine controller, up to the limit defined in the +// MachineSet's AutoPreserveFailedMachineMax field. Machines annotated with preserve=false are skipped. +// A machine whose annotation update fails is logged at error level and does not consume capacity. +// Scanning stops as soon as the remaining capacity is exhausted. +func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) { + autoPreservationCapacityRemaining := machineSet.Spec.AutoPreserveFailedMachineMax - machineSet.Status.AutoPreserveFailedMachineCount + for _, m := range machines { + if autoPreservationCapacityRemaining <= 0 { + // no capacity remaining, nothing more to do + return + } + if !machineutils.IsMachineFailed(m) { + continue + } + // a machine annotated with preserve=false is never considered for preservation (reading a nil map is safe) + if m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { + continue + } + klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) + if err := c.annotateMachineForAutoPreservation(ctx, m); err != nil { + klog.Errorf("Error annotating machine %q for auto-preservation: %v", m.Name, err) + // since annotateMachineForAutoPreservation uses retries internally, we can continue with other machines + continue + } + autoPreservationCapacityRemaining-- + } +} + +// annotateMachineForAutoPreservation annotates the given machine with the
auto-preservation annotation to trigger +// preservation of the machine by the machine controller. +func (c *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { + _, err := UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { + if clone.Annotations == nil { + clone.Annotations = make(map[string]string) + } + clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM + return nil + }) + if err != nil { + return err + } + klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) + return nil +} diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 88d847017..caa9d71e3 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -1385,9 +1385,37 @@ var _ = Describe("machineset", func() { filteredMachines := []*machinev1.Machine{testActiveMachine1, testFailedMachine1} machinesToDelete := getMachinesToDelete(filteredMachines, diff) - Expect(len(machinesToDelete)).To(Equal(len(filteredMachines) - diff)) + Expect(len(machinesToDelete)).To(Equal(diff)) Expect(machinesToDelete[0].Name).To(Equal(testFailedMachine1.Name)) }) + + // Testcase: It should return non-preserved machines first.
+ It("should return non-preserved machines first.", func() { + stop := make(chan struct{}) + defer close(stop) + diff = 2 + testPreservedFailedMachine := testFailedMachine1.DeepCopy() + testPreservedFailedMachine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: time.Now().Add(1 * time.Hour)} + filteredMachines := []*machinev1.Machine{testActiveMachine1, testFailedMachine1, testPreservedFailedMachine} + machinesToDelete := getMachinesToDelete(filteredMachines, diff) + Expect(len(machinesToDelete)).To(Equal(diff)) + // expect machinesToDelete to not contain testPreservedFailedMachine + Expect(machinesToDelete).ToNot(ContainElement(testPreservedFailedMachine)) + }) + + // Testcase: It should return preserved machine if no other option. Replica count must be maintained. + It("should return preserved machine if needed to maintain replica count", func() { + stop := make(chan struct{}) + defer close(stop) + diff = 2 + testPreservedFailedMachine := testFailedMachine1.DeepCopy() + testPreservedFailedMachine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: time.Now().Add(1 * time.Hour)} + filteredMachines := []*machinev1.Machine{testActiveMachine1, testPreservedFailedMachine} + machinesToDelete := getMachinesToDelete(filteredMachines, diff) + Expect(len(machinesToDelete)).To(Equal(diff)) + // expect machinesToDelete to contain testPreservedFailedMachine: diff equals the number of machines, so the preserved one must be deleted too + Expect(machinesToDelete).To(ContainElement(testPreservedFailedMachine)) + }) }) Describe("#getMachineKeys", func() { @@ -1813,4 +1841,171 @@ var _ = Describe("machineset", func() { Expect(testMachineSet.Finalizers).To(Equal(finalizers)) }) }) + + Describe("#triggerAutoPreservationOfFailedMachines", func() { + type setup struct { + autoPreserveFailedMachineCount int32 + autoPreserveFailedMachineMax int32 + } + type expect struct { + preservedMachineCount int + } + type testCase struct { + setup setup + expect expect + } + + DescribeTable("#triggerAutoPreservationOfFailedMachines scenarios",
func(tc testCase) { + stop := make(chan struct{}) + defer close(stop) + testMachineSet := &machinev1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "MachineSet-test", + Namespace: testNamespace, + Labels: map[string]string{ + "test-label": "test-label", + }, + UID: "1234567", + }, + Spec: machinev1.MachineSetSpec{ + Replicas: 4, + Template: machinev1.MachineTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "test-label": "test-label", + }, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "test-label": "test-label", + }, + }, + AutoPreserveFailedMachineMax: tc.setup.autoPreserveFailedMachineMax, + }, + Status: machinev1.MachineSetStatus{ + AutoPreserveFailedMachineCount: tc.setup.autoPreserveFailedMachineCount, + }, + } + testMachine1 := &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: MachineFailed, + }, + }, + } + testMachine2 := &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-2", + Namespace: testNamespace, + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: MachineFailed, + }, + }, + } + testMachine3 := &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-3", + Namespace: testNamespace, + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: MachineRunning, + }, + }, + } + testMachine4 := &machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-4", + Namespace: testNamespace, + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueFalse, + }, + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: MachineFailed, + }, + }, + } + objects := []runtime.Object{} + objects = append(objects, testMachineSet, testMachine1, testMachine2, 
testMachine3, testMachine4) + c, trackers := createController(stop, testNamespace, objects, nil, nil) + defer trackers.Stop() + waitForCacheSync(stop, c) + machinesList := []*machinev1.Machine{testMachine1, testMachine2} + + c.triggerAutoPreservationOfFailedMachines(context.TODO(), machinesList, testMachineSet) + waitForCacheSync(stop, c) + updatedMachine1, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine1.Name, metav1.GetOptions{}) + updatedMachine2, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine2.Name, metav1.GetOptions{}) + updatedMachine3, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine3.Name, metav1.GetOptions{}) + updatedMachine4, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine4.Name, metav1.GetOptions{}) + preservedCount := 0 + if updatedMachine1.Annotations != nil && updatedMachine1.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + preservedCount++ + } + if updatedMachine2.Annotations != nil && updatedMachine2.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + preservedCount++ + } + Expect(preservedCount).To(Equal(tc.expect.preservedMachineCount)) + // Running machine should not be auto-preserved in any of the cases + Expect(updatedMachine3.Annotations[machineutils.PreserveMachineAnnotationKey]).To(BeEmpty()) + // Machine with explicit preserve annotation set to false should not be auto-preserved + Expect(updatedMachine4.Annotations[machineutils.PreserveMachineAnnotationKey]).To(Equal(machineutils.PreserveMachineAnnotationValueFalse)) + + }, + Entry("should trigger auto preservation of 1 failed machine if AutoPreserveFailedMachineMax is 1 and AutoPreserveFailedMachineCount is 0", testCase{ + setup: setup{ + autoPreserveFailedMachineCount: 0, + autoPreserveFailedMachineMax: 1, + 
}, + expect: expect{ + preservedMachineCount: 1, + }, + }), + Entry("should not trigger auto preservation of failed machines if AutoPreserveFailedMachineMax is 0", testCase{ + setup: setup{ + autoPreserveFailedMachineCount: 0, + autoPreserveFailedMachineMax: 0, + }, + expect: expect{ + preservedMachineCount: 0, + }, + }), + Entry("should not trigger auto preservation of failed machines if AutoPreserveFailedMachineCount has reached AutoPreserveFailedMachineMax", testCase{ + setup: setup{ + autoPreserveFailedMachineCount: 2, + autoPreserveFailedMachineMax: 2, + }, + expect: expect{ + preservedMachineCount: 0, + }, + }), + Entry("should trigger auto preservation of both failed machines if AutoPreserveFailedMachineCount is 0 and AutoPreserveFailedMachineMax is 2", testCase{ + setup: setup{ + autoPreserveFailedMachineCount: 0, + autoPreserveFailedMachineMax: 2, + }, + expect: expect{ + preservedMachineCount: 2, + }, + }), + Entry("should not trigger auto preservation of failed machine annotated with preserve=false even if AutoPreserveFailedMachineCount < AutoPreserveFailedMachineMax", testCase{ + setup: setup{ + autoPreserveFailedMachineCount: 0, + autoPreserveFailedMachineMax: 3, + }, + expect: expect{ + preservedMachineCount: 2, + }, + }), + ) + }) }) diff --git a/pkg/controller/machineset_util.go b/pkg/controller/machineset_util.go index cf804fef7..78ce9d803 100644 --- a/pkg/controller/machineset_util.go +++ b/pkg/controller/machineset_util.go @@ -216,48 +216,3 @@ func logMachinesToDelete(machines []*v1alpha1.Machine) { klog.V(3).Infof("Machine %q needs to be deleted", m.Name) } } - -// triggerAutoPreservationOfFailedMachines annotates failed machines with the auto-preservation annotation -// to trigger preservation of the machines by the machine controller, up to the limit defined in the -// MachineSet's AutoPreserveFailedMachineMax field. 
-func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) { - autoPreservationCapacityRemaining := machineSet.Spec.AutoPreserveFailedMachineMax - machineSet.Status.AutoPreserveFailedMachineCount - if autoPreservationCapacityRemaining <= 0 { - // no capacity remaining, nothing to do - return - } - for _, m := range machines { - if machineutils.IsMachineFailed(m) { - // check if machine is annotated with preserve=false, if yes, do not consider for preservation - if m.Annotations != nil && m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { - continue - } - if autoPreservationCapacityRemaining > 0 { - err := c.annotateMachineForAutoPreservation(ctx, m) - if err != nil { - klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) - // since annotateMachineForAutoPreservation uses retries internally, we can continue with other machines - continue - } - autoPreservationCapacityRemaining = autoPreservationCapacityRemaining - 1 - } - } - } -} - -// annotateMachineForAutoPreservation annotates the given machine with the auto-preservation annotation to trigger -// preservation of the machine by the machine controller. 
-func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { - _, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { - if clone.Annotations == nil { - clone.Annotations = make(map[string]string) - } - clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM - return nil - }) - if err != nil { - return err - } - klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) - return nil -} From d5387073e4e8e1b4eba10a59dd6892e9e4a45ba0 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 19 Jan 2026 15:56:46 +0530 Subject: [PATCH 54/79] Update comments and fix minor bugs --- pkg/controller/machineset.go | 13 ++++---- pkg/controller/machineset_test.go | 12 ++----- .../provider/machinecontroller/machine.go | 32 ++++++------------- .../machinecontroller/machine_util.go | 15 +++++---- .../machinecontroller/machine_util_test.go | 2 +- pkg/util/provider/machineutils/utils.go | 2 ++ 6 files changed, 31 insertions(+), 45 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index cc26a0070..9e9782f06 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -580,7 +580,7 @@ func (c *controller) reconcileClusterMachineSet(key string) error { return err } - // triggerAutoPreservation adds the PreserveMachineAnnotationValuePreservedByMCM annotation + // triggerAutoPreservation adds the preserve=PreserveMachineAnnotationValuePreservedByMCM annotation // to Failed machines to trigger auto-preservation, if applicable. 
// We do not update machineSet.Status.AutoPreserveFailedMachineCount in the function, as it will be calculated // and updated in the succeeding calls to calculateMachineSetStatus() and updateMachineSetStatus() @@ -699,13 +699,14 @@ func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1al // < scheduled, and pending < running. This ensures that we delete machines // in the earlier stages whenever possible. sort.Sort(ActiveMachines(filteredMachines)) - // machines in Preserved stage will be the last ones to be deleted + // preserved machines are de-prioritised for deletion // At all times, replica count will be upheld, even if it requires the deletion of a preserved machine filteredMachines = prioritisePreservedMachines(filteredMachines) } return filteredMachines[:diff] } +// prioritisePreservedMachines moves preserved machines to the end of the slice func prioritisePreservedMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machine { preservedMachines := make([]*v1alpha1.Machine, 0, len(machines)) otherMachines := make([]*v1alpha1.Machine, 0, len(machines)) @@ -939,8 +940,8 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. return machine, retryErr } -// triggerAutoPreservationOfFailedMachines annotates failed machines with the auto-preservation annotation -// to trigger preservation of the machines by the machine controller, up to the limit defined in the +// triggerAutoPreservationOfFailedMachines annotates failed machines with preserve=auto-preserved annotation +// to trigger preservation of the machines, by the machine controller, up to the limit defined in the // MachineSet's AutoPreserveFailedMachineMax field. 
func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) { autoPreservationCapacityRemaining := machineSet.Spec.AutoPreserveFailedMachineMax - machineSet.Status.AutoPreserveFailedMachineCount @@ -970,8 +971,8 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context // annotateMachineForAutoPreservation annotates the given machine with the auto-preservation annotation to trigger // preservation of the machine by the machine controller. -func (dc *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { - _, err := UpdateMachineWithRetries(ctx, dc.controlMachineClient.Machines(m.Namespace), dc.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { +func (c *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { + _, err := UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { if clone.Annotations == nil { clone.Annotations = make(map[string]string) } diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index caa9d71e3..b87cb90ea 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -1376,8 +1376,6 @@ var _ = Describe("machineset", func() { }, } }) - - // Testcase: It should return the Failed machines first. It("should return the Failed machines first.", func() { stop := make(chan struct{}) defer close(stop) @@ -1388,9 +1386,7 @@ var _ = Describe("machineset", func() { Expect(len(machinesToDelete)).To(Equal(diff)) Expect(machinesToDelete[0].Name).To(Equal(testFailedMachine1.Name)) }) - - // Testcase: It should return non-preserved machines first. 
- It("should return non-preserved machines first.", func() { + It("should prioritise non-preserved machines for deletion.", func() { stop := make(chan struct{}) defer close(stop) diff = 2 @@ -1402,9 +1398,7 @@ var _ = Describe("machineset", func() { // expect machinesToDelete to not contain testPreservedFailedMachine Expect(machinesToDelete).ToNot(ContainElement(testPreservedFailedMachine)) }) - - // Testcase: It should return preserved machine if no other option. Replica count must be maintained. - It("should return preserved machine if needed to maintain replica count", func() { + It("should include preserved machine when needed to maintain replica count", func() { stop := make(chan struct{}) defer close(stop) diff = 2 @@ -1413,7 +1407,7 @@ var _ = Describe("machineset", func() { filteredMachines := []*machinev1.Machine{testActiveMachine1, testPreservedFailedMachine} machinesToDelete := getMachinesToDelete(filteredMachines, diff) Expect(len(machinesToDelete)).To(Equal(diff)) - // expect machinesToDelete to not contain testPreservedFailedMachine + // expect machinesToDelete to contain testPreservedFailedMachine Expect(machinesToDelete).To(ContainElement(testPreservedFailedMachine)) }) }) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 9f5198c96..31b8d5694 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -63,8 +63,7 @@ func (c *controller) updateMachine(oldObj, newObj any) { c.enqueueMachine(newObj, "handling machine object preservation related UPDATE event") return } - - // this check is required to enqueue a previously failed preserved machine, when the phase changes to Running + // this check is required to enqueue a previously failed preserved machine when its phase changes to Running on recovery if _, exists := newMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && newMachine.Status.CurrentStatus.Phase == 
v1alpha1.MachineFailed && oldMachine.Status.CurrentStatus.Phase != newMachine.Status.CurrentStatus.Phase { c.enqueueMachine(newObj, "handling preserved machine phase update") } @@ -216,10 +215,6 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp return retry, err } - retry, err = c.manageMachinePreservation(ctx, machine) - if err != nil { - return retry, err - } if machine.Labels[v1alpha1.NodeLabelKey] != "" && machine.Status.CurrentStatus.Phase != "" { // If reference to node object exists execute the below retry, err := c.reconcileMachineHealth(ctx, machine) @@ -747,8 +742,7 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { SECTION Machine Preservation operations */ - -// manageMachinePreservation checks if any preservation-related operations need to be performed on the machine and node objects +// manageMachinePreservation manages machine preservation based on the preserve annotation value. func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { defer func() { if err != nil { @@ -842,7 +836,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } -func (c *controller) getNodePreserveAnnotationValue(machine *v1alpha1.Machine) (nAnnotationValue string, nExists bool, err error) { +func (c *controller) getNodePreserveAnnotationValue(machine *v1alpha1.Machine) (nodeAnnotationValue string, existsOnNode bool, err error) { nodeName := machine.Labels[v1alpha1.NodeLabelKey] if nodeName == "" { return @@ -853,7 +847,7 @@ func (c *controller) getNodePreserveAnnotationValue(machine *v1alpha1.Machine) ( return } if node.Annotations != nil { - nAnnotationValue, nExists = node.Annotations[machineutils.PreserveMachineAnnotationKey] + nodeAnnotationValue, existsOnNode = node.Annotations[machineutils.PreserveMachineAnnotationKey] } return } @@ -863,19 +857,19 @@ func (c *controller) 
getNodePreserveAnnotationValue(machine *v1alpha1.Machine) ( // if there is no backing node, or the node has no preserve annotation, then the machine's preserve value is honoured // if both machine and node objects have conflicting preserve annotation values, the node's value will be honoured func (c *controller) computeEffectivePreserveAnnotationValue(machine *v1alpha1.Machine) (preserveValue string, exists bool, err error) { - mAnnotationValue, mExists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] - nAnnotationValue, nExists, err := c.getNodePreserveAnnotationValue(machine) + machineAnnotationValue, existsOnMachine := machine.Annotations[machineutils.PreserveMachineAnnotationKey] + nodeAnnotationValue, existsOnNode, err := c.getNodePreserveAnnotationValue(machine) if err != nil { return } - exists = mExists || nExists + exists = existsOnMachine || existsOnNode if !exists { return } - if nExists { - preserveValue = nAnnotationValue + if existsOnNode { + preserveValue = nodeAnnotationValue } else { - preserveValue = mAnnotationValue + preserveValue = machineAnnotationValue } return } @@ -893,9 +887,3 @@ func (c *controller) writePreserveAnnotationValueOnMachine(ctx context.Context, } return updatedMachine, nil } - -// isPreserveAnnotationValueValid checks if the preserve annotation value is valid -func isPreserveAnnotationValueValid(preserveValue string) bool { - _, exists := machineutils.AllowedPreserveAnnotationValues[preserveValue] - return exists -} diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index fe8b84dae..9e694a979 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2387,7 +2387,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return err } existingNodePreservedCondition := nodeops.GetCondition(node, v1alpha1.NodePreserved) - // check if preservation is 
already complete + // checks if preservation is already complete if c.isPreservedNodeConditionStatusTrue(existingNodePreservedCondition) { return nil } @@ -2494,10 +2494,7 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine // if the machine has no backing node, only PreserveExpiryTime needs to be set // if the machine has a backing node, the NodePreserved condition on the node needs to be true func (c *controller) isPreservedNodeConditionStatusTrue(cond *v1.NodeCondition) bool { - if cond == nil { - return false - } - if cond.Status == v1.ConditionTrue { + if cond != nil && cond.Status == v1.ConditionTrue { return true } return false @@ -2514,8 +2511,12 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, } nodeCopy := node.DeepCopy() - updatedNode, _, _ := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) - updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + updatedNode, _, err := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) + if err != nil { + klog.Errorf("error trying to add CA annotation on node %q: %v", node.Name, err) + return nil, err + } + updatedNode, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) if err != nil { klog.Errorf("error trying to update CA annotation on node %q: %v", node.Name, err) return nil, err diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 828faf122..af80eee28 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4224,7 +4224,7 @@ var _ = Describe("machine_util", func() { setup setup expect expect } - DescribeTable("##preserveMachine behaviour 
scenarios", + DescribeTable("##stopMachinePreservationIfPreserved behaviour scenarios", func(tc *testCase) { stop := make(chan struct{}) defer close(stop) diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 909c67d05..6900f41fe 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -181,6 +181,8 @@ func IsFailedMachineCandidateForPreservation(machine *v1alpha1.Machine) bool { return true case PreserveMachineAnnotationValueFalse: return false + default: + return false } } return false From 91f2f1ff95ae43b3d15821be0cd9e59cb08f99cd Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 20 Jan 2026 16:47:18 +0530 Subject: [PATCH 55/79] Address review comments - part 9 --- pkg/controller/machineset.go | 2 +- .../provider/machinecontroller/machine.go | 33 +++++++------ .../machinecontroller/machine_test.go | 10 +--- .../machinecontroller/machine_util.go | 48 +++++-------------- .../machinecontroller/machine_util_test.go | 16 +++---- 5 files changed, 39 insertions(+), 70 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 9e9782f06..56e3e8914 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -952,7 +952,7 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context for _, m := range machines { if machineutils.IsMachineFailed(m) { // check if machine is annotated with preserve=false, if yes, do not consider for preservation - if m.Annotations != nil && m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { + if m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { continue } if autoPreservationCapacityRemaining > 0 { diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 31b8d5694..eda2a0d7d 100644 --- 
a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -64,7 +64,8 @@ func (c *controller) updateMachine(oldObj, newObj any) { return } // this check is required to enqueue a previously failed preserved machine when its phase changes to Running on recovery - if _, exists := newMachine.Annotations[machineutils.PreserveMachineAnnotationKey]; exists && newMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed && oldMachine.Status.CurrentStatus.Phase != newMachine.Status.CurrentStatus.Phase { + _, exists := newMachine.Annotations[machineutils.PreserveMachineAnnotationKey] + if exists && oldMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed && newMachine.Status.CurrentStatus.Phase != v1alpha1.MachineFailed { c.enqueueMachine(newObj, "handling preserved machine phase update") } @@ -742,6 +743,7 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { SECTION Machine Preservation operations */ + // manageMachinePreservation manages machine preservation based on the preserve annotation value. func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { defer func() { @@ -756,16 +758,19 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } }() - preserveValue, exists, err := c.computeEffectivePreserveAnnotationValue(machine) + preserveValue, err := c.computeEffectivePreserveAnnotationValue(machine) if err != nil { return } - if !exists { + // either annotation has been deleted, set to empty or no preserve annotation exists. + // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. 
+ if preserveValue == "" { + err = c.stopMachinePreservationIfPreserved(ctx, machine, true) return } // if preserve value differs from machine's preserve value, overwrite the value in the machine clone := machine.DeepCopy() - if machine.Annotations == nil || machine.Annotations[machineutils.PreserveMachineAnnotationKey] != preserveValue { + if machine.Annotations[machineutils.PreserveMachineAnnotationKey] != preserveValue { clone, err = c.writePreserveAnnotationValueOnMachine(ctx, clone, preserveValue) if err != nil { return @@ -846,9 +851,7 @@ func (c *controller) getNodePreserveAnnotationValue(machine *v1alpha1.Machine) ( klog.Errorf("error trying to get node %q: %v", nodeName, err) return } - if node.Annotations != nil { - nodeAnnotationValue, existsOnNode = node.Annotations[machineutils.PreserveMachineAnnotationKey] - } + nodeAnnotationValue, existsOnNode = node.Annotations[machineutils.PreserveMachineAnnotationKey] return } @@ -856,22 +859,18 @@ func (c *controller) getNodePreserveAnnotationValue(machine *v1alpha1.Machine) ( // if the backing node is annotated with preserve annotation, the node's preserve value will be honoured // if there is no backing node, or the node has no preserve annotation, then the machine's preserve value is honoured // if both machine and node objects have conflicting preserve annotation values, the node's value will be honoured -func (c *controller) computeEffectivePreserveAnnotationValue(machine *v1alpha1.Machine) (preserveValue string, exists bool, err error) { +func (c *controller) computeEffectivePreserveAnnotationValue(machine *v1alpha1.Machine) (string, error) { machineAnnotationValue, existsOnMachine := machine.Annotations[machineutils.PreserveMachineAnnotationKey] nodeAnnotationValue, existsOnNode, err := c.getNodePreserveAnnotationValue(machine) if err != nil { - return - } - exists = existsOnMachine || existsOnNode - if !exists { - return + return "", err } if existsOnNode { - preserveValue = nodeAnnotationValue - } else 
{ - preserveValue = machineAnnotationValue + return nodeAnnotationValue, nil + } else if existsOnMachine { + return machineAnnotationValue, nil } - return + return "", nil } // writePreserveAnnotationValueOnMachine syncs the effective preserve value on the machine objects diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 22df6898c..f05787e2c 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -4008,7 +4008,6 @@ var _ = Describe("machine", func() { } type expect struct { preserveValue string - exists bool err error } type testCase struct { @@ -4059,7 +4058,7 @@ var _ = Describe("machine", func() { defer trackers.Stop() waitForCacheSync(stop, c) - value, exists, err := c.computeEffectivePreserveAnnotationValue(machine) + value, err := c.computeEffectivePreserveAnnotationValue(machine) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) @@ -4067,7 +4066,6 @@ var _ = Describe("machine", func() { return } Expect(err).ToNot(HaveOccurred()) - Expect(exists).To(Equal(tc.expect.exists)) Expect(value).To(Equal(tc.expect.preserveValue)) }, Entry("neither machine nor node has preserve annotation", testCase{ @@ -4076,7 +4074,6 @@ var _ = Describe("machine", func() { }, expect: expect{ preserveValue: "", - exists: false, err: nil, }, }), @@ -4087,7 +4084,6 @@ var _ = Describe("machine", func() { }, expect: expect{ preserveValue: "machineValue", - exists: true, err: nil, }, }), @@ -4098,7 +4094,6 @@ var _ = Describe("machine", func() { }, expect: expect{ preserveValue: "nodeValue", - exists: true, err: nil, }, }), @@ -4110,7 +4105,6 @@ var _ = Describe("machine", func() { }, expect: expect{ preserveValue: "nodeValue", - exists: true, err: nil, }, }), @@ -4121,7 +4115,6 @@ var _ = Describe("machine", func() { }, expect: expect{ preserveValue: "", - exists: false, err: fmt.Errorf("node %q not found", "invalid"), }, }), @@ -4131,7 
+4124,6 @@ var _ = Describe("machine", func() { }, expect: expect{ preserveValue: "machineValue", - exists: true, err: nil, }, }), diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 9e694a979..4a521e193 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2370,12 +2370,8 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach if err != nil { return err } - if nodeName == "" { - // if machine has no backing node, preservation is complete - klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) - return nil - } - } else if nodeName == "" { + } + if nodeName == "" { // Machine has no backing node, preservation is complete klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) return nil @@ -2388,7 +2384,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } existingNodePreservedCondition := nodeops.GetCondition(node, v1alpha1.NodePreserved) // checks if preservation is already complete - if c.isPreservedNodeConditionStatusTrue(existingNodePreservedCondition) { + if existingNodePreservedCondition != nil && existingNodePreservedCondition.Status == v1.ConditionTrue { return nil } // Preservation incomplete - either the flow is just starting or in progress @@ -2398,25 +2394,18 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach if err != nil { return err } - drainSuccessful := false + var drainErr error if c.shouldPreservedNodeBeDrained(updatedMachine, existingNodePreservedCondition) { // Step 3: If machine is in Failed Phase, drain the backing node - err = c.drainPreservedNode(ctx, machine) - if err != nil { - newCond, needsUpdate := 
computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) - if needsUpdate { - // Step 4a: Update NodePreserved Condition on Node, with drain unsuccessful status - _, _ = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) - return err - } - return err - } - drainSuccessful = true + drainErr = c.drainPreservedNode(ctx, machine) } - // Step 4b: Update NodePreserved Condition on Node with drain successful status - newCond, needsUpdate := computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainSuccessful, existingNodePreservedCondition) + newCond, needsUpdate := computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainErr, existingNodePreservedCondition) if needsUpdate { + // Step 4: Update NodePreserved Condition on Node, with drain status _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) + if drainErr != nil { + return drainErr + } if err != nil { klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", nodeName, machine.Name, err) return err @@ -2490,21 +2479,10 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine return updatedMachine, nil } -// isPreservedNodeConditionStatusTrue check if all the steps in the preservation logic have been completed for the machine -// if the machine has no backing node, only PreserveExpiryTime needs to be set -// if the machine has a backing node, the NodePreserved condition on the node needs to be true -func (c *controller) isPreservedNodeConditionStatusTrue(cond *v1.NodeCondition) bool { - if cond != nil && cond.Status == v1.ConditionTrue { - return true - } - return false -} - // addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node func (c *controller) 
addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, error) { // Check if annotation already exists with correct value - if node.Annotations != nil && - node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { + if node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { return node, nil } CAScaleDownAnnotation := map[string]string{ @@ -2556,7 +2534,7 @@ func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string } // computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation -func computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preserveValue string, drainSuccessful bool, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { +func computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preserveValue string, drainErr error, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { var newNodePreservedCondition *v1.NodeCondition var needsUpdate bool if existingNodeCondition == nil { @@ -2570,7 +2548,7 @@ func computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preser newNodePreservedCondition = existingNodeCondition.DeepCopy() } if machinePhase == v1alpha1.MachineFailed { - if drainSuccessful { + if drainErr == nil { if newNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful newNodePreservedCondition.Status = v1.ConditionTrue diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index af80eee28..c3c0e918e 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ 
b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4345,7 +4345,7 @@ var _ = Describe("machine_util", func() { type setup struct { machinePhase machinev1.MachinePhase preserveValue string - drainSuccess bool + drainErr error existingNodeCondition *corev1.NodeCondition } type expect struct { @@ -4361,7 +4361,7 @@ var _ = Describe("machine_util", func() { newNodeCondition, needsUpdate := computeNewNodePreservedCondition( tc.setup.machinePhase, tc.setup.preserveValue, - tc.setup.drainSuccess, + tc.setup.drainErr, tc.setup.existingNodeCondition, ) if tc.expect.newNodeCondition == nil { @@ -4393,7 +4393,7 @@ var _ = Describe("machine_util", func() { setup: setup{ machinePhase: machinev1.MachineFailed, preserveValue: machineutils.PreserveMachineAnnotationValueNow, - drainSuccess: true, + drainErr: nil, existingNodeCondition: nil, }, expect: expect{ @@ -4410,7 +4410,7 @@ var _ = Describe("machine_util", func() { setup: setup{ machinePhase: machinev1.MachineFailed, preserveValue: machineutils.PreserveMachineAnnotationValueNow, - drainSuccess: false, + drainErr: fmt.Errorf("test drain error"), existingNodeCondition: nil, }, expect: expect{ @@ -4427,7 +4427,7 @@ var _ = Describe("machine_util", func() { setup: setup{ machinePhase: machinev1.MachineFailed, preserveValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, - drainSuccess: true, + drainErr: nil, existingNodeCondition: nil, }, expect: expect{ @@ -4444,7 +4444,7 @@ var _ = Describe("machine_util", func() { setup: setup{ machinePhase: machinev1.MachineFailed, preserveValue: machineutils.PreserveMachineAnnotationValueNow, - drainSuccess: false, + drainErr: fmt.Errorf("test drain error"), existingNodeCondition: &corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, @@ -4465,7 +4465,7 @@ var _ = Describe("machine_util", func() { setup: setup{ machinePhase: machinev1.MachineFailed, preserveValue: machineutils.PreserveMachineAnnotationValueNow, - drainSuccess: false, + 
drainErr: fmt.Errorf("test drain error"), existingNodeCondition: &corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, @@ -4487,7 +4487,7 @@ var _ = Describe("machine_util", func() { setup: setup{ machinePhase: machinev1.MachineFailed, preserveValue: machineutils.PreserveMachineAnnotationValueNow, - drainSuccess: true, + drainErr: nil, existingNodeCondition: &corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, From c472897532d662b918d4b39d7cf24eb42356fbe6 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 21 Jan 2026 14:22:53 +0530 Subject: [PATCH 56/79] Address review comments - part 10: Remove unnecessary nil checks while accessing maps. --- pkg/apis/machine/v1alpha1/machine_types.go | 6 ++--- pkg/util/provider/machineutils/utils.go | 28 +++++++--------------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 25577ccef..859a4df27 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -246,7 +246,7 @@ const ( const ( // NodePreserved is a node condition type for preservation of machines to allow end-user to know that a node is preserved - NodePreserved corev1.NodeConditionType = "NodePreserved" + NodePreserved corev1.NodeConditionType = "Preserved" // PreservedByMCM is a node condition reason for preservation of machines to indicate that the node is auto-preserved by MCM PreservedByMCM string = "PreservedByMCM" @@ -258,10 +258,10 @@ const ( PreservationStopped string = "PreservationStopped" // PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful - PreservedNodeDrainSuccessful string = "Preserved Node drained successfully" + PreservedNodeDrainSuccessful string = "Preserved node drained successfully" // PreservedNodeDrainUnsuccessful is a constant for the message in 
condition that indicates that the preserved node's drain was not successful - PreservedNodeDrainUnsuccessful string = "Preserved Node could not be drained" + PreservedNodeDrainUnsuccessful string = "Preserved node could not be drained" ) // CurrentStatus contains information about the current status of Machine. diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 6900f41fe..8b233de41 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -150,15 +150,7 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { // PreserveAnnotationsChanged returns true if there is a change in preserve annotations func PreserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string) bool { - valueNew, existsInNew := newAnnotations[PreserveMachineAnnotationKey] - valueOld, existsInOld := oldAnnotations[PreserveMachineAnnotationKey] - if existsInNew != existsInOld { - return true - } - if valueNew != valueOld { - return true - } - return false + return newAnnotations[PreserveMachineAnnotationKey] != oldAnnotations[PreserveMachineAnnotationKey] } // IsFailedMachineCandidateForPreservation checks if the failed machine is already preserved, in the process of being preserved @@ -173,17 +165,13 @@ func IsFailedMachineCandidateForPreservation(machine *v1alpha1.Machine) bool { klog.V(3).Infof("Preservation of failed machine %q has timed out at %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return false } - val, exists := machine.Annotations[PreserveMachineAnnotationKey] // if the machine preservation is not complete yet even though the machine is annotated, consider it as a candidate for preservation - if exists { - switch val { - case PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValuePreservedByMCM: // this is in case preservation process is not complete yet - return true - case 
PreserveMachineAnnotationValueFalse: - return false - default: - return false - } + switch machine.Annotations[PreserveMachineAnnotationKey] { + case PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValuePreservedByMCM: // this is in case preservation process is not complete yet + return true + case PreserveMachineAnnotationValueFalse: + return false + default: + return false } - return false } From 910cef580820934d7c5f816dd8179cfbf3db2ed6 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 21 Jan 2026 16:48:56 +0530 Subject: [PATCH 57/79] Add machine-preserve-timeout flag --- pkg/controller/deployment_machineset_util.go | 8 ++++++-- pkg/util/provider/app/options/options.go | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index d7628b734..e4f301576 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -25,6 +25,7 @@ package controller import ( "context" "fmt" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" "reflect" "k8s.io/klog/v2" @@ -127,8 +128,11 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al } failedMachines = append(failedMachines, machineSummary) } - cond := getMachineCondition(machine, v1alpha1.NodePreserved) - if cond != nil && cond.Reason == v1alpha1.PreservedByMCM { + // Count machines which are auto-preserved by MCM + // we count based on number of machines annotated with PreserveMachineAnnotationValuePreservedByMCM + // this is because, the actual preservation of the machine may not have completed yet + // if triggered very recently, and hence we cannot rely on the Preserved Condition Reason + if machine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { autoPreserveFailedMachineCount++ } } 
diff --git a/pkg/util/provider/app/options/options.go b/pkg/util/provider/app/options/options.go index 00e5ced7e..504fd9823 100644 --- a/pkg/util/provider/app/options/options.go +++ b/pkg/util/provider/app/options/options.go @@ -111,6 +111,7 @@ func (s *MCServer) AddFlags(fs *pflag.FlagSet) { fs.DurationVar(&s.SafetyOptions.PvDetachTimeout.Duration, "machine-pv-detach-timeout", s.SafetyOptions.PvDetachTimeout.Duration, "Timeout (in duration) used while waiting for detach of PV while evicting/deleting pods") fs.DurationVar(&s.SafetyOptions.PvReattachTimeout.Duration, "machine-pv-reattach-timeout", s.SafetyOptions.PvReattachTimeout.Duration, "Timeout (in duration) used while waiting for reattach of PV onto a different node") fs.DurationVar(&s.SafetyOptions.MachineSafetyAPIServerStatusCheckTimeout.Duration, "machine-safety-apiserver-statuscheck-timeout", s.SafetyOptions.MachineSafetyAPIServerStatusCheckTimeout.Duration, "Timeout (in duration) for which the APIServer can be down before declare the machine controller frozen by safety controller") + fs.DurationVar(&s.SafetyOptions.MachinePreserveTimeout.Duration, "machine-preserve-timeout", s.SafetyOptions.MachinePreserveTimeout.Duration, "Duration for which a failed machine should be preserved if it has the appropriate preserve annotation set.") fs.DurationVar(&s.SafetyOptions.MachineSafetyOrphanVMsPeriod.Duration, "machine-safety-orphan-vms-period", s.SafetyOptions.MachineSafetyOrphanVMsPeriod.Duration, "Time period (in duration) used to poll for orphan VMs by safety controller.") fs.DurationVar(&s.SafetyOptions.MachineSafetyAPIServerStatusCheckPeriod.Duration, "machine-safety-apiserver-statuscheck-period", s.SafetyOptions.MachineSafetyAPIServerStatusCheckPeriod.Duration, "Time period (in duration) used to poll for APIServer's health by safety controller") @@ -188,9 +189,11 @@ func (s *MCServer) Validate() error { if s.SafetyOptions.MachineSafetyAPIServerStatusCheckPeriod.Duration < 
s.SafetyOptions.MachineSafetyAPIServerStatusCheckTimeout.Duration { errs = append(errs, fmt.Errorf("machine safety APIServer status check period should not be less than APIServer status check timeout")) } + if s.SafetyOptions.MachinePreserveTimeout.Duration < 0 { + errs = append(errs, fmt.Errorf("machine preserve timeout should be a non-negative number: got %v", s.SafetyOptions.MachinePreserveTimeout.Duration)) + } if s.ControlKubeconfig == "" && s.TargetKubeconfig == constants.TargetKubeconfigDisabledValue { errs = append(errs, fmt.Errorf("--control-kubeconfig cannot be empty if --target-kubeconfig=%s is specified", constants.TargetKubeconfigDisabledValue)) } - return utilerrors.NewAggregate(errs) } From 160dd75a92f37c64200edcf6733213fa14ee9542 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 22 Jan 2026 13:16:18 +0530 Subject: [PATCH 58/79] Address review comments - part 11: - Modify sort function to de-prioritize preserve machines - Add test for the same - Improve logging - Fix bug in stopMachinePreservationIfPreserved when node is not found - Update default MachinePreserveTimeout to 3 days as per doc --- pkg/controller/controller_utils.go | 14 ++++ pkg/controller/controller_utils_test.go | 77 ++++++++++++++++++ pkg/controller/machineset.go | 75 ++++++++++-------- pkg/controller/machineset_test.go | 76 ++++++++++++++++++ pkg/util/provider/app/options/options.go | 2 +- .../provider/machinecontroller/machine.go | 41 +++++----- .../machinecontroller/machine_util.go | 22 ++++-- .../machinecontroller/machine_util_test.go | 55 ++++++++++++- pkg/util/provider/machineutils/utils.go | 26 +----- pkg/util/provider/machineutils/utils_test.go | 79 ------------------- 10 files changed, 297 insertions(+), 170 deletions(-) diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index b0e8e8f54..4ceffb004 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -732,6 +732,20 @@ func (s ActiveMachines) Len() int 
{ return len(s) } func (s ActiveMachines) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func (s ActiveMachines) Less(i, j int) bool { + // Preserved machines have higher priority than other machines and will be deleted last. + // So, we check if either of the machines is preserved first. + // If one is preserved and the other is not, the preserved one is "greater" + // If both are preserved or both are not preserved, we move to the next criteria. + now := metav1.Now() + isPreserved := func(m *v1alpha1.Machine) bool { + return m.Status.CurrentStatus.PreserveExpiryTime != nil && m.Status.CurrentStatus.PreserveExpiryTime.Time.After(now.Time) + } + isPreservedI := isPreserved(s[i]) + isPreservedJ := isPreserved(s[j]) + if isPreservedI != isPreservedJ { + return !isPreservedI // if s[i] preserved, it is "greater" and should not be deleted first, therefore, "less" is false, and vice versa + } + // Default priority for machine objects machineIPriority := 3 machineJPriority := 3 diff --git a/pkg/controller/controller_utils_test.go b/pkg/controller/controller_utils_test.go index 83e40a656..746f4189b 100644 --- a/pkg/controller/controller_utils_test.go +++ b/pkg/controller/controller_utils_test.go @@ -8,6 +8,7 @@ import ( "context" "sort" "strconv" + "time" "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" @@ -206,6 +207,78 @@ var _ = Describe("#controllerUtils", func() { sortedMachinesInOrderOfCreationTimeStamp[2].DeepCopy(), } + sortedPreservedAndUnpreservedMachines := []*machinev1.Machine{ + newMachine( + &machinev1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: machinev1.MachineSpec{ + Class: machinev1.ClassSpec{ + Kind: AWSMachineClass, + Name: TestMachineClass, + }, + }, + }, + &machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + }, + }, + nil, + nil, + nil, + ), + newMachine( + &machinev1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: 
machinev1.MachineSpec{ + Class: machinev1.ClassSpec{ + Kind: AWSMachineClass, + Name: TestMachineClass, + }, + }, + }, + &machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + PreserveExpiryTime: &metav1.Time{ + Time: time.Now().Add(10 * time.Minute), + }, + }, + }, + nil, + nil, + nil, + ), + newMachine( + &machinev1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: machinev1.MachineSpec{ + Class: machinev1.ClassSpec{ + Kind: AWSMachineClass, + Name: TestMachineClass, + }, + }, + }, + &machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineRunning, + PreserveExpiryTime: &metav1.Time{ + Time: time.Now().Add(10 * time.Minute), + }, + }, + }, + nil, + nil, + nil, + ), + } + + unsortedPreservedAndUnpreservedMachines := []*machinev1.Machine{ + sortedPreservedAndUnpreservedMachines[2].DeepCopy(), + sortedPreservedAndUnpreservedMachines[0].DeepCopy(), + sortedPreservedAndUnpreservedMachines[1].DeepCopy(), + } + DescribeTable("###sort", func(data *data) { sort.Sort(ActiveMachines(data.inputMachines)) @@ -224,6 +297,10 @@ var _ = Describe("#controllerUtils", func() { inputMachines: unsortedMachinesInOrderOfCreationTimeStamp, outputMachines: sortedMachinesInOrderOfCreationTimeStamp, }), + Entry("sort on preserved and unpreserved", &data{ + inputMachines: unsortedPreservedAndUnpreservedMachines, + outputMachines: sortedPreservedAndUnpreservedMachines, + }), ) }) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 56e3e8914..f009512b9 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -27,7 +27,6 @@ import ( "errors" "fmt" "reflect" - "slices" "sort" "sync" "time" @@ -344,12 +343,10 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 if machineutils.IsMachineTriggeredForDeletion(m) { staleMachines = append(staleMachines, m) } else if machineutils.IsMachineFailed(m) { - // if machine 
is preserved or in the process of being preserved, the machine should be considered an active machine and not be added to stale machines - preserve := machineutils.IsFailedMachineCandidateForPreservation(m) - if preserve { - activeMachines = append(activeMachines, m) - } else { + if shouldFailedMachineBeTerminated(m) { staleMachines = append(staleMachines, m) + } else { + activeMachines = append(activeMachines, m) } } else if machineutils.IsMachineActive(m) { activeMachines = append(activeMachines, m) @@ -699,27 +696,10 @@ func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1al // < scheduled, and pending < running. This ensures that we delete machines // in the earlier stages whenever possible. sort.Sort(ActiveMachines(filteredMachines)) - // preserved machines are de-prioritised for deletion - // At all times, replica count will be upheld, even if it requires the deletion of a preserved machine - filteredMachines = prioritisePreservedMachines(filteredMachines) } return filteredMachines[:diff] } -// prioritisePreservedMachines moves preserved machines to the end of the slice -func prioritisePreservedMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machine { - preservedMachines := make([]*v1alpha1.Machine, 0, len(machines)) - otherMachines := make([]*v1alpha1.Machine, 0, len(machines)) - for _, mc := range machines { - if mc.Status.CurrentStatus.PreserveExpiryTime != nil && mc.Status.CurrentStatus.PreserveExpiryTime.After(metav1.Now().Time) { - preservedMachines = append(preservedMachines, mc) - } else { - otherMachines = append(otherMachines, mc) - } - } - return slices.Concat(otherMachines, preservedMachines) -} - func getMachineKeys(machines []*v1alpha1.Machine) []string { machineKeys := make([]string, 0, len(machines)) for _, mc := range machines { @@ -940,6 +920,31 @@ func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client. 
return machine, retryErr } +// shouldFailedMachineBeTerminated checks if the failed machine is already preserved, in the process of being preserved +// or if it is a candidate for auto-preservation. If none of these conditions are met, it returns true indicating +// that the failed machine should be terminated. +func shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) bool { + // if preserve expiry time is set and is in the future, machine is already preserved + if machine.Status.CurrentStatus.PreserveExpiryTime != nil { + if machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { + klog.V(3).Infof("Failed machine %q is preserved until %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + return false + } + klog.V(3).Infof("Preservation of failed machine %q has timed out at %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) + return true + } + // if the machine preservation is not complete yet even though the machine is annotated, prevent termination + // so that preservation can complete + switch machine.Annotations[machineutils.PreserveMachineAnnotationKey] { + case machineutils.PreserveMachineAnnotationValueWhenFailed, machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValuePreservedByMCM: // this is in case preservation process is not complete yet + return false + case machineutils.PreserveMachineAnnotationValueFalse: + return true + default: + return true + } +} + // triggerAutoPreservationOfFailedMachines annotates failed machines with preserve=auto-preserved annotation // to trigger preservation of the machines, by the machine controller, up to the limit defined in the // MachineSet's AutoPreserveFailedMachineMax field. 
@@ -951,20 +956,22 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context } for _, m := range machines { if machineutils.IsMachineFailed(m) { - // check if machine is annotated with preserve=false, if yes, do not consider for preservation - if m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValueFalse { + // check if machine is already annotated for preservation, if yes, skip. Machine controller will take care of the rest. + if machineutils.AllowedPreserveAnnotationValues.Has(m.Annotations[machineutils.PreserveMachineAnnotationKey]) { continue } - if autoPreservationCapacityRemaining > 0 { - klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) - err := c.annotateMachineForAutoPreservation(ctx, m) - if err != nil { - klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) - // since annotateMachineForAutoPreservation uses retries internally, we can continue with other machines - continue - } - autoPreservationCapacityRemaining = autoPreservationCapacityRemaining - 1 + if autoPreservationCapacityRemaining == 0 { + return + } + + klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) + err := c.annotateMachineForAutoPreservation(ctx, m) + if err != nil { + klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) + // since annotateForAutoPreservation uses retries internally, we can continue with other machines + continue } + autoPreservationCapacityRemaining = autoPreservationCapacityRemaining - 1 } } } diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index b87cb90ea..b8da6afc3 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -2002,4 +2002,80 @@ var _ = Describe("machineset", func() { }), ) }) + 
Describe("#shouldFailedMachineBeTerminated", func() { + + type setup struct { + preserveExpiryTime *metav1.Time + annotationValue string + } + type expect struct { + result bool + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("shouldFailedMachineBeTerminated test cases", func(tc testCase) { + machine := machinev1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-machine", + Namespace: "default", + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: tc.setup.annotationValue, + }, + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + PreserveExpiryTime: tc.setup.preserveExpiryTime, + }, + }, + } + result := shouldFailedMachineBeTerminated(&machine) + Expect(result).To(Equal(tc.expect.result)) + }, + Entry("should return false if preserve expiry time is in the future", testCase{ + setup: setup{ + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, + annotationValue: machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + result: false, + }, + }), + Entry("should return true if machine is annotated with preserve=false", testCase{ + setup: setup{ + annotationValue: machineutils.PreserveMachineAnnotationValueFalse, + }, + expect: expect{ + result: true, + }, + }), + Entry("should return false if machine is annotated with preserve=now", testCase{ + setup: setup{ + annotationValue: machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + result: false, + }, + }), + Entry("should return false if machine is annotated with preserve=when-failed", testCase{ + setup: setup{ + annotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + }, + expect: expect{ + result: false, + }, + }), + Entry("should return true if preservation has timed out", testCase{ + setup: setup{ + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Second)}, + annotationValue: 
machineutils.PreserveMachineAnnotationValueNow, + }, + expect: expect{ + result: true, + }, + }), + ) + }) }) diff --git a/pkg/util/provider/app/options/options.go b/pkg/util/provider/app/options/options.go index 504fd9823..7626749a0 100644 --- a/pkg/util/provider/app/options/options.go +++ b/pkg/util/provider/app/options/options.go @@ -78,7 +78,7 @@ func NewMCServer() *MCServer { MachineSafetyOrphanVMsPeriod: metav1.Duration{Duration: 15 * time.Minute}, MachineSafetyAPIServerStatusCheckPeriod: metav1.Duration{Duration: 1 * time.Minute}, MachineSafetyAPIServerStatusCheckTimeout: metav1.Duration{Duration: 30 * time.Second}, - MachinePreserveTimeout: metav1.Duration{Duration: 3 * time.Hour}, + MachinePreserveTimeout: metav1.Duration{Duration: 72 * time.Hour}, }, }, } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index eda2a0d7d..8464e3a0b 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -762,12 +762,6 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if err != nil { return } - // either annotation has been deleted, set to empty or no preserve annotation exists. - // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. - if preserveValue == "" { - err = c.stopMachinePreservationIfPreserved(ctx, machine, true) - return - } // if preserve value differs from machine's preserve value, overwrite the value in the machine clone := machine.DeepCopy() if machine.Annotations[machineutils.PreserveMachineAnnotationKey] != preserveValue { @@ -776,6 +770,12 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } } + // either annotation has been deleted, set to empty or no preserve annotation exists. + // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. 
+ if preserveValue == "" { + err = c.stopMachinePreservationIfPreserved(ctx, machine, true) + return + } if !machineutils.AllowedPreserveAnnotationValues.Has(preserveValue) { klog.Warningf("Preserve annotation value %q on machine %q is invalid", preserveValue, machine.Name) return @@ -798,10 +798,6 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a // In this case, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. // If the machine fails again, since preserve annotation is present, it will be preserved again. - // CA scale down disabled annotation is retained on a machine on recovery from Failed to Running, so that - // CA does not scale down the node due to under-utilization immediately after recovery. - // This allows pods to get scheduled onto the recovered node - if machine.Labels[v1alpha1.NodeLabelKey] != "" { var node *corev1.Node node, err = c.nodeLister.Get(machine.Labels[v1alpha1.NodeLabelKey]) @@ -809,6 +805,9 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a klog.Errorf("error getting node %q for machine %q: %v", machine.Labels[v1alpha1.NodeLabelKey], machine.Name, err) return } + // CA scale down disabled annotation is retained on a machine on recovery from Failed to Running, so that + // CA does not scale down the node due to under-utilization immediately after recovery. 
+ // This allows pods to get scheduled onto the recovered node _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { return @@ -818,25 +817,21 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if err != nil { return } - // If the machine is running and has a backing node, uncordon the node if cordoned - // this is to handle the scenario where a preserved machine recovers from Failed to Running - // in which case, pods should be allowed to be scheduled onto the node - if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { - err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) - } + } } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow { err = c.preserveMachine(ctx, clone, preserveValue) if err != nil { return } - // If the machine is running and has a backing node, uncordon the node if cordoned - // this is to handle the scenario where a preserved machine recovers from Failed to Running - if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { - err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) - } - // since the preserve value is 'now', machine preservation need not be stopped. - return + } + // At this point, the machine is annotated either with preserve=now or preserve=when-failed or preserve=auto-preserved, + // and machine preservation has been stopped if applicable. + // If the machine is running and has a backing node, uncordon the node if cordoned. 
+ // This is to handle the scenario where a preserved machine recovers from Failed to Running + // in which case, pods should be allowed to be scheduled onto the node + if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { + err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) } return } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 4a521e193..8cf024b9e 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2373,7 +2373,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } if nodeName == "" { // Machine has no backing node, preservation is complete - klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) + klog.V(2).Infof("Machine %q without backing node is preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) return nil } // Machine has a backing node @@ -2395,7 +2395,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return err } var drainErr error - if c.shouldPreservedNodeBeDrained(updatedMachine, existingNodePreservedCondition) { + if shouldPreservedNodeBeDrained(existingNodePreservedCondition, updatedMachine.Status.CurrentStatus.Phase) { // Step 3: If machine is in Failed Phase, drain the backing node drainErr = c.drainPreservedNode(ctx, machine) } @@ -2411,7 +2411,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return err } } - klog.V(2).Infof("Machine %s preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) + klog.V(2).Infof("Machine %q and backing node preserved successfully till %v.", machine.Name, 
updatedMachine.Status.CurrentStatus.PreserveExpiryTime) return nil } @@ -2427,6 +2427,18 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac // Machine has a backing node node, err := c.nodeLister.Get(nodeName) if err != nil { + // if node is not found and error is simply returned, then preservation will never be stopped on machine + // therefore, this error is handled specifically + if apierrors.IsNotFound(err) { + // Node not found, proceed to clear preserveExpiryTime on machine + klog.Warningf("Node %q of machine %q not found. Proceeding to clear preserve expiry time on machine.", nodeName, machine.Name) + err := c.clearMachinePreserveExpiryTime(ctx, machine) + if err != nil { + return err + } + klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) + return nil + } klog.Errorf("error trying to get node %q of machine %q: %v. Retrying.", nodeName, machine.Name, err) return err } @@ -2572,8 +2584,8 @@ func computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preser } // shouldPreservedNodeBeDrained returns true if the machine's backing node must be drained, else false -func (c *controller) shouldPreservedNodeBeDrained(machine *v1alpha1.Machine, existingCondition *v1.NodeCondition) bool { - if machine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed { +func shouldPreservedNodeBeDrained(existingCondition *v1.NodeCondition, machinePhase v1alpha1.MachinePhase) bool { + if machinePhase == v1alpha1.MachineFailed { if existingCondition == nil { return true } diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index c3c0e918e..97430dadf 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4288,7 +4288,7 @@ var _ = Describe("machine_util", func() { Expect(getErr).To(BeNil()) 
Expect(updatedMachine.Status.CurrentStatus.PreserveExpiryTime.IsZero()).To(BeTrue()) - if machine.Labels[machinev1.NodeLabelKey] == "" || machine.Labels[machinev1.NodeLabelKey] == "err-backing-node" { + if machine.Labels[machinev1.NodeLabelKey] == "" || machine.Labels[machinev1.NodeLabelKey] == "no-backing-node" { return } updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) @@ -4298,7 +4298,6 @@ var _ = Describe("machine_util", func() { } else { Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("true")) } - updatedNodeCondition := nodeops.GetCondition(updatedNode, machinev1.NodePreserved) Expect(updatedNodeCondition).ToNot(BeNil()) Expect(updatedNodeCondition.Status).To(Equal(corev1.ConditionFalse)) @@ -4327,7 +4326,7 @@ var _ = Describe("machine_util", func() { nodeName: "no-backing-node", }, expect: expect{ - err: fmt.Errorf("node \"no-backing-node\" not found"), + err: nil, }, }), Entry("when stopping preservation on a preserved machine, but retaining CA annotation", &testCase{ @@ -4507,4 +4506,54 @@ var _ = Describe("machine_util", func() { }), ) }) + Describe("#shouldPreservedNodeBeDrained", func() { + type setup struct { + machinePhase machinev1.MachinePhase + existingCondition *corev1.NodeCondition + } + type expect struct { + shouldDrain bool + } + type testCase struct { + setup setup + expect expect + } + + DescribeTable("##shouldPreservedNodeBeDrained behaviour scenarios", + func(tc *testCase) { + shouldDrain := shouldPreservedNodeBeDrained(tc.setup.existingCondition, tc.setup.machinePhase) + Expect(shouldDrain).To(Equal(tc.expect.shouldDrain)) + }, + Entry("should return false when machine is Running", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineRunning, + }, + expect: expect{ + shouldDrain: false, + }, + }), + Entry("should return true when machine is Failed and no existing condition", &testCase{ + setup: setup{ + 
machinePhase: machinev1.MachineFailed, + }, + expect: expect{ + shouldDrain: true, + }, + }), + Entry("should return true when machine is Failed and existing condition message is PreservedNodeDrainUnsuccessful", &testCase{ + setup: setup{ + machinePhase: machinev1.MachineFailed, + existingCondition: &corev1.NodeCondition{ + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.PreservedByUser, + Message: machinev1.PreservedNodeDrainUnsuccessful, + }, + }, + expect: expect{ + shouldDrain: true, + }, + }), + ) + }) }) diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 8b233de41..7a65b1640 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -9,7 +9,6 @@ import ( "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/klog/v2" "time" ) @@ -104,7 +103,7 @@ const ( ) // AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation -var AllowedPreserveAnnotationValues = sets.New[string](PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) +var AllowedPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) // RetryPeriod is an alias for specifying the retry period type RetryPeriod time.Duration @@ -152,26 +151,3 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { func PreserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string) bool { return newAnnotations[PreserveMachineAnnotationKey] != oldAnnotations[PreserveMachineAnnotationKey] } - -// IsFailedMachineCandidateForPreservation checks if the failed machine is already preserved, in the process of being 
preserved -// or if it is a candidate for auto-preservation -func IsFailedMachineCandidateForPreservation(machine *v1alpha1.Machine) bool { - // if preserve expiry time is set and is in the future, machine is already preserved - if machine.Status.CurrentStatus.PreserveExpiryTime != nil { - if machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { - klog.V(3).Infof("Failed machine %q is preserved until %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) - return true - } - klog.V(3).Infof("Preservation of failed machine %q has timed out at %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) - return false - } - // if the machine preservation is not complete yet even though the machine is annotated, consider it as a candidate for preservation - switch machine.Annotations[PreserveMachineAnnotationKey] { - case PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValuePreservedByMCM: // this is in case preservation process is not complete yet - return true - case PreserveMachineAnnotationValueFalse: - return false - default: - return false - } -} diff --git a/pkg/util/provider/machineutils/utils_test.go b/pkg/util/provider/machineutils/utils_test.go index eed5019a0..7efa3c740 100644 --- a/pkg/util/provider/machineutils/utils_test.go +++ b/pkg/util/provider/machineutils/utils_test.go @@ -2,13 +2,10 @@ package machineutils import ( "flag" - machinev1 "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" "testing" - "time" ) func TestMachineUtilsSuite(t *testing.T) { @@ -28,82 +25,6 @@ var _ = BeforeSuite(func() { }) var _ = Describe("utils.go", func() { - Describe("#isFailedMachineCandidateForPreservation", func() { - - type setup struct { - preserveExpiryTime *metav1.Time - annotationValue string - } - type expect struct { - result bool - } - type testCase struct { - setup setup - expect expect - } - DescribeTable("isFailedMachineCandidateForPreservation test cases", func(tc testCase) { - machine := machinev1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-machine", - Namespace: "default", - Annotations: map[string]string{ - PreserveMachineAnnotationKey: tc.setup.annotationValue, - }, - }, - Status: machinev1.MachineStatus{ - CurrentStatus: machinev1.CurrentStatus{ - Phase: machinev1.MachineFailed, - PreserveExpiryTime: tc.setup.preserveExpiryTime, - }, - }, - } - result := IsFailedMachineCandidateForPreservation(&machine) - Expect(result).To(Equal(tc.expect.result)) - }, - Entry("should return true if preserve expiry time is in the future", testCase{ - setup: setup{ - preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, - annotationValue: PreserveMachineAnnotationValueNow, - }, - expect: expect{ - result: true, - }, - }), - Entry("should return false if machine is annotated with preserve=false", testCase{ - setup: setup{ - annotationValue: PreserveMachineAnnotationValueFalse, - }, - expect: expect{ - result: false, - }, - }), - Entry("should return true if machine is annotated with preserve=now", testCase{ - setup: setup{ - annotationValue: PreserveMachineAnnotationValueNow, - }, - expect: expect{ - result: true, - }, - }), - Entry("should return true if machine is annotated with preserve=when-failed", testCase{ - setup: setup{ - annotationValue: PreserveMachineAnnotationValueWhenFailed, - }, - expect: expect{ - result: true, - }, - }), - 
Entry("should return false if preservation has timed out", testCase{ - setup: setup{ - preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Second)}, - annotationValue: PreserveMachineAnnotationValueNow, - }, - expect: expect{ - result: false, - }, - }), - ) - }) Describe("#PreserveAnnotationsChanged", func() { type setup struct { oldAnnotations map[string]string From 988018531849fbeb9397da8e7a1f3f07cdb670c7 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 23 Jan 2026 10:19:12 +0530 Subject: [PATCH 59/79] Address review comments - part 12: - Reuse function to write annotation on machine - Minor refactoring --- pkg/controller/controller_utils.go | 2 +- pkg/controller/deployment_util.go | 2 +- pkg/controller/machineset.go | 108 +++++++++--------- pkg/controller/machineset_util.go | 6 +- .../provider/machinecontroller/machine.go | 26 +++-- .../machinecontroller/machine_util.go | 99 ++++++++-------- pkg/util/provider/machineutils/utils.go | 56 +++++++++ 7 files changed, 181 insertions(+), 118 deletions(-) diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 4ceffb004..acd4df5c2 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -738,7 +738,7 @@ func (s ActiveMachines) Less(i, j int) bool { // If both are preserved or both are not preserved, we move to the next criteria. 
now := metav1.Now() isPreserved := func(m *v1alpha1.Machine) bool { - return m.Status.CurrentStatus.PreserveExpiryTime != nil && m.Status.CurrentStatus.PreserveExpiryTime.Time.After(now.Time) + return m.Status.CurrentStatus.PreserveExpiryTime != nil && m.Status.CurrentStatus.PreserveExpiryTime.After(now.Time) } isPreservedI := isPreserved(s[i]) isPreservedJ := isPreserved(s[j]) diff --git a/pkg/controller/deployment_util.go b/pkg/controller/deployment_util.go index 690b4feb1..002fd2a7f 100644 --- a/pkg/controller/deployment_util.go +++ b/pkg/controller/deployment_util.go @@ -969,7 +969,7 @@ func LabelMachinesWithHash(ctx context.Context, machineList *v1alpha1.MachineLis } // Only label the machine that doesn't already have the new hash if machine.Labels[v1alpha1.DefaultMachineDeploymentUniqueLabelKey] != hash { - _, err := UpdateMachineWithRetries(ctx, c.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, + _, err := machineutils.UpdateMachineWithRetries(ctx, c.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, func(machineToUpdate *v1alpha1.Machine) error { // Precondition: the machine doesn't contain the new hash in its label. 
if machineToUpdate.Labels[v1alpha1.DefaultMachineDeploymentUniqueLabelKey] == hash { diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index f009512b9..1e5f1d4bc 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -35,19 +35,15 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - errorsutil "k8s.io/apimachinery/pkg/util/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" - "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "k8s.io/utils/integer" "github.com/gardener/machine-controller-manager/pkg/apis/machine" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" "github.com/gardener/machine-controller-manager/pkg/apis/machine/validation" - v1alpha1client "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1" - v1alpha1listers "github.com/gardener/machine-controller-manager/pkg/client/listers/machine/v1alpha1" "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" ) @@ -887,38 +883,39 @@ func isMachineStatusEqual(s1, s2 v1alpha1.MachineStatus) bool { return apiequality.Semantic.DeepEqual(s1Copy.LastOperation, s2Copy.LastOperation) && apiequality.Semantic.DeepEqual(s1Copy.CurrentStatus, s2Copy.CurrentStatus) } -// see https://github.com/kubernetes/kubernetes/issues/21479 -type updateMachineFunc func(machine *v1alpha1.Machine) error - -// UpdateMachineWithRetries updates a machine with given applyUpdate function. Note that machine not found error is ignored. -// The returned bool value can be used to tell if the machine is actually updated. 
-func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, namespace, name string, applyUpdate updateMachineFunc) (*v1alpha1.Machine, error) { - var machine *v1alpha1.Machine - - retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - var err error - machine, err = machineLister.Machines(namespace).Get(name) - if err != nil { - return err - } - machine = machine.DeepCopy() - // Apply the update, then attempt to push it to the apiserver. - if applyErr := applyUpdate(machine); applyErr != nil { - return applyErr - } - machine, err = machineClient.Update(ctx, machine, metav1.UpdateOptions{}) - return err - }) - - // Ignore the precondition violated error, this machine is already updated - // with the desired label. - if retryErr == errorsutil.ErrPreconditionViolated { - klog.V(4).Infof("Machine %s precondition doesn't hold, skip updating it.", name) - retryErr = nil - } - - return machine, retryErr -} +// +//// see https://github.com/kubernetes/kubernetes/issues/21479 +//type updateMachineFunc func(machine *v1alpha1.Machine) error +// +//// UpdateMachineWithRetries updates a machine with given applyUpdate function. Note that machine not found error is ignored. +//// The returned bool value can be used to tell if the machine is actually updated. +//func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, namespace, name string, applyUpdate updateMachineFunc) (*v1alpha1.Machine, error) { +// var machine *v1alpha1.Machine +// +// retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { +// var err error +// machine, err = machineLister.Machines(namespace).Get(name) +// if err != nil { +// return err +// } +// machine = machine.DeepCopy() +// // Apply the update, then attempt to push it to the apiserver. 
+// if applyErr := applyUpdate(machine); applyErr != nil { +// return applyErr +// } +// machine, err = machineClient.Update(ctx, machine, metav1.UpdateOptions{}) +// return err +// }) +// +// // Ignore the precondition violated error, this machine is already updated +// // with the desired label. +// if retryErr == errorsutil.ErrPreconditionViolated { +// klog.V(4).Infof("Machine %s precondition doesn't hold, skip updating it.", name) +// retryErr = nil +// } +// +// return machine, retryErr +//} // shouldFailedMachineBeTerminated checks if the failed machine is already preserved, in the process of being preserved // or if it is a candidate for auto-preservation. If none of these conditions are met, it returns true indicating @@ -963,32 +960,31 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context if autoPreservationCapacityRemaining == 0 { return } - klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) - err := c.annotateMachineForAutoPreservation(ctx, m) + _, err := machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m, machineutils.PreserveMachineAnnotationValuePreservedByMCM) if err != nil { klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) - // since annotateForAutoPreservation uses retries internally, we can continue with other machines + // since AnnotateMachineWithPreserveValueWithRetries uses retries internally, we can continue with other machines continue } - autoPreservationCapacityRemaining = autoPreservationCapacityRemaining - 1 + autoPreservationCapacityRemaining-- } } } -// annotateMachineForAutoPreservation annotates the given machine with the auto-preservation annotation to trigger -// preservation of the machine by the machine controller. 
-func (c *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { - _, err := UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { - if clone.Annotations == nil { - clone.Annotations = make(map[string]string) - } - clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM - return nil - }) - if err != nil { - return err - } - klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) - return nil -} +//// annotateMachineForAutoPreservation annotates the given machine with the auto-preservation annotation to trigger +//// preservation of the machine by the machine controller. +//func (c *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { +// _, err := UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { +// if clone.Annotations == nil { +// clone.Annotations = make(map[string]string) +// } +// clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM +// return nil +// }) +// if err != nil { +// return err +// } +// klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) +// return nil +//} diff --git a/pkg/controller/machineset_util.go b/pkg/controller/machineset_util.go index 78ce9d803..aa5e5b5cd 100644 --- a/pkg/controller/machineset_util.go +++ b/pkg/controller/machineset_util.go @@ -95,7 +95,7 @@ func (c *controller) syncMachinesNodeTemplates(ctx context.Context, machineList nodeTemplateChanged := copyMachineSetNodeTemplatesToMachines(machineSet, machine) 
// Only sync the machine that doesn't already have the latest nodeTemplate. if nodeTemplateChanged { - _, err := UpdateMachineWithRetries(ctx, controlClient.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, + _, err := machineutils.UpdateMachineWithRetries(ctx, controlClient.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, func(_ *v1alpha1.Machine) error { return nil }) @@ -118,7 +118,7 @@ func (c *controller) syncMachinesClassKind(ctx context.Context, machineList []*v classKindChanged := copyMachineSetClassKindToMachines(machineSet, machine) // Only sync the machine that doesn't already have the matching classKind. if classKindChanged { - _, err := UpdateMachineWithRetries(ctx, controlClient.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, + _, err := machineutils.UpdateMachineWithRetries(ctx, controlClient.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, func(_ *v1alpha1.Machine) error { return nil }) @@ -161,7 +161,7 @@ func (c *controller) syncMachinesConfig(ctx context.Context, machineList []*v1al configChanged := copyMachineSetConfigToMachines(machineSet, machine) // Only sync the machine that doesn't already have the latest config. 
if configChanged { - _, err := UpdateMachineWithRetries(ctx, controlClient.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, + _, err := machineutils.UpdateMachineWithRetries(ctx, controlClient.Machines(machine.Namespace), machineLister, machine.Namespace, machine.Name, func(_ *v1alpha1.Machine) error { return nil }) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 8464e3a0b..e30cf68ae 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -63,9 +63,12 @@ func (c *controller) updateMachine(oldObj, newObj any) { c.enqueueMachine(newObj, "handling machine object preservation related UPDATE event") return } - // this check is required to enqueue a previously failed preserved machine when its phase changes to Running on recovery + // this check is required to enqueue a machine, annotated with the preservation key, whose phase has changed. + // if annotated with "when-failed", the machine should be preserved on Failure + // if annotated with "now", the machine should be drained on Failure + // if machine phase changes from Failed to Running, machine preservation should be stopped _, exists := newMachine.Annotations[machineutils.PreserveMachineAnnotationKey] - if exists && oldMachine.Status.CurrentStatus.Phase == v1alpha1.MachineFailed && newMachine.Status.CurrentStatus.Phase != v1alpha1.MachineFailed { + if exists && oldMachine.Status.CurrentStatus.Phase != newMachine.Status.CurrentStatus.Phase { c.enqueueMachine(newObj, "handling preserved machine phase update") } @@ -764,20 +767,21 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } // if preserve value differs from machine's preserve value, overwrite the value in the machine clone := machine.DeepCopy() - if machine.Annotations[machineutils.PreserveMachineAnnotationKey] != preserveValue { - clone, err = 
c.writePreserveAnnotationValueOnMachine(ctx, clone, preserveValue) + if clone.Annotations[machineutils.PreserveMachineAnnotationKey] != preserveValue { + clone, err = machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(clone.Namespace), c.machineLister, clone, preserveValue) if err != nil { + klog.Errorf("error annotating machine %q with preserve annotation value %q: %v", clone.Name, preserveValue, err) return } } // either annotation has been deleted, set to empty or no preserve annotation exists. // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. if preserveValue == "" { - err = c.stopMachinePreservationIfPreserved(ctx, machine, true) + err = c.stopMachinePreservationIfPreserved(ctx, clone, true) return } if !machineutils.AllowedPreserveAnnotationValues.Has(preserveValue) { - klog.Warningf("Preserve annotation value %q on machine %q is invalid", preserveValue, machine.Name) + klog.Warningf("Preserve annotation value %q on machine %q is invalid", preserveValue, clone.Name) return } // if preserve=false or if preservation has expired, stop preservation @@ -798,11 +802,11 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a // In this case, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. // If the machine fails again, since preserve annotation is present, it will be preserved again. 
- if machine.Labels[v1alpha1.NodeLabelKey] != "" { + if clone.Labels[v1alpha1.NodeLabelKey] != "" { var node *corev1.Node - node, err = c.nodeLister.Get(machine.Labels[v1alpha1.NodeLabelKey]) + node, err = c.nodeLister.Get(clone.Labels[v1alpha1.NodeLabelKey]) if err != nil { - klog.Errorf("error getting node %q for machine %q: %v", machine.Labels[v1alpha1.NodeLabelKey], machine.Name, err) + klog.Errorf("error getting node %q for machine %q: %v", clone.Labels[v1alpha1.NodeLabelKey], clone.Name, err) return } // CA scale down disabled annotation is retained on a machine on recovery from Failed to Running, so that @@ -830,8 +834,8 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a // If the machine is running and has a backing node, uncordon the node if cordoned. // This is to handle the scenario where a preserved machine recovers from Failed to Running // in which case, pods should be allowed to be scheduled onto the node - if machine.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && machine.Labels[v1alpha1.NodeLabelKey] != "" { - err = c.uncordonNodeIfCordoned(ctx, machine.Labels[v1alpha1.NodeLabelKey]) + if clone.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && clone.Labels[v1alpha1.NodeLabelKey] != "" { + err = c.uncordonNodeIfCordoned(ctx, clone.Labels[v1alpha1.NodeLabelKey]) } return } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 8cf024b9e..24a7b5fdb 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2362,24 +2362,24 @@ Utility Functions for Machine Preservation func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { var err error nodeName := machine.Labels[v1alpha1.NodeLabelKey] - updatedMachine := machine.DeepCopy() + machineClone := machine.DeepCopy() if machine.Status.CurrentStatus.PreserveExpiryTime 
== nil { - klog.V(4).Infof("Starting preservation flow for machine %q.", machine.Name) + klog.V(4).Infof("Starting preservation flow for machine %q.", machineClone.Name) // Step 1: Add preserveExpiryTime to machine status - updatedMachine, err = c.setPreserveExpiryTimeOnMachine(ctx, updatedMachine) + machineClone, err = c.setPreserveExpiryTimeOnMachine(ctx, machineClone) if err != nil { return err } } if nodeName == "" { // Machine has no backing node, preservation is complete - klog.V(2).Infof("Machine %q without backing node is preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) + klog.V(2).Infof("Machine %q without backing node is preserved successfully till %v.", machineClone.Name, machineClone.Status.CurrentStatus.PreserveExpiryTime) return nil } // Machine has a backing node node, err := c.nodeLister.Get(nodeName) if err != nil { - klog.Errorf("error trying to get node %q of machine %q: %v. Retrying.", nodeName, machine.Name, err) + klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machineClone.Name, err) return err } existingNodePreservedCondition := nodeops.GetCondition(node, v1alpha1.NodePreserved) @@ -2387,7 +2387,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach if existingNodePreservedCondition != nil && existingNodePreservedCondition.Status == v1.ConditionTrue { return nil } - // Preservation incomplete - either the flow is just starting or in progress + // Preservation incomplete - either the flow is being run for the first time, or previous attempt failed midway // Step 2: Add annotations to prevent scale down of node by CA updatedNode, err := c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) @@ -2395,11 +2395,11 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return err } var drainErr error - if shouldPreservedNodeBeDrained(existingNodePreservedCondition, updatedMachine.Status.CurrentStatus.Phase) { + if shouldPreservedNodeBeDrained(existingNodePreservedCondition, machineClone.Status.CurrentStatus.Phase) { // Step 3: If machine is in Failed Phase, drain the backing node drainErr = c.drainPreservedNode(ctx, machine) } - newCond, needsUpdate := computeNewNodePreservedCondition(machine.Status.CurrentStatus.Phase, preserveValue, drainErr, existingNodePreservedCondition) + newCond, needsUpdate := computeNewNodePreservedCondition(machineClone.Status.CurrentStatus.Phase, preserveValue, drainErr, existingNodePreservedCondition) if needsUpdate { // Step 4: Update NodePreserved Condition on Node, with drain status _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) @@ -2411,7 +2411,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return err } } - klog.V(2).Infof("Machine %q and backing node preserved successfully till %v.", machine.Name, updatedMachine.Status.CurrentStatus.PreserveExpiryTime) + klog.V(2).Infof("Machine %q and backing node preserved successfully till 
%v.", machine.Name, machineClone.Status.CurrentStatus.PreserveExpiryTime) return nil } @@ -2423,49 +2423,56 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac return nil } nodeName := machine.Labels[v1alpha1.NodeLabelKey] - if nodeName != "" { - // Machine has a backing node - node, err := c.nodeLister.Get(nodeName) - if err != nil { - // if node is not found and error is simply returned, then preservation will never be stopped on machine - // therefore, this error is handled specifically - if apierrors.IsNotFound(err) { - // Node not found, proceed to clear preserveExpiryTime on machine - klog.Warningf("Node %q of machine %q not found. Proceeding to clear preserve expiry time on machine.", nodeName, machine.Name) - err := c.clearMachinePreserveExpiryTime(ctx, machine) - if err != nil { - return err - } - klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) - return nil - } - klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) - return err - } - // prepare NodeCondition to set preservation as stopped - preservedConditionFalse := v1.NodeCondition{ - Type: v1alpha1.NodePreserved, - Status: v1.ConditionFalse, - LastTransitionTime: metav1.Now(), - Reason: v1alpha1.PreservationStopped, - } - // Step 1: change node condition to reflect that preservation has stopped - updatedNode, err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, node.Name, preservedConditionFalse) + if nodeName == "" { + err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { return err } - // Step 2: remove CA scale down disabled annotation from node - // only remove if removeCAScaleDownDisabledAnnotation is not "when-failed" since in that case, - // scale down should remain disabled even after preservation is stopped - if removeCAScaleDownDisabledAnnotation { - err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, updatedNode) + klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) + return nil + } + + // Machine has a backing node + node, err := c.nodeLister.Get(nodeName) + if err != nil { + // if node is not found and error is simply returned, then preservation will never be stopped on machine + // therefore, this error is handled specifically + if apierrors.IsNotFound(err) { + // Node not found, proceed to clear preserveExpiryTime on machine + klog.Warningf("Node %q of machine %q not found. Proceeding to clear preserve expiry time on machine.", nodeName, machine.Name) + err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { return err } + klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) + return nil + } + klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) + return err + } + // prepare NodeCondition to set preservation as stopped + preservedConditionFalse := v1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: v1.ConditionFalse, + LastTransitionTime: metav1.Now(), + Reason: v1alpha1.PreservationStopped, + } + // Step 1: change node condition to reflect that preservation has stopped + updatedNode, err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, node.Name, preservedConditionFalse) + if err != nil { + return err + } + // Step 2: remove CA scale down disabled annotation from node + // only remove if removeCAScaleDownDisabledAnnotation is not "when-failed" since in that case, + // scale down should remain disabled even after preservation is stopped + if removeCAScaleDownDisabledAnnotation { + err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, updatedNode) + if err != nil { + return err } } // Step 3: update machine status to set preserve expiry time to nil - err := c.clearMachinePreserveExpiryTime(ctx, machine) + err = c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { return err } @@ -2539,9 +2546,9 @@ func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string if !node.Spec.Unschedulable { return nil } - clonedNode := node.DeepCopy() - clonedNode.Spec.Unschedulable = false - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, clonedNode, metav1.UpdateOptions{}) + nodeClone := node.DeepCopy() + nodeClone.Spec.Unschedulable = false + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeClone, metav1.UpdateOptions{}) return err } diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 7a65b1640..fb7a1a843 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -6,9 +6,16 @@ package machineutils import ( + "context" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + v1alpha1client 
"github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1" + v1alpha1listers "github.com/gardener/machine-controller-manager/pkg/client/listers/machine/v1alpha1" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + errorsutil "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" "time" ) @@ -151,3 +158,52 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { func PreserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string) bool { return newAnnotations[PreserveMachineAnnotationKey] != oldAnnotations[PreserveMachineAnnotationKey] } + +// AnnotateMachineWithPreserveValueWithRetries annotates the given machine with the preservation annotation value +func AnnotateMachineWithPreserveValueWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, m *v1alpha1.Machine, preserveValue string) (*v1alpha1.Machine, error) { + updatedMachine, err := UpdateMachineWithRetries(ctx, machineClient, machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { + if clone.Annotations == nil { + clone.Annotations = make(map[string]string) + } + clone.Annotations[PreserveMachineAnnotationKey] = preserveValue + return nil + }) + if err != nil { + return nil, err + } + klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, PreserveMachineAnnotationKey, preserveValue) + return updatedMachine, nil +} + +// see https://github.com/kubernetes/kubernetes/issues/21479 +type updateMachineFunc func(machine *v1alpha1.Machine) error + +// UpdateMachineWithRetries updates a machine with given applyUpdate function. Note that machine not found error is ignored. +// The returned bool value can be used to tell if the machine is actually updated. 
+func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, namespace, name string, applyUpdate updateMachineFunc) (*v1alpha1.Machine, error) { + var machine *v1alpha1.Machine + + retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + var err error + machine, err = machineLister.Machines(namespace).Get(name) + if err != nil { + return err + } + machine = machine.DeepCopy() + // Apply the update, then attempt to push it to the apiserver. + if applyErr := applyUpdate(machine); applyErr != nil { + return applyErr + } + machine, err = machineClient.Update(ctx, machine, metav1.UpdateOptions{}) + return err + }) + + // Ignore the precondition violated error, this machine is already updated + // with the desired label. + if retryErr == errorsutil.ErrPreconditionViolated { + klog.V(4).Infof("Machine %s precondition doesn't hold, skip updating it.", name) + retryErr = nil + } + + return machine, retryErr +} From a56c49c4e7ba52ea7341d1445475b936f64b02d8 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 23 Jan 2026 16:18:09 +0530 Subject: [PATCH 60/79] Handle edge cases: - Make changes to add auto-preserve-stopped on recovered, auto-preserved previously failed machines. 
- Change stopMachinePreservationIfPreserved to removeCA annotation when preserve=false on a recovered failed, preserved machine --- pkg/controller/deployment_machineset_util.go | 8 ++-- pkg/controller/machineset.go | 7 +-- .../provider/machinecontroller/machine.go | 48 ++++++++----------- .../machinecontroller/machine_util.go | 21 +++++++- pkg/util/provider/machineutils/utils.go | 6 ++- 5 files changed, 53 insertions(+), 37 deletions(-) diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index e4f301576..7d7fcc9f6 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -128,11 +128,9 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al } failedMachines = append(failedMachines, machineSummary) } - // Count machines which are auto-preserved by MCM - // we count based on number of machines annotated with PreserveMachineAnnotationValuePreservedByMCM - // this is because, the actual preservation of the machine may not have completed yet - // if triggered very recently, and hence we cannot rely on the Preserved Condition Reason - if machine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + // Count number of failed machines annotated with PreserveMachineAnnotationValuePreservedByMCM + // Previously auto-preserved failed machines that have recovered to `Running` should count towards this + if machineutils.IsMachineFailed(machine) && machine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { autoPreserveFailedMachineCount++ } } diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 1e5f1d4bc..ca3577217 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -858,7 +858,7 @@ func (c *controller) updateMachineStatus( clone, err = 
c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) if err != nil { // Keep retrying until update goes through - klog.V(3).Infof("Warning: Updated failed, retrying, error: %q", err) + klog.V(3).Infof("Warning: Update failed, retrying, error: %q", err) return c.updateMachineStatus(ctx, machine, lastOperation, currentStatus) } return clone, nil @@ -951,7 +951,7 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context // no capacity remaining, nothing to do return } - for _, m := range machines { + for index, m := range machines { if machineutils.IsMachineFailed(m) { // check if machine is already annotated for preservation, if yes, skip. Machine controller will take care of the rest. if machineutils.AllowedPreserveAnnotationValues.Has(m.Annotations[machineutils.PreserveMachineAnnotationKey]) { @@ -961,12 +961,13 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context return } klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) - _, err := machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m, machineutils.PreserveMachineAnnotationValuePreservedByMCM) + updatedMachine, err := machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m, machineutils.PreserveMachineAnnotationValuePreservedByMCM) if err != nil { klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) // since AnnotateMachineWithPreserveValueWithRetries uses retries internally, we can continue with other machines continue } + machines[index] = updatedMachine autoPreservationCapacityRemaining-- } } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index e30cf68ae..eb03328b1 100644 --- 
a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -774,7 +774,11 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } } - // either annotation has been deleted, set to empty or no preserve annotation exists. + if preserveValue == machineutils.PreserveMachineAnnotationValuePreserveStoppedByMCM { + // since preservation is stopped before adding the annotation, nothing more to be done here + return + } + // The annotation has either been deleted, set to empty or no preserve annotation exists. // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. if preserveValue == "" { err = c.stopMachinePreservationIfPreserved(ctx, clone, true) @@ -793,14 +797,10 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if machineutils.IsMachineFailed(clone) { err = c.preserveMachine(ctx, clone, preserveValue) } else { - // Here, if the preserve value is when-failed, the preserveExpiry is set, but the machine is not in Failed Phase, there are 2 scenarios that need to be handled: - // 1. The machine was initially annotated with preserve=now and has been preserved, but later the annotation was changed to when-failed. - // 2. The machine was initially annotated with preserve=when-failed, was preserved on failure and has recovered from Failed to Running. - // In both cases, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. - - // If the preserve value is auto-preserved, and the machine is Running, it would mean the machine has recovered from Failed phase to Running phase. - // In this case, we need to clear preserveExpiryTime and update Node condition if applicable. However, the CA annotation needs to be retained. - // If the machine fails again, since preserve annotation is present, it will be preserved again. 
+ // Here, if the preserve value is when-failed or auto-preserved, but the machine is not in Failed Phase, + // we need to stop preservation if preserved, but retain the CA scale-down disabled annotation. + // This is done so that CA does not scale down the node due to under-utilization immediately after recovery, + // thus allowing pods to get scheduled onto the recovered node. if clone.Labels[v1alpha1.NodeLabelKey] != "" { var node *corev1.Node @@ -809,9 +809,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a klog.Errorf("error getting node %q for machine %q: %v", clone.Labels[v1alpha1.NodeLabelKey], clone.Name, err) return } - // CA scale down disabled annotation is retained on a machine on recovery from Failed to Running, so that - // CA does not scale down the node due to under-utilization immediately after recovery. - // This allows pods to get scheduled onto the recovered node + // CA scale down disabled annotation is retained on a machine on recovery from Failed to Running, _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { return @@ -821,7 +819,17 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if err != nil { return } - + // To prevent erroneous re-preservation of a recovered, previously auto-preserved machine on future failures + // (since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), + // in addition to stopping preservation we also change annotation to auto-preserve-stopped. + // If the machine fails again, it may be preserved again based on the autoPreserveFailedMachineCount value at that time. 
+ if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + clone, err = machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(clone.Namespace), c.machineLister, clone, machineutils.PreserveMachineAnnotationValuePreserveStoppedByMCM) + if err != nil { + klog.Errorf("error annotating machine %q with preserve annotation value %q: %v", clone.Name, machineutils.PreserveMachineAnnotationValuePreserveStoppedByMCM, err) + return + } + } } } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow { err = c.preserveMachine(ctx, clone, preserveValue) @@ -871,17 +879,3 @@ func (c *controller) computeEffectivePreserveAnnotationValue(machine *v1alpha1.M } return "", nil } - -// writePreserveAnnotationValueOnMachine syncs the effective preserve value on the machine objects -func (c *controller) writePreserveAnnotationValueOnMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (*v1alpha1.Machine, error) { - if machine.Annotations == nil { - machine.Annotations = make(map[string]string) - } - machine.Annotations[machineutils.PreserveMachineAnnotationKey] = preserveValue - updatedMachine, err := c.controlMachineClient.Machines(c.namespace).Update(ctx, machine, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error updating machine %q with preserve annotation %q: %v", machine.Name, preserveValue, err) - return machine, err - } - return updatedMachine, nil -} diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 24a7b5fdb..7ed93a657 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2419,10 +2419,29 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine, removeCAScaleDownDisabledAnnotation bool) 
error { // removal of preserveExpiryTime is the last step of stopping preservation // therefore, if preserveExpiryTime is not set, machine is not preserved + nodeName := machine.Labels[v1alpha1.NodeLabelKey] if machine.Status.CurrentStatus.PreserveExpiryTime == nil { + if !removeCAScaleDownDisabledAnnotation || nodeName == "" { + return nil + } + // if preserveExpiryTime is nil, but removeCAScaleDownDisabledAnnotation is true, + // then we need to remove the annotation from node + node, err := c.nodeLister.Get(nodeName) + if err != nil { + // if node is not found, nothing to remove annotation from + if apierrors.IsNotFound(err) { + return nil + } + klog.Errorf("error trying to get node %q of machine %q: %v.", nodeName, machine.Name, err) + return err + } + // remove CA scale down disabled annotation from node + err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, node) + if err != nil { + return err + } return nil } - nodeName := machine.Labels[v1alpha1.NodeLabelKey] if nodeName == "" { err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index fb7a1a843..9352a3199 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -104,13 +104,17 @@ const ( // The AutoPreserveFailedMachineMax, set on the MCD, is enforced based on the number of machines annotated with this value. PreserveMachineAnnotationValuePreservedByMCM = "auto-preserved" + // PreserveMachineAnnotationValuePreserveStoppedByMCM is the annotation value used to indicate that + // the auto-preservation of a Machine was stopped. 
+ PreserveMachineAnnotationValuePreserveStoppedByMCM = "auto-preserve-stopped" + //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that // a Machine should not be preserved any longer, even if the expiry timeout has not been reached PreserveMachineAnnotationValueFalse = "false" ) // AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation -var AllowedPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) +var AllowedPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse, PreserveMachineAnnotationValuePreserveStoppedByMCM) // RetryPeriod is an alias for specifying the retry period type RetryPeriod time.Duration From 62d4348907d0ac88a474695162d4c6dbe8185f36 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 3 Feb 2026 12:09:42 +0530 Subject: [PATCH 61/79] Ensure reconcileClusterMachineSafetyAPIServer does not overwrite PreserveExpiryTime --- pkg/util/provider/machinecontroller/machine_safety.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine_safety.go b/pkg/util/provider/machinecontroller/machine_safety.go index b8ad153e2..3785f07c1 100644 --- a/pkg/util/provider/machinecontroller/machine_safety.go +++ b/pkg/util/provider/machinecontroller/machine_safety.go @@ -76,9 +76,10 @@ func (c *controller) reconcileClusterMachineSafetyAPIServer(_ string) error { } machine.Status.CurrentStatus = v1alpha1.CurrentStatus{ - Phase: v1alpha1.MachineRunning, - TimeoutActive: false, - LastUpdateTime: metav1.Now(), + Phase: v1alpha1.MachineRunning, + TimeoutActive: false, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: machine.Status.CurrentStatus.PreserveExpiryTime, 
} machine.Status.LastOperation = v1alpha1.LastOperation{ Description: "Machine Health Timeout was reset due to APIServer being unreachable", From d3e2fa8a3c0d0dabc06db5de53495fc67f3edd60 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 10 Feb 2026 13:40:07 +0530 Subject: [PATCH 62/79] Remove PreserveMachineAnnotationValuePreserveStoppedByMCM annotation value --- pkg/apis/machine/v1alpha1/machine_types.go | 6 +++--- pkg/util/provider/machineutils/utils.go | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index 859a4df27..b55057874 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -249,13 +249,13 @@ const ( NodePreserved corev1.NodeConditionType = "Preserved" // PreservedByMCM is a node condition reason for preservation of machines to indicate that the node is auto-preserved by MCM - PreservedByMCM string = "PreservedByMCM" + PreservedByMCM string = "Preserved by MCM" // PreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user - PreservedByUser string = "PreservedByUser" + PreservedByUser string = "Preserved by user" // PreservationStopped is a node condition reason to indicate that a machine/node preservation has been stopped due to annotation update or timeout - PreservationStopped string = "PreservationStopped" + PreservationStopped string = "Preservation stopped" // PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful PreservedNodeDrainSuccessful string = "Preserved node drained successfully" diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 9352a3199..eba2cce64 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -104,9 +104,9 @@ const ( // The 
AutoPreserveFailedMachineMax, set on the MCD, is enforced based on the number of machines annotated with this value. PreserveMachineAnnotationValuePreservedByMCM = "auto-preserved" - // PreserveMachineAnnotationValuePreserveStoppedByMCM is the annotation value used to indicate that - // the auto-preservation of a Machine was stopped. - PreserveMachineAnnotationValuePreserveStoppedByMCM = "auto-preserve-stopped" + //// PreserveMachineAnnotationValuePreserveStoppedByMCM is the annotation value used to indicate that + //// the auto-preservation of a Machine was stopped. + //PreserveMachineAnnotationValuePreserveStoppedByMCM = "auto-preserve-stopped" //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that // a Machine should not be preserved any longer, even if the expiry timeout has not been reached @@ -114,7 +114,7 @@ const ( ) // AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation -var AllowedPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse, PreserveMachineAnnotationValuePreserveStoppedByMCM) +var AllowedPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) // RetryPeriod is an alias for specifying the retry period type RetryPeriod time.Duration From 7b274b10f1635a15a36dae4299a26a990d4b54a5 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 10 Feb 2026 16:39:31 +0530 Subject: [PATCH 63/79] Add usage doc for preservation feature --- docs/usage/machine-preservation.md | 132 +++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 docs/usage/machine-preservation.md diff --git a/docs/usage/machine-preservation.md b/docs/usage/machine-preservation.md new file mode 100644 index 
000000000..7bdef05fc --- /dev/null +++ b/docs/usage/machine-preservation.md @@ -0,0 +1,132 @@ +# Machine Preservation — Usage Guide + +This document explains how to **use machine preservation** to retain machines and their backing VMs. + +### What is preservation in MCM? + +A machine and its backing node can be preserved by an end-user/SRE/operator to retain machines and their backing VMs for debugging, analysis, or operational safety. + +A preserved machine/node has the following properties: +- In case Node is `Unhealthy` for duration longer than `machineHealthTimeout` and the machine moves to `Failed` state, the machine stays in `Failed` state until `machinePreserveTimeout` runs out, without getting terminated. +- When the machineset is scaled down, machines in the machineset marked for preservation are de-prioritized for deletion. +- If a machine is preserved in its `Running` phase, the MCM adds the CA scale-down-disabled annotation to prevent the CA from scaling down the machine in case of underutilization. +- If a machine is preserved and is in its `Failed` phase, MCM drains the backing node of all pods, but the daemonset pods remain on the node. + +> Note: If a user sets a deletion timestamp (by using tools such as kubectl), the machine and backing node will be deleted. Preservation will not prevent this. + +> Note: If the desired replica count for a machineset cannot be met without scaling down preserved machines, the required number of preserved machines will be scaled-down. + +### Changes in a machine/node object on preservation: + +- If the machine is preserved in `Running` phase: + - The `PreserveExpiryTime` is set in the machine's status to indicate when preservation will end. + - The CA scale-down-disabled annotation is added. + - The `NodeCondition` of `Type=Preserved` is updated to show that the preservation was successful. + - If a machine has no backing node, only `PreserveExpiryTime` is set. 
+- If the machine is preserved and in `Failed` phase: + - The `PreserveExpiryTime` is set in the machine's status to indicate when preservation will end. + - The CA scale-down-disabled annotation is added. + - The backing node is drained of all pods but the daemonset pods remain. + - The `NodeCondition` of `Type=Preserved` is updated to show that the preservation was successful. + - If a machine has no backing node, only `PreserveExpiryTime` is set. + +### Changes in a machine/node object when preservation stops: +- If the machine is in `Running` phase: + - The `PreserveExpiryTime` is cleared. + - The CA scale-down-disabled annotation is removed. + - The `NodeCondition` of `Type=Preserved` is updated to show that the preservation has stopped. + - If a machine has no backing node, only `PreserveExpiryTime` is cleared. +- If the machine is in `Failed` phase: + - The `PreserveExpiryTime` is cleared. + - The CA scale-down-disabled annotation is removed. + - The `NodeCondition` of `Type=Preserved` is updated to show that the preservation has stopped. + - If a machine has no backing node, only `PreserveExpiryTime` is cleared. + - The machine is moved to `Terminating` phase by MCM. + +--- +### How can a machine be preserved? + +- The preservation feature offers two modes of preservation: + - Manual preservation by adding annotations + - Auto-preservation by MCM by specifying `AutoPreserveFailedMachineMax` for a workerpool. This value is distributed evenly across zones (MCD). +- For manual preservation, the end-user and operators must annotate the backing node of a machine. +- If there is no backing node, the machine object can be annotated. + +#### Configuration (Shoot Spec) + +Preservation is enabled and controlled per **worker pool**: + +```yaml +apiVersion: core.gardener.cloud/v1beta1 +kind: Shoot +...
+spec: + workers: + - cri: + name: containerd + name: worker1 + machineControllerManager: + machinePreserveTimeout: 72h + autoPreserveFailedMachineMax: 1 +``` + +#### Configuration Semantics +- `AutoPreserveFailedMachineMax` : Maximum number of failed machines that can be auto-preserved concurrently in a worker pool. This value is distributed across machineDeployments (zones) in the worker pool. If the limit is reached, additional failed machines will not be preserved and will proceed to termination as usual. +- `machinePreserveTimeout` : Duration after which preserved machines are automatically released + +> Note: ⚠️ Changes to `machinePreserveTimeout` apply only to machine preservations after the change. + +### Preservation annotations + +annotation key: `node.machine.sapcloud.io/preserve` + +**Manual Annotation values:** + +| Annotation value | purpose | +| ---------------- | --------------------------------------------------------------------------- | +| when-failed | To be added when the machine/node needs to be preserved **only on failure** | +| now | To be added when the machine/node needs to be preserved **now** | +| false | To be added if a machine should not be auto-preserved by mcm on failure | + +**Auto-preservation Annotation values added by MCM:** + +| Annotation value | purpose | +| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| auto-preserve | Added by MCM to indicate that a machine has been **auto-preserved** on failure. This machine will be counted towards **AutoPreserveFailedMachineMax** | + +--- +### How to manually stop preservation before PreserveExpiryTime: + +To manually stop preservation, the preservation annotation must be deleted from the node object if the backing node exists. If it does not, the preservation annotation must be deleted from the machine object. 
+ +--- + +### How to prevent a machine from being auto-preserved by MCM: + +To prevent a machine from being auto-preserved on moving to `Failed` phase, the node must be annotated with the value `false`. If the backing node does not exist, the machine must be annotated with the value `false`. If a currently preserved machine is annotated with `false`, the preservation will be stopped. + + +> Note: if a machine's backing node exists, the *node*'s annotation value is honoured. Therefore, if a backing node exists, but has no annotation value, the machine's annotation value is cleared even if explicitly annotated, and preservation is stopped. + +--- +### What happens when a machine recovers from failure and moves to `Running` during preservation? + +Depending on the annotation value - (`now/when-failed/auto-preserve`), the behaviour differs. This is to reflect the meaning behind the annotation value. + +1. `now`: on recovery from failure, machine preservation continues until `PreserveExpiryTime` +2. `when-failed`: on recovery from failure, machine preservation stops. This is because the annotation value clearly expresses that a machine must be preserved only when `Failed`. If the annotation is not explicitly changed, and the machine fails again, the machine is preserved again. +3. `auto-preserve`: since MCM performs auto-preservation of `Failed` machines only, on recovery, the machine preservation is stopped. + +In all the cases, when the machine moves to `Running` during preservation, the backing node is uncordoned to allow pods to be scheduled on it again. + +>Note: When a machine recovers to `Running` and preservation is stopped, CA's `scale-down-unneeded-time` comes into play. If the node's utilization is below the utilization threshold configured after `scale-down-unneeded-time`, CA will scale down the machine. + +--- +### Important Notes & Limitations +- Rolling updates: Preservation is ignored; Failed machines are replaced as usual. 
+- Shoot hibernation overrides preservation. +- Replica enforcement: Preserved machines count towards MachineDeployment replicas. +- Scale-down preference: Preserved machines are the last to be scaled down. +- Preservation status is visible via Node Conditions and Machine Status fields. +- Timeout changes: Do not affect existing preserved machines. +- Manual extension: Operators may edit PreserveExpiryTime directly if required. From a6743ed8ca4d02bb28081ff7ef014a34a2a7d70d Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 10 Feb 2026 16:46:09 +0530 Subject: [PATCH 64/79] Make changes to simplify design: * remove stop annotation value * remove CA scale-down annotation when preservation stops * change preservation annotation handling semantics for machine and node * remove auto-preserve-stopped annotation value * Add preserveExpiryTime to NodeCondition.Message * modify test cases --- docs/usage/machine-preservation.md | 30 ++-- pkg/controller/machineset.go | 63 ++----- .../provider/machinecontroller/machine.go | 118 +++++-------- .../machinecontroller/machine_test.go | 161 ++++++++---------- .../machinecontroller/machine_util.go | 140 +++++++++------ .../machinecontroller/machine_util_test.go | 97 ++++++----- pkg/util/provider/machineutils/utils.go | 37 +++- 7 files changed, 325 insertions(+), 321 deletions(-) diff --git a/docs/usage/machine-preservation.md b/docs/usage/machine-preservation.md index 7bdef05fc..3b13f8dc2 100644 --- a/docs/usage/machine-preservation.md +++ b/docs/usage/machine-preservation.md @@ -7,10 +7,12 @@ This document explains how to **use machine preservation** to retain machines an A machine and its backing node can be preserved by an end-user/SRE/operator to retain machines and their backing VMs for debugging, analysis, or operational safety. 
A preserved machine/node has the following properties: -- In case Node is `Unhealthy` for duration longer than `machineHealthTimeout` and the machine moves to `Failed` state, the machine stays in `Failed` state until `machinePreserveTimeout` runs out, without getting terminated. +- In case Node is `Unhealthy` for duration longer than `machineHealthTimeout` and the machine moves to `Failed` state, the machine stays in `Failed` state until `machinePreserveTimeout` runs out, without getting terminated. This allows end-users and SREs to debug the machine and backing node, and take necessary actions to recover the machine if needed. +- If a machine is in its `Failed` phase and is preserved, on recovering from failure, the machine can be moved to `Running` phase and the backing node can be uncordoned to allow scheduling of pods again. - When the machineset is scaled down, machines in the machineset marked for preservation are de-prioritized for deletion. - If a machine is preserved in its `Running` phase, the MCM adds the CA scale-down-disabled annotation to prevent the CA from scaling down the machine in case of underutilization. - If a machine is preserved and is in its `Failed` phase, MCM drains the backing node of all pods, but the daemonset pods remain on the node. + > Note: If a user sets a deletion timestamp (by using tools such as kubectl), the machine and backing node will be deleted. Preservation will not prevent this. @@ -65,16 +67,17 @@ spec: - cri: name: containerd name: worker1 + autoPreserveFailedMachineMax: 1 machineControllerManager: machinePreserveTimeout: 72h - autoPreserveFailedMachineMax: 1 + ``` #### Configuration Semantics - `AutoPreserveFailedMachineMax` : Maximum number of failed machines that can be auto-preserved concurrently in a worker pool. This value is distributed across machineDeployments (zones) in the worker pool. If the limit is reached, additional failed machines will not be preserved and will proceed to termination as usual.
- `machinePreserveTimeout` : Duration after which preserved machines are automatically released -> Note: ⚠️ Changes to `machinePreserveTimeout` apply only to machine preservations after the change. +> Note: ⚠️ Changes to `machinePreserveTimeout` apply only to preservation done after the change. ### Preservation annotations @@ -82,32 +85,35 @@ annotation key: `node.machine.sapcloud.io/preserve` **Manual Annotation values:** -| Annotation value | purpose | -| ---------------- | --------------------------------------------------------------------------- | +| Annotation value | Purpose | +| ---------------- |-----------------------------------------------------------------------------| | when-failed | To be added when the machine/node needs to be preserved **only on failure** | | now | To be added when the machine/node needs to be preserved **now** | -| false | To be added if a machine should not be auto-preserved by mcm on failure | +| false | To be added if a machine should not be auto-preserved by MCM on failure | **Auto-preservation Annotation values added by MCM:** -| Annotation value | purpose | -| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| Annotation value | Purpose | +| ---------------- |-------------------------------------------------------------------------------------------------------------------------------------------------------| | auto-preserve | Added by MCM to indicate that a machine has been **auto-preserved** on failure. This machine will be counted towards **AutoPreserveFailedMachineMax** | +### ⚠️ Preservation Annotation semantics: +Both node and machine objects can be annotated for preservation. +However, if both machine and node have the preservation annotation, the node's annotation value (even if set to "") is honoured and the machine's annotation is deleted. 
+To prevent confusion and unintended behaviour, it is advised to use the feature by annotating only the node or the machine, and not both. --- ### How to manually stop preservation before PreserveExpiryTime: -To manually stop preservation, the preservation annotation must be deleted from the node object if the backing node exists. If it does not, the preservation annotation must be deleted from the machine object. +To manually stop preservation, the preservation annotation must be deleted from whichever object (node/machine) is annotated for preservation. --- ### How to prevent a machine from being auto-preserved by MCM: -To prevent a machine from being auto-preserved on moving to `Failed` phase, the node must be annotated with the value `false`. If the backing node does not exist, the machine must be annotated with the value `false`. If a currently preserved machine is annotated with `false`, the preservation will be stopped. +To prevent a machine from being auto-preserved on moving to `Failed` phase, the node/machine object must be annotated with the value `false`. If a currently preserved machine is annotated with `false`, the preservation will be stopped. +Here too, the preservation annotation semantics from above apply - if both machine and node are annotated, the node's annotation value is honoured and the machine's annotation is deleted. -> Note: if a machine's backing node exists, the *node*'s annotation value is honoured. Therefore, if a backing node exists, but has no annotation value, the machine's annotation value is cleared even if explicitly annotated, and preservation is stopped. - --- ### What happens when a machine recovers from failure and moves to `Running` during preservation?
diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index ca3577217..78d926004 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -883,40 +883,6 @@ func isMachineStatusEqual(s1, s2 v1alpha1.MachineStatus) bool { return apiequality.Semantic.DeepEqual(s1Copy.LastOperation, s2Copy.LastOperation) && apiequality.Semantic.DeepEqual(s1Copy.CurrentStatus, s2Copy.CurrentStatus) } -// -//// see https://github.com/kubernetes/kubernetes/issues/21479 -//type updateMachineFunc func(machine *v1alpha1.Machine) error -// -//// UpdateMachineWithRetries updates a machine with given applyUpdate function. Note that machine not found error is ignored. -//// The returned bool value can be used to tell if the machine is actually updated. -//func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, namespace, name string, applyUpdate updateMachineFunc) (*v1alpha1.Machine, error) { -// var machine *v1alpha1.Machine -// -// retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { -// var err error -// machine, err = machineLister.Machines(namespace).Get(name) -// if err != nil { -// return err -// } -// machine = machine.DeepCopy() -// // Apply the update, then attempt to push it to the apiserver. -// if applyErr := applyUpdate(machine); applyErr != nil { -// return applyErr -// } -// machine, err = machineClient.Update(ctx, machine, metav1.UpdateOptions{}) -// return err -// }) -// -// // Ignore the precondition violated error, this machine is already updated -// // with the desired label. 
-// if retryErr == errorsutil.ErrPreconditionViolated { -// klog.V(4).Infof("Machine %s precondition doesn't hold, skip updating it.", name) -// retryErr = nil -// } -// -// return machine, retryErr -//} - // shouldFailedMachineBeTerminated checks if the failed machine is already preserved, in the process of being preserved // or if it is a candidate for auto-preservation. If none of these conditions are met, it returns true indicating // that the failed machine should be terminated. @@ -961,10 +927,10 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context return } klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) - updatedMachine, err := machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m, machineutils.PreserveMachineAnnotationValuePreservedByMCM) + updatedMachine, err := c.addAutoPreserveAnnotation(ctx, m) if err != nil { klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) - // since AnnotateMachineWithPreserveValueWithRetries uses retries internally, we can continue with other machines + // since addAutoPreserveAnnotation uses retries internally, on error we can continue with other machines continue } machines[index] = updatedMachine @@ -973,19 +939,12 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context } } -//// annotateMachineForAutoPreservation annotates the given machine with the auto-preservation annotation to trigger -//// preservation of the machine by the machine controller. 
-//func (c *controller) annotateMachineForAutoPreservation(ctx context.Context, m *v1alpha1.Machine) error { -// _, err := UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { -// if clone.Annotations == nil { -// clone.Annotations = make(map[string]string) -// } -// clone.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM -// return nil -// }) -// if err != nil { -// return err -// } -// klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM) -// return nil -//} +func (c *controller) addAutoPreserveAnnotation(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { + // if machine has a backing node, the node must be annotated + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + if nodeName != "" { + return machine, machineutils.AnnotateNodeForAutoPreservationWithRetries(ctx, c.targetCoreClient.CoreV1().Nodes(), c.nodeLister, nodeName) + } + // else, the machine must be annotated to trigger preservation + return machineutils.AnnotateMachineForAutoPreservationWithRetries(ctx, c.controlMachineClient.Machines(machine.Namespace), c.machineLister, machine) +} diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index eb03328b1..92a98b01f 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -760,28 +760,17 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a retry = machineutils.LongRetry } }() - - preserveValue, err := c.computeEffectivePreserveAnnotationValue(machine) - if err != nil { - return - } - // if preserve value differs from machine's preserve value, overwrite the value in the machine clone := machine.DeepCopy() - if 
clone.Annotations[machineutils.PreserveMachineAnnotationKey] != preserveValue { - clone, err = machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(clone.Namespace), c.machineLister, clone, preserveValue) - if err != nil { - klog.Errorf("error annotating machine %q with preserve annotation value %q: %v", clone.Name, preserveValue, err) - return - } - } - if preserveValue == machineutils.PreserveMachineAnnotationValuePreserveStoppedByMCM { - // since preservation is stopped before adding the annotation, nothing more to be done here + preserveValue, clone, err := c.reconcilePreserveAnnotationValueForMachine(ctx, clone) + if err != nil { return } + // delete preserve annotation from machine if the backing node exists + // The annotation has either been deleted, set to empty or no preserve annotation exists. // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. if preserveValue == "" { - err = c.stopMachinePreservationIfPreserved(ctx, clone, true) + err = c.stopMachinePreservationIfPreserved(ctx, clone) return } if !machineutils.AllowedPreserveAnnotationValues.Has(preserveValue) { @@ -790,45 +779,23 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } // if preserve=false or if preservation has expired, stop preservation if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { - err = c.stopMachinePreservationIfPreserved(ctx, clone, true) + err = c.stopMachinePreservationIfPreserved(ctx, clone) return } if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { if machineutils.IsMachineFailed(clone) { err = c.preserveMachine(ctx, clone, preserveValue) } else { - // Here, if the preserve value is when-failed or 
auto-preserved, but the machine is not in Failed Phase, - // we need to stop preservation if preserved, but retain the CA scale-down disabled annotation. - // This is done so that CA does not scale down the node due to under-utilization immediately after recovery, - // thus allowing pods to get scheduled onto the recovered node. - - if clone.Labels[v1alpha1.NodeLabelKey] != "" { - var node *corev1.Node - node, err = c.nodeLister.Get(clone.Labels[v1alpha1.NodeLabelKey]) - if err != nil { - klog.Errorf("error getting node %q for machine %q: %v", clone.Labels[v1alpha1.NodeLabelKey], clone.Name, err) - return - } - // CA scale down disabled annotation is retained on a machine on recovery from Failed to Running, - _, err = c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) - if err != nil { - return - } - } - err = c.stopMachinePreservationIfPreserved(ctx, clone, false) + err = c.stopMachinePreservationIfPreserved(ctx, clone) if err != nil { return } // To prevent erroneous re-preservation of a recovered, previously auto-preserved machine on future failures // (since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), - // in addition to stopping preservation we also change annotation to auto-preserve-stopped. - // If the machine fails again, it may be preserved again based on the autoPreserveFailedMachineCount value at that time. + // in addition to stopping preservation we also remove the preservation annotation. + // If the machine fails again, it can be preserved again based on the autoPreserveFailedMachineCount value at that time. 
if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - clone, err = machineutils.AnnotateMachineWithPreserveValueWithRetries(ctx, c.controlMachineClient.Machines(clone.Namespace), c.machineLister, clone, machineutils.PreserveMachineAnnotationValuePreserveStoppedByMCM) - if err != nil { - klog.Errorf("error annotating machine %q with preserve annotation value %q: %v", clone.Name, machineutils.PreserveMachineAnnotationValuePreserveStoppedByMCM, err) - return - } + err = c.deletePreserveAnnotation(ctx, clone) } } } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow { @@ -838,44 +805,45 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } } // At this point, the machine is annotated either with preserve=now or preserve=when-failed or preserve=auto-preserved, - // and machine preservation has been stopped if applicable. + // and machine preservation has been stopped, if applicable. // If the machine is running and has a backing node, uncordon the node if cordoned. - // This is to handle the scenario where a preserved machine recovers from Failed to Running + // This is to handle the case where a preserved machine recovers from Failed to Running // in which case, pods should be allowed to be scheduled onto the node - if clone.Status.CurrentStatus.Phase == v1alpha1.MachineRunning && clone.Labels[v1alpha1.NodeLabelKey] != "" { - err = c.uncordonNodeIfCordoned(ctx, clone.Labels[v1alpha1.NodeLabelKey]) + nodeName := clone.Labels[v1alpha1.NodeLabelKey] + if machineutils.IsMachineActive(clone) && nodeName != "" { + err = c.uncordonNodeIfCordoned(ctx, nodeName) } return } -func (c *controller) getNodePreserveAnnotationValue(machine *v1alpha1.Machine) (nodeAnnotationValue string, existsOnNode bool, err error) { +// reconcilePreserveAnnotationValueForMachine returns the effective preservation value. 
+// If the backing node exists and has the preserve annotation (even if set to ""), the node's annotation value is returned. +// If the node has no preserve annotation, and the machine has the preserve annotation set(even if set to "") , the machine's annotation value is returned. +// If there is no backing node, the machine's annotation value is returned. +// If neither machine nor node has the preserve annotation set, an empty string is returned, indicating that preservation is not desired. +func (c *controller) reconcilePreserveAnnotationValueForMachine(ctx context.Context, machine *v1alpha1.Machine) (string, *v1alpha1.Machine, error) { + machineAnnotationValue, mExists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] nodeName := machine.Labels[v1alpha1.NodeLabelKey] - if nodeName == "" { - return - } - node, err := c.nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error trying to get node %q: %v", nodeName, err) - return - } - nodeAnnotationValue, existsOnNode = node.Annotations[machineutils.PreserveMachineAnnotationKey] - return -} - -// computeEffectivePreserveAnnotationValue returns the effective preservation value based on node's and machine's annotations. 
-// if the backing node is annotated with preserve annotation, the node's preserve value will be honoured -// if there is no backing node, or the node has no preserve annotation, then the machine's preserve value is honoured -// if both machine and node objects have conflicting preserve annotation values, the node's value will be honoured -func (c *controller) computeEffectivePreserveAnnotationValue(machine *v1alpha1.Machine) (string, error) { - machineAnnotationValue, existsOnMachine := machine.Annotations[machineutils.PreserveMachineAnnotationKey] - nodeAnnotationValue, existsOnNode, err := c.getNodePreserveAnnotationValue(machine) - if err != nil { - return "", err - } - if existsOnNode { - return nodeAnnotationValue, nil - } else if existsOnMachine { - return machineAnnotationValue, nil + if nodeName != "" { + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error trying to get node %q: %v", nodeName, err) + return "", machine, err + } + nodeAnnotationValue, nExists := node.Annotations[machineutils.PreserveMachineAnnotationKey] + if !nExists { + return machineAnnotationValue, machine, nil + } + var updatedMachine = machine + if mExists { + klog.Warningf("Node %q annotated with %q=%q. 
Proceeding with the value on node and deleting annotation on machine %q.", nodeName, machineutils.PreserveMachineAnnotationKey, nodeAnnotationValue, machine.Name) + updatedMachine, err = c.deletePreserveAnnotationValueOnMachine(ctx, machine) + if err != nil { + return "", nil, err + } + } + return nodeAnnotationValue, updatedMachine, nil } - return "", nil + // if no backing node exists + return machineAnnotationValue, machine, nil } diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index f05787e2c..efdbdb936 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -7,7 +7,6 @@ package controller import ( "context" "fmt" - "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" k8stesting "k8s.io/client-go/testing" "math" "time" @@ -4000,22 +3999,23 @@ var _ = Describe("machine", func() { ) }) - Describe("#computeEffectivePreserveAnnotationValue", func() { + Describe("#reconcilePreserveAnnotationValueForMachine", func() { type setup struct { machinePreserveAnnotation string nodePreserveAnnotation string nodeName string } type expect struct { - preserveValue string - err error + preserveValue string + err error + deleteMachinePreserveAnnotation bool } type testCase struct { setup setup expect expect } - DescribeTable("computeEffectivePreserveAnnotationValue behavior", + DescribeTable("reconcilePreserveAnnotationValueForMachine behavior", func(tc testCase) { stop := make(chan struct{}) @@ -4058,7 +4058,7 @@ var _ = Describe("machine", func() { defer trackers.Stop() waitForCacheSync(stop, c) - value, err := c.computeEffectivePreserveAnnotationValue(machine) + value, updatedMachine, err := c.reconcilePreserveAnnotationValueForMachine(context.TODO(), machine) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) @@ -4067,8 +4067,14 @@ var _ = Describe("machine", func() { } Expect(err).ToNot(HaveOccurred()) 
Expect(value).To(Equal(tc.expect.preserveValue)) + machineValue, exists := updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey] + if tc.expect.deleteMachinePreserveAnnotation { + Expect(exists).To(Equal(false)) + } else if exists { + Expect(machineValue).To(Equal(tc.expect.preserveValue)) + } }, - Entry("neither machine nor node has preserve annotation", testCase{ + Entry("should return empty string if neither machine nor node has preserve annotation", testCase{ setup: setup{ nodeName: "node-1", }, @@ -4077,17 +4083,18 @@ var _ = Describe("machine", func() { err: nil, }, }), - Entry("only machine has preserve annotation", testCase{ + Entry("should delete annotation value on node and return empty string if only machine has preserve annotation and backing node exists", testCase{ setup: setup{ machinePreserveAnnotation: "machineValue", nodeName: "node-1", }, expect: expect{ - preserveValue: "machineValue", - err: nil, + preserveValue: "", + err: nil, + deleteMachinePreserveAnnotation: true, }, }), - Entry("only node has preserve annotation", testCase{ + Entry("should return node's annotation value if only node has preserve annotation", testCase{ setup: setup{ nodePreserveAnnotation: "nodeValue", nodeName: "node-1", @@ -4097,20 +4104,21 @@ var _ = Describe("machine", func() { err: nil, }, }), - Entry("both machine and node have preserve annotation - node takes precedence", testCase{ + Entry("should return node's annotation value and delete machine's annotation value if both machine and node have preserve annotation", testCase{ setup: setup{ machinePreserveAnnotation: "machineValue", nodePreserveAnnotation: "nodeValue", nodeName: "node-1", }, expect: expect{ - preserveValue: "nodeValue", - err: nil, + preserveValue: "nodeValue", + err: nil, + deleteMachinePreserveAnnotation: true, }, }), - Entry("machine has node label but node object is not found", testCase{ + Entry("should return an error if machine has node label but node object is not found", 
testCase{ setup: setup{ - machinePreserveAnnotation: "machineValue", + machinePreserveAnnotation: "", nodeName: "invalid", }, expect: expect{ @@ -4118,7 +4126,7 @@ var _ = Describe("machine", func() { err: fmt.Errorf("node %q not found", "invalid"), }, }), - Entry("machine does not have node label", testCase{ + Entry("should return machine's annotation value if backing node does not exist", testCase{ setup: setup{ machinePreserveAnnotation: "machineValue", }, @@ -4139,11 +4147,10 @@ var _ = Describe("machine", func() { preserveExpiryTime *metav1.Time } type expect struct { - retry machineutils.RetryPeriod - preserveExpiryTimeIsSet bool - nodeCondition *corev1.NodeCondition - CAScaleDownDisabledAnnotationPresent bool - err error + retry machineutils.RetryPeriod + preserveExpiryTimeIsSet bool + nodeCondition *corev1.NodeCondition + err error } type testCase struct { setup setup @@ -4232,14 +4239,6 @@ var _ = Describe("machine", func() { } else { Expect(found).To(BeFalse()) } - if tc.expect.CAScaleDownDisabledAnnotationPresent { - val, ok := updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] - Expect(ok).To(BeTrue()) - Expect(val).To(Equal(autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue)) - } else { - _, ok := updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] - Expect(ok).To(BeFalse()) - } } }, Entry("no preserve annotation on machine and node", testCase{ @@ -4247,53 +4246,49 @@ var _ = Describe("machine", func() { nodeName: "node-1", }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: false, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, }, }), - Entry("preserve annotation 'now' added on Running machine", testCase{ + Entry("preserve annotation 'now' added to node of Running machine", testCase{ setup: setup{ - machineAnnotationValue: 
machineutils.PreserveMachineAnnotationValueNow, - nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: true, nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: true, + retry: machineutils.LongRetry, }, }), - Entry("preserve annotation 'when-failed' added on Running machine", testCase{ + Entry("preserve annotation 'when-failed' added to node of Running machine", testCase{ setup: setup{ - machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, - nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: true, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, }, }), - Entry("Failed machine annotated with when-failed", testCase{ + Entry("node of Failed machine annotated with when-failed", testCase{ setup: setup{ - machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, - nodeName: "node-1", - machinePhase: v1alpha1.MachineFailed, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "node-1", + machinePhase: v1alpha1.MachineFailed, }, expect: expect{ preserveExpiryTimeIsSet: true, nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: true, + retry: machineutils.LongRetry, }, }), Entry("preserve annotation 'now' added on Healthy node ", testCase{ @@ 
-4307,8 +4302,7 @@ var _ = Describe("machine", func() { nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: true, + retry: machineutils.LongRetry, }, }), Entry("preserve annotation 'when-failed' added on Healthy node ", testCase{ @@ -4318,10 +4312,9 @@ var _ = Describe("machine", func() { machinePhase: v1alpha1.MachineRunning, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: true, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, }}), Entry("preserve annotation 'false' added on backing node of preserved machine", testCase{ setup: setup{ @@ -4331,26 +4324,23 @@ var _ = Describe("machine", func() { preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: false, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, }, }), - Entry("machine auto-preserved by MCM", testCase{ + Entry("node annotated for auto-preservation by MCM", testCase{ setup: setup{ - machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, - nodeAnnotationValue: "", - nodeName: "node-1", - machinePhase: v1alpha1.MachineFailed, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + nodeName: "node-1", + machinePhase: v1alpha1.MachineFailed, }, expect: expect{ preserveExpiryTimeIsSet: true, nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: true, + retry: machineutils.LongRetry, }, }), Entry("preservation timed out", testCase{ @@ -4362,10 +4352,9 @@ var _ = Describe("machine", 
func() { preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Minute)}, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, - retry: machineutils.LongRetry, - CAScaleDownDisabledAnnotationPresent: false, + preserveExpiryTimeIsSet: false, + nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, + retry: machineutils.LongRetry, }, }), Entry("invalid preserve annotation on node of un-preserved machine", testCase{ @@ -4376,11 +4365,10 @@ var _ = Describe("machine", func() { machinePhase: v1alpha1.MachineRunning, }, expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, - err: nil, - CAScaleDownDisabledAnnotationPresent: false, + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + err: nil, }, }), Entry("machine annotated with preserve=now, but has no backing node", testCase{ @@ -4411,18 +4399,17 @@ var _ = Describe("machine", func() { err: fmt.Errorf("node %q not found", "invalid"), }, }), - Entry("machine annotated with auto-preserved and in Running phase after recovery from failure", testCase{ + Entry("node annotated with auto-preserved and in Running phase after recovery from failure", testCase{ setup: setup{ - machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, - nodeAnnotationValue: "", - nodeName: "invalid", - machinePhase: v1alpha1.MachineRunning, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ preserveExpiryTimeIsSet: false, nodeCondition: nil, - retry: machineutils.ShortRetry, - err: fmt.Errorf("node %q not found", "invalid"), + retry: machineutils.LongRetry, + err: nil, }, }), ) diff --git a/pkg/util/provider/machinecontroller/machine_util.go 
b/pkg/util/provider/machinecontroller/machine_util.go index 7ed93a657..27eace4e2 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2362,24 +2362,23 @@ Utility Functions for Machine Preservation func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { var err error nodeName := machine.Labels[v1alpha1.NodeLabelKey] - machineClone := machine.DeepCopy() if machine.Status.CurrentStatus.PreserveExpiryTime == nil { - klog.V(4).Infof("Starting preservation flow for machine %q.", machineClone.Name) + klog.V(4).Infof("Starting preservation flow for machine %q.", machine.Name) // Step 1: Add preserveExpiryTime to machine status - machineClone, err = c.setPreserveExpiryTimeOnMachine(ctx, machineClone) + machine, err = c.setPreserveExpiryTimeOnMachine(ctx, machine) if err != nil { return err } } if nodeName == "" { // Machine has no backing node, preservation is complete - klog.V(2).Infof("Machine %q without backing node is preserved successfully till %v.", machineClone.Name, machineClone.Status.CurrentStatus.PreserveExpiryTime) + klog.V(2).Infof("Machine %q without backing node is preserved successfully till %v.", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return nil } // Machine has a backing node node, err := c.nodeLister.Get(nodeName) if err != nil { - klog.Errorf("error trying to get node %q of machine %q: %v. Retrying.", nodeName, machineClone.Name, err) + klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) return err } existingNodePreservedCondition := nodeops.GetCondition(node, v1alpha1.NodePreserved) @@ -2395,11 +2394,11 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return err } var drainErr error - if shouldPreservedNodeBeDrained(existingNodePreservedCondition, machineClone.Status.CurrentStatus.Phase) { + if shouldPreservedNodeBeDrained(existingNodePreservedCondition, machine.Status.CurrentStatus.Phase) { // Step 3: If machine is in Failed Phase, drain the backing node drainErr = c.drainPreservedNode(ctx, machine) } - newCond, needsUpdate := computeNewNodePreservedCondition(machineClone.Status.CurrentStatus.Phase, preserveValue, drainErr, existingNodePreservedCondition) + newCond, needsUpdate := computeNewNodePreservedCondition(machine.Status.CurrentStatus, preserveValue, drainErr, existingNodePreservedCondition) if needsUpdate { // Step 4: Update NodePreserved Condition on Node, with drain status _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) @@ -2411,43 +2410,25 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return err } } - klog.V(2).Infof("Machine %q and backing node preserved successfully till %v.", machine.Name, machineClone.Status.CurrentStatus.PreserveExpiryTime) + klog.V(2).Infof("Machine %q and backing node preserved successfully till %v.", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return nil } // stopMachinePreservationIfPreserved stops the preservation of the machine and node -func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine, removeCAScaleDownDisabledAnnotation bool) error { +func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine) error { // removal of preserveExpiryTime is the last step of stopping preservation // therefore, if preserveExpiryTime is not set, machine 
is not preserved nodeName := machine.Labels[v1alpha1.NodeLabelKey] if machine.Status.CurrentStatus.PreserveExpiryTime == nil { - if !removeCAScaleDownDisabledAnnotation || nodeName == "" { - return nil - } - // if preserveExpiryTime is nil, but removeCAScaleDownDisabledAnnotation is true, - // then we need to remove the annotation from node - node, err := c.nodeLister.Get(nodeName) - if err != nil { - // if node is not found, nothing to remove annotation from - if apierrors.IsNotFound(err) { - return nil - } - klog.Errorf("error trying to get node %q of machine %q: %v.", nodeName, machine.Name, err) - return err - } - // remove CA scale down disabled annotation from node - err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, node) - if err != nil { - return err - } return nil } + // if there is no backing node, then preservation can be stopped by just removing preserveExpiryTime from machine status if nodeName == "" { err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { return err } - klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) + klog.V(2).Infof("Preservation of machine %q with no backing node has stopped.", machine.Name) return nil } @@ -2458,7 +2439,7 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac // therefore, this error is handled specifically if apierrors.IsNotFound(err) { // Node not found, proceed to clear preserveExpiryTime on machine - klog.Warningf("Node %q of machine %q not found. Proceeding to clear preserve expiry time on machine.", nodeName, machine.Name) + klog.Warningf("Node %q of machine %q not found. 
Proceeding to clear PreserveExpiryTime on machine.", nodeName, machine.Name) err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { return err @@ -2482,13 +2463,9 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac return err } // Step 2: remove CA scale down disabled annotation from node - // only remove if removeCAScaleDownDisabledAnnotation is not "when-failed" since in that case, - // scale down should remain disabled even after preservation is stopped - if removeCAScaleDownDisabledAnnotation { - err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, updatedNode) - if err != nil { - return err - } + err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, updatedNode) + if err != nil { + return err } // Step 3: update machine status to set preserve expiry time to nil err = c.clearMachinePreserveExpiryTime(ctx, machine) @@ -2513,7 +2490,7 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine klog.Errorf("machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) return nil, err } - klog.V(4).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) + klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) return updatedMachine, nil } @@ -2523,8 +2500,11 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, if node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { return node, nil } + // Add annotation to disable CA scale down. + // Also add annotation expressing that MCM is the one who added this annotation, so that it can be removed safely when preservation is stopped. 
CAScaleDownAnnotation := map[string]string{ - autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue, } nodeCopy := node.DeepCopy() updatedNode, _, err := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) @@ -2548,11 +2528,16 @@ func (c *controller) removeCAScaleDownDisabledAnnotationOnNode(ctx context.Conte return nil } nodeCopy := node.DeepCopy() - delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) - _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("node UPDATE failed for node %q. Retrying, error: %s", node.Name, err) - return err + // If CA scale-down disabled annotation was added by MCM, it can be safely removed. + // If the annotation was added by some other entity, then it should not be removed. + if nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue { + delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) + delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) + _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("node UPDATE failed for node %q. 
Retrying, error: %s", node.Name, err) + return err + } } return nil } @@ -2572,7 +2557,8 @@ func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string } // computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation -func computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preserveValue string, drainErr error, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { +func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, preserveValue string, drainErr error, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { + const preserveExpiryMessageSuffix = "Machine preserved until " var newNodePreservedCondition *v1.NodeCondition var needsUpdate bool if existingNodeCondition == nil { @@ -2585,20 +2571,23 @@ func computeNewNodePreservedCondition(machinePhase v1alpha1.MachinePhase, preser } else { newNodePreservedCondition = existingNodeCondition.DeepCopy() } + machinePhase := currentStatus.Phase if machinePhase == v1alpha1.MachineFailed { if drainErr == nil { - if newNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainSuccessful { - newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful + + if !strings.Contains(newNodePreservedCondition.Message, v1alpha1.PreservedNodeDrainSuccessful) { + newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful + ". " + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() newNodePreservedCondition.Status = v1.ConditionTrue needsUpdate = true } - } else if newNodePreservedCondition.Message != v1alpha1.PreservedNodeDrainUnsuccessful { - newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful + } else if !strings.Contains(newNodePreservedCondition.Message, v1alpha1.PreservedNodeDrainUnsuccessful) { + newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful + ". 
" + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() newNodePreservedCondition.Status = v1.ConditionFalse needsUpdate = true } } else if newNodePreservedCondition.Status != v1.ConditionTrue { newNodePreservedCondition.Status = v1.ConditionTrue + newNodePreservedCondition.Message = preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() needsUpdate = true } if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { @@ -2625,10 +2614,9 @@ func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine if machine.Status.CurrentStatus.PreserveExpiryTime == nil { return nil } - clone := machine.DeepCopy() - clone.Status.CurrentStatus.PreserveExpiryTime = nil - clone.Status.CurrentStatus.LastUpdateTime = metav1.Now() - _, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{}) + machine.Status.CurrentStatus.PreserveExpiryTime = nil + machine.Status.CurrentStatus.LastUpdateTime = metav1.Now() + _, err := c.controlMachineClient.Machines(machine.Namespace).UpdateStatus(ctx, machine, metav1.UpdateOptions{}) if err != nil { klog.Errorf("machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) return err @@ -2636,6 +2624,52 @@ func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine return nil } +func (c *controller) deletePreserveAnnotation(ctx context.Context, machine *v1alpha1.Machine) error { + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + if nodeName == "" { + _, err := c.deletePreserveAnnotationValueOnMachine(ctx, machine) + return err + } + node, err := c.nodeLister.Get(nodeName) + if err != nil { + if apierrors.IsNotFound(err) { + return nil + } + klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) + return err + } + return c.deletePreserveAnnotationValueOnNode(ctx, node) +} + +func (c *controller) deletePreserveAnnotationValueOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { + + if machine.Annotations == nil || machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" { + return machine, nil + } + clone := machine.DeepCopy() + delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) + updatedClone, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("failed to delete preserve annotation on machine %q. error: %v", machine.Name, err) + return nil, err + } + return updatedClone, nil +} + +func (c *controller) deletePreserveAnnotationValueOnNode(ctx context.Context, node *v1.Node) error { + nodeClone := node.DeepCopy() + if nodeClone.Annotations == nil || nodeClone.Annotations[machineutils.PreserveMachineAnnotationKey] == "" { + return nil + } + delete(nodeClone.Annotations, machineutils.PreserveMachineAnnotationKey) + _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeClone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("failed to delete preserve annotation on node %q. 
error : %v", node.Name, err) + return err + } + return nil +} + // drainPreservedNode attempts to drain the node backing a preserved machine func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.Machine) error { var ( diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 97430dadf..44fb479d2 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4213,10 +4213,8 @@ var _ = Describe("machine_util", func() { }) Describe("#stopMachinePreservationIfPreserved", func() { type setup struct { - nodeName string - removeCAAnnotation bool + nodeName string } - type expect struct { err error } @@ -4277,7 +4275,7 @@ var _ = Describe("machine_util", func() { c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.stopMachinePreservationIfPreserved(context.TODO(), machine, tc.setup.removeCAAnnotation) + err := c.stopMachinePreservationIfPreserved(context.TODO(), machine) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(tc.expect.err.Error())) @@ -4293,11 +4291,6 @@ var _ = Describe("machine_util", func() { } updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) Expect(getErr).To(BeNil()) - if tc.setup.removeCAAnnotation { - Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("")) - } else { - Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal("true")) - } updatedNodeCondition := nodeops.GetCondition(updatedNode, machinev1.NodePreserved) Expect(updatedNodeCondition).ToNot(BeNil()) Expect(updatedNodeCondition.Status).To(Equal(corev1.ConditionFalse)) @@ -4305,8 +4298,7 @@ var _ = 
Describe("machine_util", func() { }, Entry("when stopping preservation on a preserved machine with backing node", &testCase{ setup: setup{ - nodeName: "node-1", - removeCAAnnotation: true, + nodeName: "node-1", }, expect: expect{ err: nil, @@ -4314,8 +4306,7 @@ var _ = Describe("machine_util", func() { }), Entry("when stopping preservation on a preserved machine with no backing node", &testCase{ setup: setup{ - nodeName: "", - removeCAAnnotation: true, + nodeName: "", }, expect: expect{ err: nil, @@ -4331,8 +4322,7 @@ var _ = Describe("machine_util", func() { }), Entry("when stopping preservation on a preserved machine, but retaining CA annotation", &testCase{ setup: setup{ - nodeName: "node-1", - removeCAAnnotation: false, + nodeName: "node-1", }, expect: expect{ err: nil, @@ -4341,8 +4331,9 @@ var _ = Describe("machine_util", func() { ) }) Describe("#computeNewNodePreservedCondition", func() { + preserveExpiryTime := &metav1.Time{Time: time.Now().Add(2 * time.Hour)} type setup struct { - machinePhase machinev1.MachinePhase + currentStatus machinev1.CurrentStatus preserveValue string drainErr error existingNodeCondition *corev1.NodeCondition @@ -4358,7 +4349,7 @@ var _ = Describe("machine_util", func() { DescribeTable("##computeNewNodePreservedCondition behaviour scenarios", func(tc *testCase) { newNodeCondition, needsUpdate := computeNewNodePreservedCondition( - tc.setup.machinePhase, + tc.setup.currentStatus, tc.setup.preserveValue, tc.setup.drainErr, tc.setup.existingNodeCondition, @@ -4375,22 +4366,31 @@ var _ = Describe("machine_util", func() { }, Entry("when preserve=now, machine is Running, no existing condition", &testCase{ setup: setup{ - machinePhase: machinev1.MachineRunning, + currentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineRunning, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: preserveExpiryTime, + }, preserveValue: machineutils.PreserveMachineAnnotationValueNow, existingNodeCondition: nil, }, expect: expect{ 
newNodeCondition: &corev1.NodeCondition{ - Type: machinev1.NodePreserved, - Status: corev1.ConditionTrue, - Reason: machinev1.PreservedByUser, + Type: machinev1.NodePreserved, + Status: corev1.ConditionTrue, + Reason: machinev1.PreservedByUser, + Message: "Machine preserved until " + preserveExpiryTime.String(), }, needsUpdate: true, }, }), Entry("when preserve=now, machine is Failed, drain successful, no existing condition", &testCase{ setup: setup{ - machinePhase: machinev1.MachineFailed, + currentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: preserveExpiryTime, + }, preserveValue: machineutils.PreserveMachineAnnotationValueNow, drainErr: nil, existingNodeCondition: nil, @@ -4400,14 +4400,18 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainSuccessful, + Message: machinev1.PreservedNodeDrainSuccessful + ". Machine preserved until " + preserveExpiryTime.String(), }, needsUpdate: true, }, }), Entry("when preserve=now, machine is Failed, drain is unsuccessful, no existing condition", &testCase{ setup: setup{ - machinePhase: machinev1.MachineFailed, + currentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: preserveExpiryTime, + }, preserveValue: machineutils.PreserveMachineAnnotationValueNow, drainErr: fmt.Errorf("test drain error"), existingNodeCondition: nil, @@ -4417,14 +4421,18 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful, + Message: machinev1.PreservedNodeDrainUnsuccessful + ". 
Machine preserved until " + preserveExpiryTime.String(), }, needsUpdate: true, }, }), Entry("when machine auto-preserved by MCM, machine is Failed, drain is successful, no existing condition", &testCase{ setup: setup{ - machinePhase: machinev1.MachineFailed, + currentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: preserveExpiryTime, + }, preserveValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, drainErr: nil, existingNodeCondition: nil, @@ -4434,20 +4442,25 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByMCM, - Message: machinev1.PreservedNodeDrainSuccessful, + Message: machinev1.PreservedNodeDrainSuccessful + ". Machine preserved until " + preserveExpiryTime.String(), }, needsUpdate: true, }, }), Entry("when preserve=now, machine is Failed, drain is unsuccessful, existing condition present", &testCase{ setup: setup{ - machinePhase: machinev1.MachineFailed, + currentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: preserveExpiryTime, + }, preserveValue: machineutils.PreserveMachineAnnotationValueNow, drainErr: fmt.Errorf("test drain error"), existingNodeCondition: &corev1.NodeCondition{ - Type: machinev1.NodePreserved, - Status: corev1.ConditionFalse, - Reason: machinev1.PreservedByUser, + Type: machinev1.NodePreserved, + Status: corev1.ConditionFalse, + Reason: machinev1.PreservedByUser, + Message: "Machine preserved until " + preserveExpiryTime.String(), }, }, expect: expect{ @@ -4455,21 +4468,25 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful, + Message: machinev1.PreservedNodeDrainUnsuccessful + ". 
Machine preserved until " + preserveExpiryTime.String(), }, needsUpdate: true, }, }), Entry("when preserve=now, machine is Failed, drain is unsuccessful for the second time, existing condition present", &testCase{ setup: setup{ - machinePhase: machinev1.MachineFailed, + currentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: &metav1.Time{Time: time.Now().Add(2 * time.Hour)}, + }, preserveValue: machineutils.PreserveMachineAnnotationValueNow, drainErr: fmt.Errorf("test drain error"), existingNodeCondition: &corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful, + Message: machinev1.PreservedNodeDrainUnsuccessful + ". Machine preserved until " + preserveExpiryTime.String(), }, }, expect: expect{ @@ -4477,21 +4494,25 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful, + Message: machinev1.PreservedNodeDrainUnsuccessful + ". Machine preserved until " + preserveExpiryTime.String(), }, needsUpdate: false, }, }), Entry("when preserve=now, machine is Failed, drain is successful, existing condition present and status is true", &testCase{ setup: setup{ - machinePhase: machinev1.MachineFailed, + currentStatus: machinev1.CurrentStatus{ + Phase: machinev1.MachineFailed, + LastUpdateTime: metav1.Now(), + PreserveExpiryTime: &metav1.Time{Time: time.Now().Add(2 * time.Hour)}, + }, preserveValue: machineutils.PreserveMachineAnnotationValueNow, drainErr: nil, existingNodeCondition: &corev1.NodeCondition{ Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainSuccessful, + Message: machinev1.PreservedNodeDrainSuccessful + ". 
Machine preserved until " + preserveExpiryTime.String(), }, }, expect: expect{ @@ -4499,7 +4520,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainSuccessful, + Message: machinev1.PreservedNodeDrainSuccessful + ". Machine preserved until " + preserveExpiryTime.String(), }, needsUpdate: false, }, diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index eba2cce64..35368b47e 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -14,6 +14,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" errorsutil "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" + v2 "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "time" @@ -163,19 +165,46 @@ func PreserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string return newAnnotations[PreserveMachineAnnotationKey] != oldAnnotations[PreserveMachineAnnotationKey] } -// AnnotateMachineWithPreserveValueWithRetries annotates the given machine with the preservation annotation value -func AnnotateMachineWithPreserveValueWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, m *v1alpha1.Machine, preserveValue string) (*v1alpha1.Machine, error) { +// AnnotateNodeForAutoPreservationWithRetries annotates the given node with the preservation annotation value +func AnnotateNodeForAutoPreservationWithRetries(ctx context.Context, nodeClient v2.NodeInterface, nodeLister corelisters.NodeLister, nodeName string) error { + retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + node, err := nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error getting node %q: %v", nodeName, err) + return err + } + 
nodeCopy := node.DeepCopy() + if nodeCopy.Annotations == nil { + nodeCopy.Annotations = make(map[string]string) + } + nodeCopy.Annotations[PreserveMachineAnnotationKey] = PreserveMachineAnnotationValuePreservedByMCM + _, err = nodeClient.Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + return err + } + klog.V(2).Infof("Annotated node %q with %q=%q.", nodeName, PreserveMachineAnnotationKey, PreserveMachineAnnotationValuePreservedByMCM) + return nil + }) + if retryErr != nil { + klog.Errorf("error annotating node %q for auto-preservation: %v", nodeName, retryErr) + } + return retryErr +} + +// AnnotateMachineForAutoPreservationWithRetries annotates the given machine with the preservation annotation value +func AnnotateMachineForAutoPreservationWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, m *v1alpha1.Machine) (*v1alpha1.Machine, error) { updatedMachine, err := UpdateMachineWithRetries(ctx, machineClient, machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { if clone.Annotations == nil { clone.Annotations = make(map[string]string) } - clone.Annotations[PreserveMachineAnnotationKey] = preserveValue + clone.Annotations[PreserveMachineAnnotationKey] = PreserveMachineAnnotationValuePreservedByMCM return nil }) if err != nil { + klog.Errorf("error annotating machine %q for auto-preservation: %v", m.Name, err) return nil, err } - klog.V(2).Infof("Updated machine %q with %q=%q.", m.Name, PreserveMachineAnnotationKey, preserveValue) + klog.V(2).Infof("Annotated machine %q with %q=%q.", m.Name, PreserveMachineAnnotationKey, PreserveMachineAnnotationValuePreservedByMCM) return updatedMachine, nil } From 084835b62353b9bd0bda64c48228e124fcefb4d9 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 12 Feb 2026 14:07:55 +0530 Subject: [PATCH 65/79] Add code to reconcile auto preservation, and reduce number of auto-preserved machines if autoPreservedFailedMachineMax is 
decreased in the shoot spec. --- pkg/controller/controller_utils.go | 13 ++++ pkg/controller/deployment_machineset_util.go | 3 +- pkg/controller/machineset.go | 70 +++++++++++++++---- pkg/controller/machineset_test.go | 6 +- .../provider/machinecontroller/machine.go | 2 +- pkg/util/provider/machinecontroller/node.go | 2 +- pkg/util/provider/machineutils/utils.go | 50 ------------- 7 files changed, 75 insertions(+), 71 deletions(-) diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index acd4df5c2..2bb9d1c21 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -797,6 +797,19 @@ func (s ActiveMachines) Less(i, j int) bool { return false } +// AutoPreservedMachines type allows custom sorting of machines so a controller can pick the best ones to delete. +type AutoPreservedMachines []*v1alpha1.Machine + +func (s AutoPreservedMachines) Len() int { return len(s) } +func (s AutoPreservedMachines) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +func (s AutoPreservedMachines) Less(i, j int) bool { + if s[i].CreationTimestamp != s[j].CreationTimestamp { + return s[i].CreationTimestamp.Before(&s[j].CreationTimestamp) + } + return false +} + // MachineKey is the function used to get the machine name from machine object // ToCheck : as machine-namespace does not matter func MachineKey(machine *v1alpha1.Machine) string { diff --git a/pkg/controller/deployment_machineset_util.go b/pkg/controller/deployment_machineset_util.go index 7d7fcc9f6..e0ca3f797 100644 --- a/pkg/controller/deployment_machineset_util.go +++ b/pkg/controller/deployment_machineset_util.go @@ -129,8 +129,7 @@ func calculateMachineSetStatus(is *v1alpha1.MachineSet, filteredMachines []*v1al failedMachines = append(failedMachines, machineSummary) } // Count number of failed machines annotated with PreserveMachineAnnotationValuePreservedByMCM - // Previously auto-preserved failed machines that have recovered to `Running` should count towards this 
- if machineutils.IsMachineFailed(machine) && machine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + if machine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { autoPreserveFailedMachineCount++ } } diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 78d926004..284eb58b4 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -577,7 +577,7 @@ func (c *controller) reconcileClusterMachineSet(key string) error { // to Failed machines to trigger auto-preservation, if applicable. // We do not update machineSet.Status.AutoPreserveFailedMachineCount in the function, as it will be calculated // and updated in the succeeding calls to calculateMachineSetStatus() and updateMachineSetStatus() - c.triggerAutoPreservationOfFailedMachines(ctx, filteredMachines, machineSet) + filteredMachines = c.reconcileAutoPreservationOfFailedMachines(ctx, filteredMachines, machineSet) // TODO: Fix working of expectations to reflect correct behaviour // machineSetNeedsSync := c.expectations.SatisfiedExpectations(key) @@ -908,14 +908,20 @@ func shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) bool { } } -// triggerAutoPreservationOfFailedMachines annotates failed machines with preserve=auto-preserved annotation +// reconcileAutoPreservationOfFailedMachines annotates failed machines with preserve=auto-preserved annotation // to trigger preservation of the machines, by the machine controller, up to the limit defined in the // MachineSet's AutoPreserveFailedMachineMax field. 
-func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) { +func (c *controller) reconcileAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) []*v1alpha1.Machine { autoPreservationCapacityRemaining := machineSet.Spec.AutoPreserveFailedMachineMax - machineSet.Status.AutoPreserveFailedMachineCount - if autoPreservationCapacityRemaining <= 0 { + if autoPreservationCapacityRemaining == 0 { // no capacity remaining, nothing to do - return + return machines + } else if autoPreservationCapacityRemaining < 0 { // when autoPreserveFailedMachineMax is decreased, it can be negative. + numExceeding := c.stopAutoPreservationForMachines(ctx, machines, int(-autoPreservationCapacityRemaining)) + if numExceeding > 0 { + klog.V(2).Infof("Attempted to decrease count of auto-preserved machines, but there are still %d violations of AutoPreserveFailedMachineMax.", numExceeding) + } + return machines } for index, m := range machines { if machineutils.IsMachineFailed(m) { @@ -924,10 +930,10 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context continue } if autoPreservationCapacityRemaining == 0 { - return + break } klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) - updatedMachine, err := c.addAutoPreserveAnnotation(ctx, m) + updatedMachine, err := machineutils.UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, addAutoPreserveAnnotationOnMachine) if err != nil { klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) // since addAutoPreserveAnnotation uses retries internally, on error we can continue with other machines @@ -937,14 +943,50 @@ func (c *controller) triggerAutoPreservationOfFailedMachines(ctx context.Context 
autoPreservationCapacityRemaining-- } } + return machines } -func (c *controller) addAutoPreserveAnnotation(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { - // if machine has a backing node, the node must be annotated - nodeName := machine.Labels[v1alpha1.NodeLabelKey] - if nodeName != "" { - return machine, machineutils.AnnotateNodeForAutoPreservationWithRetries(ctx, c.targetCoreClient.CoreV1().Nodes(), c.nodeLister, nodeName) +func (c *controller) stopAutoPreservationForMachines(ctx context.Context, machines []*v1alpha1.Machine, numToStop int) int { + var autoPreservedMachines []*v1alpha1.Machine + var otherMachines []*v1alpha1.Machine + for _, m := range machines { + if m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + autoPreservedMachines = append(autoPreservedMachines, m) + } else { + otherMachines = append(otherMachines, m) + } + } + numOfAutoPreservedMachines := len(autoPreservedMachines) + if numOfAutoPreservedMachines == 0 { + return numToStop + } + if numOfAutoPreservedMachines > numToStop { + sort.Sort(AutoPreservedMachines(autoPreservedMachines)) } - // else, the machine must be annotated to trigger preservation - return machineutils.AnnotateMachineForAutoPreservationWithRetries(ctx, c.controlMachineClient.Machines(machine.Namespace), c.machineLister, machine) + for index, m := range autoPreservedMachines { + if numToStop == 0 { + break + } + klog.V(2).Infof("Removing auto-preservation annotation from machine %q as AutoPreserveFailedMachineMax is breached", m.Name) + updatedMachine, err := machineutils.UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, removeAutoPreserveAnnotationFromMachine) + if err != nil { + continue + } + autoPreservedMachines[index] = updatedMachine + numToStop-- + } + return numToStop +} + +func addAutoPreserveAnnotationOnMachine(machineToUpdate *v1alpha1.Machine) 
error { + if machineToUpdate.Annotations == nil { + machineToUpdate.Annotations = make(map[string]string) + } + machineToUpdate.Annotations[machineutils.PreserveMachineAnnotationKey] = machineutils.PreserveMachineAnnotationValuePreservedByMCM + return nil +} + +func removeAutoPreserveAnnotationFromMachine(machineToUpdate *v1alpha1.Machine) error { + delete(machineToUpdate.Annotations, machineutils.PreserveMachineAnnotationKey) + return nil } diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index b8da6afc3..5f2933a67 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -1836,7 +1836,7 @@ var _ = Describe("machineset", func() { }) }) - Describe("#triggerAutoPreservationOfFailedMachines", func() { + Describe("#reconcileAutoPreservationOfFailedMachines", func() { type setup struct { autoPreserveFailedMachineCount int32 autoPreserveFailedMachineMax int32 @@ -1849,7 +1849,7 @@ var _ = Describe("machineset", func() { expect expect } - DescribeTable("#triggerAutoPreservationOfFailedMachines scenarios", func(tc testCase) { + DescribeTable("#reconcileAutoPreservationOfFailedMachines scenarios", func(tc testCase) { stop := make(chan struct{}) defer close(stop) testMachineSet := &machinev1.MachineSet{ @@ -1935,7 +1935,7 @@ var _ = Describe("machineset", func() { waitForCacheSync(stop, c) machinesList := []*machinev1.Machine{testMachine1, testMachine2} - c.triggerAutoPreservationOfFailedMachines(context.TODO(), machinesList, testMachineSet) + c.reconcileAutoPreservationOfFailedMachines(context.TODO(), machinesList, testMachineSet) waitForCacheSync(stop, c) updatedMachine1, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine1.Name, metav1.GetOptions{}) updatedMachine2, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine2.Name, metav1.GetOptions{}) diff --git a/pkg/util/provider/machinecontroller/machine.go 
b/pkg/util/provider/machinecontroller/machine.go index 92a98b01f..113ab9ab9 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -59,7 +59,7 @@ func (c *controller) updateMachine(oldObj, newObj any) { return } // to reconcile on change in annotations related to preservation - if machineutils.PreserveAnnotationsChanged(oldMachine.Annotations, newMachine.Annotations) { + if oldMachine.Annotations[machineutils.PreserveMachineAnnotationKey] != newMachine.Annotations[machineutils.PreserveMachineAnnotationKey] { c.enqueueMachine(newObj, "handling machine object preservation related UPDATE event") return } diff --git a/pkg/util/provider/machinecontroller/node.go b/pkg/util/provider/machinecontroller/node.go index 7cdf53e73..51da3c7e8 100644 --- a/pkg/util/provider/machinecontroller/node.go +++ b/pkg/util/provider/machinecontroller/node.go @@ -101,7 +101,7 @@ func (c *controller) updateNode(oldObj, newObj any) { c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. Conditions of node %q differ from machine status", node.Name)) } // to reconcile on change in annotations related to preservation - if machineutils.PreserveAnnotationsChanged(oldNode.Annotations, node.Annotations) { + if node.Annotations[machineutils.PreserveMachineAnnotationKey] != oldNode.Annotations[machineutils.PreserveMachineAnnotationKey] { c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. 
Preserve annotations added or updated for node %q", getNodeName(machine))) return } diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 35368b47e..10ca5ea98 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -14,8 +14,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" errorsutil "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" - v2 "k8s.io/client-go/kubernetes/typed/core/v1" - corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/util/retry" "k8s.io/klog/v2" "time" @@ -160,54 +158,6 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { return m.Annotations[MachinePriority] == "1" } -// PreserveAnnotationsChanged returns true if there is a change in preserve annotations -func PreserveAnnotationsChanged(oldAnnotations, newAnnotations map[string]string) bool { - return newAnnotations[PreserveMachineAnnotationKey] != oldAnnotations[PreserveMachineAnnotationKey] -} - -// AnnotateNodeForAutoPreservationWithRetries annotates the given node with the preservation annotation value -func AnnotateNodeForAutoPreservationWithRetries(ctx context.Context, nodeClient v2.NodeInterface, nodeLister corelisters.NodeLister, nodeName string) error { - retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - node, err := nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error getting node %q: %v", nodeName, err) - return err - } - nodeCopy := node.DeepCopy() - if nodeCopy.Annotations == nil { - nodeCopy.Annotations = make(map[string]string) - } - nodeCopy.Annotations[PreserveMachineAnnotationKey] = PreserveMachineAnnotationValuePreservedByMCM - _, err = nodeClient.Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - return err - } - klog.V(2).Infof("Annotated node %q with %q=%q.", nodeName, PreserveMachineAnnotationKey, PreserveMachineAnnotationValuePreservedByMCM) - return nil - }) - if retryErr 
!= nil { - klog.Errorf("error annotating node %q for auto-preservation: %v", nodeName, retryErr) - } - return retryErr -} - -// AnnotateMachineForAutoPreservationWithRetries annotates the given machine with the preservation annotation value -func AnnotateMachineForAutoPreservationWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, m *v1alpha1.Machine) (*v1alpha1.Machine, error) { - updatedMachine, err := UpdateMachineWithRetries(ctx, machineClient, machineLister, m.Namespace, m.Name, func(clone *v1alpha1.Machine) error { - if clone.Annotations == nil { - clone.Annotations = make(map[string]string) - } - clone.Annotations[PreserveMachineAnnotationKey] = PreserveMachineAnnotationValuePreservedByMCM - return nil - }) - if err != nil { - klog.Errorf("error annotating machine %q for auto-preservation: %v", m.Name, err) - return nil, err - } - klog.V(2).Infof("Annotated machine %q with %q=%q.", m.Name, PreserveMachineAnnotationKey, PreserveMachineAnnotationValuePreservedByMCM) - return updatedMachine, nil -} - // see https://github.com/kubernetes/kubernetes/issues/21479 type updateMachineFunc func(machine *v1alpha1.Machine) error From 68716678ba1532266e6fb963c80d5f238dc84277 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 12 Feb 2026 16:47:19 +0530 Subject: [PATCH 66/79] Modify annotation handling to improve determinism: Introduced lastAppliedNodePreserveValue for persisting node annotation values that have been applied. 
--- .../provider/machinecontroller/machine.go | 122 ++++++++++-------- .../machinecontroller/machine_util.go | 6 +- .../machinecontroller/machine_util_test.go | 6 +- 3 files changed, 78 insertions(+), 56 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 113ab9ab9..293868ce3 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -727,7 +727,6 @@ func (c *controller) shouldMachineBeMovedToTerminatingQueue(machine *v1alpha1.Ma if machine.DeletionTimestamp != nil && c.isCreationProcessing(machine) { klog.Warningf("Cannot delete machine %q, its deletionTimestamp is set but it is currently being processed by the creation flow\n", getMachineKey(machine)) } - return !c.isCreationProcessing(machine) && machine.DeletionTimestamp != nil } @@ -749,7 +748,16 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // manageMachinePreservation manages machine preservation based on the preserve annotation value. 
func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { + machineAnnotationValue := machine.Annotations[machineutils.PreserveMachineAnnotationKey] + laNodePreserveValue := machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] + clone := machine.DeepCopy() defer func() { + if err == nil { + err = c.updatePreserveAnnotations(ctx, clone, machineAnnotationValue, laNodePreserveValue) + if err == nil { + retry = machineutils.LongRetry + } + } if err != nil { if apierrors.IsConflict(err) { retry = machineutils.ConflictRetry @@ -760,31 +768,37 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a retry = machineutils.LongRetry } }() - clone := machine.DeepCopy() - preserveValue, clone, err := c.reconcilePreserveAnnotationValueForMachine(ctx, clone) - if err != nil { - return + nodeName := clone.Labels[v1alpha1.NodeLabelKey] + nodeAnnotationValue := "" + if nodeName != "" { + nodeAnnotationValue, err = c.getNodeAnnotationValue(nodeName) + if err != nil { + return + } } - // delete preserve annotation from machine if the backing node exists - + effectivePreserveValue, laNodePreserveValue := c.computeEffectivePreserveValues(nodeAnnotationValue, laNodePreserveValue, machineAnnotationValue) // The annotation has either been deleted, set to empty or no preserve annotation exists. // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. 
- if preserveValue == "" { + if effectivePreserveValue == "" { err = c.stopMachinePreservationIfPreserved(ctx, clone) return } - if !machineutils.AllowedPreserveAnnotationValues.Has(preserveValue) { - klog.Warningf("Preserve annotation value %q on machine %q is invalid", preserveValue, clone.Name) + if !machineutils.AllowedPreserveAnnotationValues.Has(effectivePreserveValue) { + if effectivePreserveValue == nodeAnnotationValue { + klog.Warningf("Preserve annotation value %q on node %q is invalid", effectivePreserveValue, nodeName) + } else { + klog.Warningf("Preserve annotation value %q on machine %q is invalid", effectivePreserveValue, clone.Name) + } return } // if preserve=false or if preservation has expired, stop preservation - if preserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { + if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { err = c.stopMachinePreservationIfPreserved(ctx, clone) return } - if preserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed || preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed || effectivePreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { if machineutils.IsMachineFailed(clone) { - err = c.preserveMachine(ctx, clone, preserveValue) + err = c.preserveMachine(ctx, clone, effectivePreserveValue) } else { err = c.stopMachinePreservationIfPreserved(ctx, clone) if err != nil { @@ -792,58 +806,64 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } // To prevent erroneous re-preservation of a recovered, previously auto-preserved machine on future failures // 
(since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), - // in addition to stopping preservation we also remove the preservation annotation. + // in addition to stopping preservation we also remove the preservation annotation on the machine. // If the machine fails again, it can be preserved again based on the autoPreserveFailedMachineCount value at that time. - if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - err = c.deletePreserveAnnotation(ctx, clone) + if effectivePreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + machineAnnotationValue = "" } } - } else if preserveValue == machineutils.PreserveMachineAnnotationValueNow { - err = c.preserveMachine(ctx, clone, preserveValue) + } else if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueNow { + err = c.preserveMachine(ctx, clone, effectivePreserveValue) if err != nil { return } } - // At this point, the machine is annotated either with preserve=now or preserve=when-failed or preserve=auto-preserved, - // and machine preservation has been stopped, if applicable. - // If the machine is running and has a backing node, uncordon the node if cordoned. // This is to handle the case where a preserved machine recovers from Failed to Running // in which case, pods should be allowed to be scheduled onto the node - nodeName := clone.Labels[v1alpha1.NodeLabelKey] if machineutils.IsMachineActive(clone) && nodeName != "" { err = c.uncordonNodeIfCordoned(ctx, nodeName) } return } -// reconcilePreserveAnnotationValueForMachine returns the effective preservation value. -// If the backing node exists and has the preserve annotation (even if set to ""), the node's annotation value is returned. -// If the node has no preserve annotation, and the machine has the preserve annotation set(even if set to "") , the machine's annotation value is returned. 
-// If there is no backing node, the machine's annotation value is returned. -// If neither machine nor node has the preserve annotation set, an empty string is returned, indicating that preservation is not desired. -func (c *controller) reconcilePreserveAnnotationValueForMachine(ctx context.Context, machine *v1alpha1.Machine) (string, *v1alpha1.Machine, error) { - machineAnnotationValue, mExists := machine.Annotations[machineutils.PreserveMachineAnnotationKey] - nodeName := machine.Labels[v1alpha1.NodeLabelKey] - if nodeName != "" { - node, err := c.nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error trying to get node %q: %v", nodeName, err) - return "", machine, err - } - nodeAnnotationValue, nExists := node.Annotations[machineutils.PreserveMachineAnnotationKey] - if !nExists { - return machineAnnotationValue, machine, nil - } - var updatedMachine = machine - if mExists { - klog.Warningf("Node %q annotated with %q=%q. Proceeding with the value on node and deleting annotation on machine %q.", nodeName, machineutils.PreserveMachineAnnotationKey, nodeAnnotationValue, machine.Name) - updatedMachine, err = c.deletePreserveAnnotationValueOnMachine(ctx, machine) - if err != nil { - return "", nil, err - } - } - return nodeAnnotationValue, updatedMachine, nil +// computeEffectivePreserveValue returns the effective preservation value, and the updated lastAppliedNodeAnnotationValue +func (c *controller) computeEffectivePreserveValues(nodeAnnotationValue, lastAppliedNodeAnnotationValue, machineAnnotationValue string) (string, string) { + if nodeAnnotationValue == "" && lastAppliedNodeAnnotationValue == "" { + return machineAnnotationValue, "" + } + return nodeAnnotationValue, nodeAnnotationValue +} + +func (c *controller) getNodeAnnotationValue(nodeName string) (string, error) { + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error fetching node %q: %v", nodeName, err) + return "", err + } + return 
node.Annotations[machineutils.PreserveMachineAnnotationKey], nil +} + +// updatePreserveAnnotations updates the last applied node annotation value and the preservation annotation values on the machine +func (c *controller) updatePreserveAnnotations(ctx context.Context, machine *v1alpha1.Machine, machineAnnotationValue, laNodePreserveValue string) error { + update := false + if machineAnnotationValue == "" { + delete(machine.Annotations, machineutils.PreserveMachineAnnotationKey) + update = true + } else if machine.Annotations[machineutils.PreserveMachineAnnotationKey] != machineAnnotationValue { + machine.Annotations[machineutils.PreserveMachineAnnotationKey] = machineAnnotationValue + update = true + } + + if machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] != laNodePreserveValue { + machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] = laNodePreserveValue + update = true + } + if !update { + return nil + } + _, err := c.controlMachineClient.Machines(machine.Name).Update(ctx, machine, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error updating preserve annotations values on machine %q.", machine.Name) } - // if no backing node exists - return machineAnnotationValue, machine, nil + return err } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 27eace4e2..5265f8e2d 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2403,6 +2403,7 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach // Step 4: Update NodePreserved Condition on Node, with drain status _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) if drainErr != nil { + klog.Errorf("error draining preserved node %q for machine %q : %v", nodeName, machine.Name, drainErr) return drainErr } if err != nil { @@ -2487,7 +2488,7 @@ 
func (c *controller) setPreserveExpiryTimeOnMachine(ctx context.Context, machine machine.Status.CurrentStatus = preservedCurrentStatus updatedMachine, err := c.controlMachineClient.Machines(machine.Namespace).UpdateStatus(ctx, machine, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("machine/status UPDATE failed for machine %q. Retrying, error: %s", machine.Name, err) + klog.Errorf("error updating preserveExpiryTime on machine %q: %v", machine.Name, err) return nil, err } klog.V(2).Infof("Machine %q preserved till %v.", machine.Name, preservedCurrentStatus.PreserveExpiryTime) @@ -2553,6 +2554,9 @@ func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string nodeClone := node.DeepCopy() nodeClone.Spec.Unschedulable = false _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeClone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error uncordoning node %q: %v", nodeName, err) + } return err } diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 44fb479d2..c12c0d761 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4042,14 +4042,12 @@ var _ = Describe("machine_util", func() { } updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), tc.setup.nodeName, metav1.GetOptions{}) Expect(getErr).To(BeNil()) - if tc.expect.isCAAnnotationPresent { - Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal(autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue)) - } + Expect(updatedNode.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey]).To(Equal(autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue)) if tc.expect.preserveNodeCondition.Type != "" { updatedNodeCondition := nodeops.GetCondition(updatedNode, tc.expect.preserveNodeCondition.Type) 
Expect(updatedNodeCondition.Status).To(Equal(tc.expect.preserveNodeCondition.Status)) Expect(updatedNodeCondition.Reason).To(Equal(tc.expect.preserveNodeCondition.Reason)) - Expect(updatedNodeCondition.Message).To(Equal(tc.expect.preserveNodeCondition.Message)) + Expect(updatedNodeCondition.Message).To(ContainSubstring(tc.expect.preserveNodeCondition.Message)) } }, Entry("when preserve=now and there is no backing node", &testCase{ From cc4404aefa309c2a8530a931b9690ffb2812214d Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 16 Feb 2026 15:45:01 +0530 Subject: [PATCH 67/79] Update tests and handle edge cases --- pkg/controller/machineset.go | 15 +- pkg/controller/machineset_test.go | 100 ++++- .../provider/machinecontroller/machine.go | 112 +++--- .../machinecontroller/machine_test.go | 344 ++++++++++-------- .../machinecontroller/machine_util.go | 88 ++--- .../machinecontroller/machine_util_test.go | 156 +++++++- pkg/util/provider/machineutils/utils.go | 3 + pkg/util/provider/machineutils/utils_test.go | 95 ----- 8 files changed, 527 insertions(+), 386 deletions(-) delete mode 100644 pkg/util/provider/machineutils/utils_test.go diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 284eb58b4..905dba8e6 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -577,7 +577,7 @@ func (c *controller) reconcileClusterMachineSet(key string) error { // to Failed machines to trigger auto-preservation, if applicable. 
// We do not update machineSet.Status.AutoPreserveFailedMachineCount in the function, as it will be calculated // and updated in the succeeding calls to calculateMachineSetStatus() and updateMachineSetStatus() - filteredMachines = c.reconcileAutoPreservationOfFailedMachines(ctx, filteredMachines, machineSet) + filteredMachines = c.manageAutoPreservationOfFailedMachines(ctx, filteredMachines, machineSet) // TODO: Fix working of expectations to reflect correct behaviour // machineSetNeedsSync := c.expectations.SatisfiedExpectations(key) @@ -908,18 +908,18 @@ func shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) bool { } } -// reconcileAutoPreservationOfFailedMachines annotates failed machines with preserve=auto-preserved annotation +// manageAutoPreservationOfFailedMachines annotates failed machines with preserve=auto-preserved annotation // to trigger preservation of the machines, by the machine controller, up to the limit defined in the // MachineSet's AutoPreserveFailedMachineMax field. -func (c *controller) reconcileAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) []*v1alpha1.Machine { +func (c *controller) manageAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) []*v1alpha1.Machine { autoPreservationCapacityRemaining := machineSet.Spec.AutoPreserveFailedMachineMax - machineSet.Status.AutoPreserveFailedMachineCount if autoPreservationCapacityRemaining == 0 { // no capacity remaining, nothing to do return machines } else if autoPreservationCapacityRemaining < 0 { // when autoPreserveFailedMachineMax is decreased, it can be negative. 
- numExceeding := c.stopAutoPreservationForMachines(ctx, machines, int(-autoPreservationCapacityRemaining)) - if numExceeding > 0 { - klog.V(2).Infof("Attempted to decrease count of auto-preserved machines, but there are still %d violations of AutoPreserveFailedMachineMax.", numExceeding) + numStillExceeding := c.stopAutoPreservationForMachines(ctx, machines, int(-autoPreservationCapacityRemaining)) + if numStillExceeding > 0 { + klog.V(2).Infof("Attempted to decrease count of auto-preserved machines, but there are still %d violations of AutoPreserveFailedMachineMax.", numStillExceeding) } return machines } @@ -948,12 +948,9 @@ func (c *controller) reconcileAutoPreservationOfFailedMachines(ctx context.Conte func (c *controller) stopAutoPreservationForMachines(ctx context.Context, machines []*v1alpha1.Machine, numToStop int) int { var autoPreservedMachines []*v1alpha1.Machine - var otherMachines []*v1alpha1.Machine for _, m := range machines { if m.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { autoPreservedMachines = append(autoPreservedMachines, m) - } else { - otherMachines = append(otherMachines, m) } } numOfAutoPreservedMachines := len(autoPreservedMachines) diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 5f2933a67..4a0ae4b2f 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -8,6 +8,7 @@ import ( "context" "errors" "fmt" + "sort" "sync" "time" @@ -1836,10 +1837,12 @@ var _ = Describe("machineset", func() { }) }) - Describe("#reconcileAutoPreservationOfFailedMachines", func() { + Describe("#manageAutoPreservationOfFailedMachines", func() { type setup struct { autoPreserveFailedMachineCount int32 autoPreserveFailedMachineMax int32 + additionalMachines []*machinev1.Machine + replicas int32 } type expect struct { preservedMachineCount int @@ -1849,7 +1852,7 @@ var _ = Describe("machineset", func() { expect expect } 
- DescribeTable("#reconcileAutoPreservationOfFailedMachines scenarios", func(tc testCase) { + DescribeTable("#manageAutoPreservationOfFailedMachines scenarios", func(tc testCase) { stop := make(chan struct{}) defer close(stop) testMachineSet := &machinev1.MachineSet{ @@ -1862,7 +1865,7 @@ var _ = Describe("machineset", func() { UID: "1234567", }, Spec: machinev1.MachineSetSpec{ - Replicas: 4, + Replicas: tc.setup.replicas, Template: machinev1.MachineTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ @@ -1930,12 +1933,15 @@ var _ = Describe("machineset", func() { } objects := []runtime.Object{} objects = append(objects, testMachineSet, testMachine1, testMachine2, testMachine3, testMachine4) + for _, m := range tc.setup.additionalMachines { + objects = append(objects, m) + } c, trackers := createController(stop, testNamespace, objects, nil, nil) defer trackers.Stop() waitForCacheSync(stop, c) - machinesList := []*machinev1.Machine{testMachine1, testMachine2} - - c.reconcileAutoPreservationOfFailedMachines(context.TODO(), machinesList, testMachineSet) + machinesList := []*machinev1.Machine{testMachine1, testMachine2, testMachine3, testMachine4} + machinesList = append(machinesList, tc.setup.additionalMachines...) 
+ c.manageAutoPreservationOfFailedMachines(context.TODO(), machinesList, testMachineSet) waitForCacheSync(stop, c) updatedMachine1, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine1.Name, metav1.GetOptions{}) updatedMachine2, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), testMachine2.Name, metav1.GetOptions{}) @@ -1948,12 +1954,18 @@ var _ = Describe("machineset", func() { if updatedMachine2.Annotations != nil && updatedMachine2.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { preservedCount++ } - Expect(preservedCount).To(Equal(tc.expect.preservedMachineCount)) // Running machine should not be auto-preserved in any of the cases Expect(updatedMachine3.Annotations[machineutils.PreserveMachineAnnotationKey]).To(BeEmpty()) // Machine with explicit preserve annotation set to false should not be auto-preserved Expect(updatedMachine4.Annotations[machineutils.PreserveMachineAnnotationKey]).To(Equal(machineutils.PreserveMachineAnnotationValueFalse)) + for _, m := range tc.setup.additionalMachines { + updatedMachine, _ := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), m.Name, metav1.GetOptions{}) + if updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey] == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + preservedCount++ + } + } + Expect(preservedCount).To(Equal(tc.expect.preservedMachineCount)) }, Entry("should trigger auto preservation of 1 failed machine if AutoPreserveFailedMachineMax is 1 and AutoPreserveFailedMachineCount is 0", testCase{ setup: setup{ @@ -2000,8 +2012,82 @@ var _ = Describe("machineset", func() { preservedMachineCount: 2, }, }), + Entry("should not trigger auto preservation of failed machine annotated with preserve=false even if AutoPreserveFailedMachineCount < AutoPreserveFailedMachineMax", testCase{ + setup: setup{ + autoPreserveFailedMachineCount: 0, + 
autoPreserveFailedMachineMax: 3, + }, + expect: expect{ + preservedMachineCount: 2, + }, + }), + Entry("should stop auto preservation of machines annotated with preserve=auto-preserve if AutoPreserveFailedMachineCount > AutoPreserveFailedMachineMax", testCase{ + setup: setup{ + autoPreserveFailedMachineCount: 1, + autoPreserveFailedMachineMax: 0, + additionalMachines: []*machinev1.Machine{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-5", + Namespace: testNamespace, + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + }, + }, + Status: machinev1.MachineStatus{ + CurrentStatus: machinev1.CurrentStatus{ + Phase: MachineFailed, + PreserveExpiryTime: &metav1.Time{Time: time.Now().Add(1 * time.Hour)}, + }, + }, + }, + }, + }, + expect: expect{ + preservedMachineCount: 0, + }, + }), ) }) + Describe("#AutoPreservedMachinesSorting ", func() { + It("should sort auto-preserved failed machines in the order of increasing creation timestamp", func() { + machines := []*machinev1.Machine{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-1", + Namespace: testNamespace, + CreationTimestamp: metav1.Time{Time: time.Now().Add(-1 * time.Hour)}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-2", + Namespace: testNamespace, + CreationTimestamp: metav1.Time{Time: time.Now().Add(-4 * time.Hour)}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-3", + Namespace: testNamespace, + CreationTimestamp: metav1.Time{Time: time.Now().Add(-3 * time.Hour)}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-4", + Namespace: testNamespace, + CreationTimestamp: metav1.Time{Time: time.Now().Add(-5 * time.Hour)}, + }, + }, + } + sort.Sort(AutoPreservedMachines(machines)) + for index := range machines[:len(machines)-1] { + Expect(machines[index].CreationTimestamp.Time.Before(machines[index+1].CreationTimestamp.Time)).To(BeTrue()) + } + }) + }) + 
Describe("#shouldFailedMachineBeTerminated", func() { type setup struct { diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 293868ce3..1a4766e58 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -746,16 +746,16 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { Machine Preservation operations */ -// manageMachinePreservation manages machine preservation based on the preserve annotation value. +// manageMachinePreservation manages machine preservation based on the preserve annotation values on the node and machine objects. func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { - machineAnnotationValue := machine.Annotations[machineutils.PreserveMachineAnnotationKey] - laNodePreserveValue := machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] + machineObjectUpdated := false clone := machine.DeepCopy() defer func() { - if err == nil { - err = c.updatePreserveAnnotations(ctx, clone, machineAnnotationValue, laNodePreserveValue) - if err == nil { - retry = machineutils.LongRetry + // this needs to be done for cases when machine is neither preserved nor un-preserved, but the LastAppliedNodePreserveValueAnnotation needs to be updated + if err == nil && !machineObjectUpdated && clone.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] != machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] { + _, err = c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error updating LastAppliedNodePreserveValueAnnotation value on machine %q: %v", machine.Name, err) } } if err != nil { @@ -768,19 +768,20 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a retry = 
machineutils.LongRetry } }() + nodeName := clone.Labels[v1alpha1.NodeLabelKey] nodeAnnotationValue := "" if nodeName != "" { - nodeAnnotationValue, err = c.getNodeAnnotationValue(nodeName) + nodeAnnotationValue, err = c.getNodePreserveAnnotationValue(nodeName) if err != nil { return } } - effectivePreserveValue, laNodePreserveValue := c.computeEffectivePreserveValues(nodeAnnotationValue, laNodePreserveValue, machineAnnotationValue) + effectivePreserveValue := reconcilePreservationAnnotations(nodeAnnotationValue, clone.Annotations) // The annotation has either been deleted, set to empty or no preserve annotation exists. // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. if effectivePreserveValue == "" { - err = c.stopMachinePreservationIfPreserved(ctx, clone) + machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) return } if !machineutils.AllowedPreserveAnnotationValues.Has(effectivePreserveValue) { @@ -789,31 +790,47 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } else { klog.Warningf("Preserve annotation value %q on machine %q is invalid", effectivePreserveValue, clone.Name) } + // invalid annotation value will be synced to lastAppliedNodePreserveValue in the defer() call. 
+ // This is to prevent MCM from missing updates on the node object in case of crashes return } - // if preserve=false or if preservation has expired, stop preservation - if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueFalse || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { - err = c.stopMachinePreservationIfPreserved(ctx, clone) + if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueFalse { + machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) return } - if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed || effectivePreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - if machineutils.IsMachineFailed(clone) { - err = c.preserveMachine(ctx, clone, effectivePreserveValue) + if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { + if !machineutils.IsMachineFailed(clone) || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { + machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) + // if not preserved, and lastAppliedNodePreserveValue is different from current preserve annotation value on node, + // the defer() call will update the lastAppliedNodePreserveValue } else { - err = c.stopMachinePreservationIfPreserved(ctx, clone) - if err != nil { - return - } - // To prevent erroneous re-preservation of a recovered, previously auto-preserved machine on future failures - // (since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), - // in addition to stopping preservation we also remove the preservation annotation on the machine. - // If the machine fails again, it can be preserved again based on the autoPreserveFailedMachineCount value at that time. 
- if effectivePreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - machineAnnotationValue = "" - } + machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + } + if err != nil { + return } } else if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueNow { - err = c.preserveMachine(ctx, clone, effectivePreserveValue) + if clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { + // on timing out, remove preserve annotation to prevent incorrect re-preservation + delete(clone.Annotations, machineutils.PreserveMachineAnnotationValuePreservedByMCM) + delete(clone.Annotations, machineutils.LastAppliedNodePreserveValueAnnotationKey) + machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, true) + } else { + machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + } + if err != nil { + return + } + } else if effectivePreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { + if !machineutils.IsMachineFailed(clone) || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { + // To prevent incorrect re-preservation of a recovered, previously auto-preserved machine on future failures + // (since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), + // in addition to stopping preservation, we also remove the preservation annotation on the machine. 
+ delete(clone.Annotations, machineutils.PreserveMachineAnnotationValuePreservedByMCM) + machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) + } else { + machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + } if err != nil { return } @@ -826,15 +843,17 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } -// computeEffectivePreserveValue returns the effective preservation value, and the updated lastAppliedNodeAnnotationValue -func (c *controller) computeEffectivePreserveValues(nodeAnnotationValue, lastAppliedNodeAnnotationValue, machineAnnotationValue string) (string, string) { - if nodeAnnotationValue == "" && lastAppliedNodeAnnotationValue == "" { - return machineAnnotationValue, "" +// reconcilePreservationAnnotations returns the effective preservation value, and updates the machine Annotations related to preservation +func reconcilePreservationAnnotations(nodeAnnotationValue string, machineAnnotations map[string]string) string { + if nodeAnnotationValue == "" && machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "" { + return machineAnnotations[machineutils.PreserveMachineAnnotationKey] } - return nodeAnnotationValue, nodeAnnotationValue + delete(machineAnnotations, machineutils.PreserveMachineAnnotationKey) + machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] = nodeAnnotationValue + return nodeAnnotationValue } -func (c *controller) getNodeAnnotationValue(nodeName string) (string, error) { +func (c *controller) getNodePreserveAnnotationValue(nodeName string) (string, error) { node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error fetching node %q: %v", nodeName, err) @@ -842,28 +861,3 @@ func (c *controller) getNodeAnnotationValue(nodeName string) (string, error) { } return node.Annotations[machineutils.PreserveMachineAnnotationKey], nil } - -// updatePreserveAnnotations updates the last 
applied node annotation value and the preservation annotation values on the machine -func (c *controller) updatePreserveAnnotations(ctx context.Context, machine *v1alpha1.Machine, machineAnnotationValue, laNodePreserveValue string) error { - update := false - if machineAnnotationValue == "" { - delete(machine.Annotations, machineutils.PreserveMachineAnnotationKey) - update = true - } else if machine.Annotations[machineutils.PreserveMachineAnnotationKey] != machineAnnotationValue { - machine.Annotations[machineutils.PreserveMachineAnnotationKey] = machineAnnotationValue - update = true - } - - if machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] != laNodePreserveValue { - machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] = laNodePreserveValue - update = true - } - if !update { - return nil - } - _, err := c.controlMachineClient.Machines(machine.Name).Update(ctx, machine, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error updating preserve annotations values on machine %q.", machine.Name) - } - return err -} diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index efdbdb936..53d57216e 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -3998,150 +3998,148 @@ var _ = Describe("machine", func() { }), ) }) - - Describe("#reconcilePreserveAnnotationValueForMachine", func() { + Describe("#reconcilePreservationAnnotations", func() { type setup struct { - machinePreserveAnnotation string - nodePreserveAnnotation string - nodeName string + nodeAnnotationValue string + machineAnnotations map[string]string } type expect struct { - preserveValue string - err error - deleteMachinePreserveAnnotation bool + effectivePreserveValue string + machineAnnotations map[string]string } + type testCase struct { setup setup expect expect } - 
DescribeTable("reconcilePreserveAnnotationValueForMachine behavior", + DescribeTable("reconcilePreservationAnnotations scenarios", func(tc testCase) { - - stop := make(chan struct{}) - defer close(stop) - - var controlMachineObjects []runtime.Object - var targetCoreObjects []runtime.Object - - // Build machine - machine := &v1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: testNamespace, - Name: "m1", - Labels: map[string]string{ - v1alpha1.NodeLabelKey: tc.setup.nodeName, - }, - Annotations: map[string]string{}, - }, - } - if tc.setup.machinePreserveAnnotation != "" { - machine.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.machinePreserveAnnotation - } - - controlMachineObjects = append(controlMachineObjects, machine) - // Build node - if tc.setup.nodeName != "" && tc.setup.nodeName != "invalid" { - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: tc.setup.nodeName, - Annotations: map[string]string{}, - }, - } - if tc.setup.nodePreserveAnnotation != "" { - node.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.nodePreserveAnnotation - } - targetCoreObjects = append(targetCoreObjects, node) - } - - c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) - defer trackers.Stop() - - waitForCacheSync(stop, c) - value, updatedMachine, err := c.reconcilePreserveAnnotationValueForMachine(context.TODO(), machine) - - if tc.expect.err != nil { - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(Equal(tc.expect.err.Error())) - return - } - Expect(err).ToNot(HaveOccurred()) - Expect(value).To(Equal(tc.expect.preserveValue)) - machineValue, exists := updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey] - if tc.expect.deleteMachinePreserveAnnotation { - Expect(exists).To(Equal(false)) - } else if exists { - Expect(machineValue).To(Equal(tc.expect.preserveValue)) - } + preserveValue := 
reconcilePreservationAnnotations(tc.setup.nodeAnnotationValue, tc.setup.machineAnnotations) + Expect(preserveValue).To(Equal(tc.expect.effectivePreserveValue)) + Expect(tc.setup.machineAnnotations[machineutils.PreserveMachineAnnotationKey]).To(Equal(tc.expect.machineAnnotations[machineutils.PreserveMachineAnnotationKey])) + Expect(tc.setup.machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey]).To(Equal(tc.expect.machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey])) }, - Entry("should return empty string if neither machine nor node has preserve annotation", testCase{ + Entry("when node is not annotated and laNodeAnnotationValue is empty, should return machine's annotation value and empty string", testCase{ setup: setup{ - nodeName: "node-1", + nodeAnnotationValue: "", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "A", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "", + }, }, expect: expect{ - preserveValue: "", - err: nil, + effectivePreserveValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "A", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "", + }, }, }), - Entry("should delete annotation value on node and return empty string if only machine has preserve annotation and backing node exists", testCase{ + Entry("when neither node nor machine is not annotated and laNodeAnnotationValue is empty, should return two empty strings", testCase{ setup: setup{ - machinePreserveAnnotation: "machineValue", - nodeName: "node-1", + nodeAnnotationValue: "", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "", + }, }, expect: expect{ - preserveValue: "", - err: nil, - deleteMachinePreserveAnnotation: true, + effectivePreserveValue: "", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + 
machineutils.LastAppliedNodePreserveValueAnnotationKey: "", + }, }, }), - Entry("should return node's annotation value if only node has preserve annotation", testCase{ + Entry("when neither node nor machine is annotated and laNodeAnnotationValue is \"A\", should return two empty strings", testCase{ setup: setup{ - nodePreserveAnnotation: "nodeValue", - nodeName: "node-1", + nodeAnnotationValue: "", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "A", + }, }, expect: expect{ - preserveValue: "nodeValue", - err: nil, + effectivePreserveValue: "", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "", + }, }, }), - Entry("should return node's annotation value and delete machine's annotation value if both machine and node have preserve annotation", testCase{ + Entry("when node is annotated, laNodeAnnotationValue is empty, and machine is not annotated, should return node's annotation value as effective value and last applied value", testCase{ setup: setup{ - machinePreserveAnnotation: "machineValue", - nodePreserveAnnotation: "nodeValue", - nodeName: "node-1", + nodeAnnotationValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "", + }, }, expect: expect{ - preserveValue: "nodeValue", - err: nil, - deleteMachinePreserveAnnotation: true, + effectivePreserveValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "A", + }, }, }), - Entry("should return an error if machine has node label but node object is not found", testCase{ + Entry("when node is annotated, laNodeAnnotationValue is empty, and machine is annotated differently, should return node's annotation value as 
effective value and last applied value", testCase{ setup: setup{ - machinePreserveAnnotation: "", - nodeName: "invalid", + nodeAnnotationValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "B", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "", + }, }, expect: expect{ - preserveValue: "", - err: fmt.Errorf("node %q not found", "invalid"), + effectivePreserveValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "A", + }, }, }), - Entry("should return machine's annotation value if backing node does not exist", testCase{ + Entry("when node, machine annotation values and laNodeAnnotationValue are the same, should return node's annotation value as effective value and last applied value", testCase{ setup: setup{ - machinePreserveAnnotation: "machineValue", + nodeAnnotationValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "A", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "A", + }, }, expect: expect{ - preserveValue: "machineValue", - err: nil, + effectivePreserveValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "A", + }, + }, + }), + Entry("when node, machine annotation values are the same and laNodeAnnotationValue differs, should return node's annotation value as effective value and last applied value", testCase{ + setup: setup{ + nodeAnnotationValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "A", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "B", + }, + }, + expect: expect{ + effectivePreserveValue: "A", + machineAnnotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: "", + machineutils.LastAppliedNodePreserveValueAnnotationKey: "A", + }, }, }), ) - }) + }) 
Describe("#manageMachinePreservation", func() { type setup struct { machineAnnotationValue string nodeAnnotationValue string + laNodePreserveValue string nodeName string machinePhase v1alpha1.MachinePhase preserveExpiryTime *metav1.Time @@ -4150,6 +4148,8 @@ var _ = Describe("machine", func() { retry machineutils.RetryPeriod preserveExpiryTimeIsSet bool nodeCondition *corev1.NodeCondition + machineAnnotationValue string + laNodePreserveValue string err error } type testCase struct { @@ -4174,7 +4174,10 @@ var _ = Describe("machine", func() { Labels: map[string]string{ v1alpha1.NodeLabelKey: tc.setup.nodeName, }, - Annotations: map[string]string{}, + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: tc.setup.machineAnnotationValue, + machineutils.LastAppliedNodePreserveValueAnnotationKey: tc.setup.laNodePreserveValue, + }, }, Status: v1alpha1.MachineStatus{ CurrentStatus: v1alpha1.CurrentStatus{ Phase: tc.setup.machinePhase, @@ -4183,23 +4186,20 @@ var _ = Describe("machine", func() { }, }, } - if tc.setup.machineAnnotationValue != "" { - machine.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.machineAnnotationValue - } + controlMachineObjects = append(controlMachineObjects, machine) if tc.setup.nodeName != "" && tc.setup.nodeName != "invalid" { node := &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ - Name: tc.setup.nodeName, - Annotations: map[string]string{}, + Name: tc.setup.nodeName, + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: tc.setup.nodeAnnotationValue, + }, }, Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{}, }, } - if tc.setup.nodeAnnotationValue != "" { - node.Annotations[machineutils.PreserveMachineAnnotationKey] = tc.setup.nodeAnnotationValue - } targetCoreObjects = append(targetCoreObjects, node) } c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) @@ -4214,6 +4214,7 @@ var _ = Describe("machine", 
func() { return } Expect(err).ToNot(HaveOccurred()) + waitForCacheSync(stop, c) updatedMachine, err := c.controlMachineClient.Machines(testNamespace).Get(context.TODO(), machine.Name, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) if tc.expect.preserveExpiryTimeIsSet { @@ -4240,8 +4241,10 @@ var _ = Describe("machine", func() { Expect(found).To(BeFalse()) } } + Expect(updatedMachine.Annotations[machineutils.PreserveMachineAnnotationKey]).To(Equal(tc.expect.machineAnnotationValue)) + Expect(updatedMachine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey]).To(Equal(tc.expect.laNodePreserveValue)) }, - Entry("no preserve annotation on machine and node", testCase{ + Entry("when no preserve annotation is set on machine and node, should return LongRetry", testCase{ setup: setup{ nodeName: "node-1", }, @@ -4251,13 +4254,14 @@ var _ = Describe("machine", func() { retry: machineutils.LongRetry, }, }), - Entry("preserve annotation 'now' added to node of Running machine", testCase{ + Entry("when preserve annotation 'now' is added to node of Running machine, should successfully start preservation", testCase{ setup: setup{ nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, nodeName: "node-1", machinePhase: v1alpha1.MachineRunning, }, expect: expect{ + laNodePreserveValue: machineutils.PreserveMachineAnnotationValueNow, preserveExpiryTimeIsSet: true, nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, @@ -4265,25 +4269,27 @@ var _ = Describe("machine", func() { retry: machineutils.LongRetry, }, }), - Entry("preserve annotation 'when-failed' added to node of Running machine", testCase{ + Entry("when preserve annotation 'when-failed' added to node of Running machine, should not start preservation", testCase{ setup: setup{ nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, nodeName: "node-1", machinePhase: v1alpha1.MachineRunning, }, expect: expect{ + laNodePreserveValue: 
machineutils.PreserveMachineAnnotationValueWhenFailed, preserveExpiryTimeIsSet: false, nodeCondition: nil, retry: machineutils.LongRetry, }, }), - Entry("node of Failed machine annotated with when-failed", testCase{ + Entry("when node of Failed machine is annotated with `when-failed`, should start preservation", testCase{ setup: setup{ nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, nodeName: "node-1", machinePhase: v1alpha1.MachineFailed, }, expect: expect{ + laNodePreserveValue: machineutils.PreserveMachineAnnotationValueWhenFailed, preserveExpiryTimeIsSet: true, nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, @@ -4291,65 +4297,41 @@ var _ = Describe("machine", func() { retry: machineutils.LongRetry, }, }), - Entry("preserve annotation 'now' added on Healthy node ", testCase{ - setup: setup{ - nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, - nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, - }, - expect: expect{ - preserveExpiryTimeIsSet: true, - nodeCondition: &corev1.NodeCondition{ - Type: v1alpha1.NodePreserved, - Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, - }, - }), - Entry("preserve annotation 'when-failed' added on Healthy node ", testCase{ + Entry("when node of preserved machine is annotated with preserve value 'false', should stop preservation", testCase{ setup: setup{ - nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, - nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, - }, - expect: expect{ - preserveExpiryTimeIsSet: false, - nodeCondition: nil, - retry: machineutils.LongRetry, - }}), - Entry("preserve annotation 'false' added on backing node of preserved machine", testCase{ - setup: setup{ - nodeAnnotationValue: "false", + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueFalse, nodeName: "node-1", machinePhase: v1alpha1.MachineRunning, preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * 
time.Hour)}, }, expect: expect{ + laNodePreserveValue: machineutils.PreserveMachineAnnotationValueFalse, preserveExpiryTimeIsSet: false, nodeCondition: nil, retry: machineutils.LongRetry, }, }), - Entry("node annotated for auto-preservation by MCM", testCase{ + Entry("when machine is annotated for auto-preservation by MCM, should start preservation", testCase{ setup: setup{ - nodeAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, - nodeName: "node-1", - machinePhase: v1alpha1.MachineFailed, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + nodeName: "node-1", + machinePhase: v1alpha1.MachineFailed, }, expect: expect{ preserveExpiryTimeIsSet: true, nodeCondition: &corev1.NodeCondition{ Type: v1alpha1.NodePreserved, Status: corev1.ConditionTrue}, - retry: machineutils.LongRetry, + retry: machineutils.LongRetry, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, }, }), - Entry("preservation timed out", testCase{ + Entry("when preservation times out, should stop preservation", testCase{ setup: setup{ - machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, - nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, - nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, - preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Minute)}, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Minute)}, }, expect: expect{ preserveExpiryTimeIsSet: false, @@ -4357,21 +4339,21 @@ var _ = Describe("machine", func() { retry: machineutils.LongRetry, }, }), - Entry("invalid preserve annotation on node of un-preserved machine", testCase{ + Entry("when invalid preserve annotation is added on node of un-preserved machine, should do nothing ", testCase{ setup: setup{ - machineAnnotationValue: "", - 
nodeAnnotationValue: "invalidValue", - nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, + nodeAnnotationValue: "invalidValue", + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, }, expect: expect{ + laNodePreserveValue: "invalidValue", preserveExpiryTimeIsSet: false, nodeCondition: nil, retry: machineutils.LongRetry, err: nil, }, }), - Entry("machine annotated with preserve=now, but has no backing node", testCase{ + Entry("when a machine is annotated with preserve=now, but has no backing node, should start preservation", testCase{ setup: setup{ machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, nodeAnnotationValue: "", @@ -4382,10 +4364,11 @@ var _ = Describe("machine", func() { preserveExpiryTimeIsSet: true, nodeCondition: nil, retry: machineutils.LongRetry, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, err: nil, }, }), - Entry("machine with backing node, but node retrieval fails", testCase{ + Entry("when a machine has a backing node, but node retrieval fails", testCase{ setup: setup{ machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, nodeAnnotationValue: "", @@ -4395,19 +4378,60 @@ var _ = Describe("machine", func() { expect: expect{ preserveExpiryTimeIsSet: false, nodeCondition: nil, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, retry: machineutils.ShortRetry, err: fmt.Errorf("node %q not found", "invalid"), }, }), - Entry("node annotated with auto-preserved and in Running phase after recovery from failure", testCase{ + Entry("when auto-preserved machine moves to Running, should stop preservation and remove auto-preserve annotation", testCase{ setup: setup{ - nodeAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, - nodeName: "node-1", - machinePhase: v1alpha1.MachineRunning, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + nodeName: "node-1", + machinePhase: 
v1alpha1.MachineRunning, + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, }, expect: expect{ preserveExpiryTimeIsSet: false, - nodeCondition: nil, + nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + retry: machineutils.LongRetry, + err: nil, + }, + }), + Entry("when node is annotated with 'now' and machine is annotated with 'when-failed', should start preservation and remove annotation from machine", testCase{ + setup: setup{ + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + laNodePreserveValue: "", + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, + }, + expect: expect{ + laNodePreserveValue: machineutils.PreserveMachineAnnotationValueNow, + preserveExpiryTimeIsSet: true, + nodeCondition: &corev1.NodeCondition{ + Type: v1alpha1.NodePreserved, + Status: corev1.ConditionTrue}, + machineAnnotationValue: "", + retry: machineutils.LongRetry, + err: nil, + }, + }), + // case possible when MCM goes down and node annotation value is cleared and machine is annotated + Entry("when node and machine are found to be annotated with \"\", and 'now', respectively and last applied node perserve value is 'now', should stop preservation", testCase{ + setup: setup{ + nodeAnnotationValue: "", + laNodePreserveValue: "now", + machineAnnotationValue: "now", + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, + machineAnnotationValue: "", + laNodePreserveValue: "", retry: machineutils.LongRetry, err: nil, }, diff --git a/pkg/util/provider/machinecontroller/machine_util.go 
b/pkg/util/provider/machinecontroller/machine_util.go index 5265f8e2d..c0535f51e 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -29,7 +29,6 @@ import ( "errors" "fmt" "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" - "github.com/gardener/machine-controller-manager/pkg/util/annotations" "maps" "math" "runtime" @@ -2359,39 +2358,41 @@ Utility Functions for Machine Preservation */ // preserveMachine contains logic to start the preservation of a machine and node. -func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) error { +func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Machine, preserveValue string) (bool, error) { var err error + machineObjectUpdated := false nodeName := machine.Labels[v1alpha1.NodeLabelKey] if machine.Status.CurrentStatus.PreserveExpiryTime == nil { klog.V(4).Infof("Starting preservation flow for machine %q.", machine.Name) // Step 1: Add preserveExpiryTime to machine status machine, err = c.setPreserveExpiryTimeOnMachine(ctx, machine) if err != nil { - return err + return machineObjectUpdated, err } + machineObjectUpdated = true } if nodeName == "" { // Machine has no backing node, preservation is complete klog.V(2).Infof("Machine %q without backing node is preserved successfully till %v.", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) - return nil + return machineObjectUpdated, nil } // Machine has a backing node node, err := c.nodeLister.Get(nodeName) if err != nil { klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) - return err + return machineObjectUpdated, err } existingNodePreservedCondition := nodeops.GetCondition(node, v1alpha1.NodePreserved) // checks if preservation is already complete if existingNodePreservedCondition != nil && existingNodePreservedCondition.Status == v1.ConditionTrue { - return nil + return machineObjectUpdated, nil } // Preservation incomplete - either the flow is being run for the first time, or previous attempt failed midway // Step 2: Add annotations to prevent scale down of node by CA updatedNode, err := c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { - return err + return machineObjectUpdated, err } var drainErr error if shouldPreservedNodeBeDrained(existingNodePreservedCondition, machine.Status.CurrentStatus.Phase) { @@ -2404,33 +2405,33 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach _, err = nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, updatedNode.Name, *newCond) if drainErr != nil { klog.Errorf("error draining preserved node %q for machine %q : %v", nodeName, machine.Name, drainErr) - return drainErr + return machineObjectUpdated, drainErr } if err != nil { klog.Errorf("error trying to update node preserved condition for node %q of machine %q : %v", nodeName, machine.Name, err) - return err + return machineObjectUpdated, err } } klog.V(2).Infof("Machine %q and backing node preserved successfully till %v.", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) - return nil + return machineObjectUpdated, nil } -// stopMachinePreservationIfPreserved stops the preservation of the machine and node -func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine) error { +// stopMachinePreservationIfPreserved stops the preservation of the machine and node, if preserved, and returns true if machine object has been updated +func (c *controller) stopMachinePreservationIfPreserved(ctx 
context.Context, machine *v1alpha1.Machine, removePreservationAnnotations bool) (bool, error) { // removal of preserveExpiryTime is the last step of stopping preservation // therefore, if preserveExpiryTime is not set, machine is not preserved nodeName := machine.Labels[v1alpha1.NodeLabelKey] if machine.Status.CurrentStatus.PreserveExpiryTime == nil { - return nil + return false, nil } // if there is no backing node, then preservation can be stopped by just removing preserveExpiryTime from machine status if nodeName == "" { err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { - return err + return false, err } klog.V(2).Infof("Preservation of machine %q with no backing node has stopped.", machine.Name) - return nil + return true, nil } // Machine has a backing node @@ -2443,13 +2444,13 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac klog.Warningf("Node %q of machine %q not found. Proceeding to clear PreserveExpiryTime on machine.", nodeName, machine.Name) err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { - return err + return false, err } klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) - return nil + return true, nil } klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) - return err + return false, err } // prepare NodeCondition to set preservation as stopped preservedConditionFalse := v1.NodeCondition{ @@ -2461,20 +2462,20 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac // Step 1: change node condition to reflect that preservation has stopped updatedNode, err := nodeops.AddOrUpdateConditionsOnNode(ctx, c.targetCoreClient, node.Name, preservedConditionFalse) if err != nil { - return err + return false, err } - // Step 2: remove CA scale down disabled annotation from node - err = c.removeCAScaleDownDisabledAnnotationOnNode(ctx, updatedNode) + // Step 2: remove annotations from node + err = c.removePreservationRelatedAnnotationsOnNode(ctx, updatedNode, removePreservationAnnotations) if err != nil { - return err + return false, err } // Step 3: update machine status to set preserve expiry time to nil err = c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { - return err + return false, err } klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) - return nil + return true, nil } // setPreserveExpiryTimeOnMachine sets the PreserveExpiryTime on the machine object's Status.CurrentStatus to now + preserve timeout @@ -2503,17 +2504,10 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, } // Add annotation to disable CA scale down. // Also add annotation expressing that MCM is the one who added this annotation, so that it can be removed safely when preservation is stopped. 
- CAScaleDownAnnotation := map[string]string{ - autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, - autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue, - } nodeCopy := node.DeepCopy() - updatedNode, _, err := annotations.AddOrUpdateAnnotation(nodeCopy, CAScaleDownAnnotation) - if err != nil { - klog.Errorf("error trying to add CA annotation on node %q: %v", node.Name, err) - return nil, err - } - updatedNode, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}) + nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue + updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) if err != nil { klog.Errorf("error trying to update CA annotation on node %q: %v", node.Name, err) return nil, err @@ -2521,24 +2515,32 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, return updatedNode, nil } -// removeCAScaleDownDisabledAnnotationOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node -func (c *controller) removeCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) error { +// removePreservationRelatedAnnotationsOnNode removes the cluster-autoscaler scale-down-disabled annotations from the node if they were added by MCM and, when removePreserveAnnotation is true, also removes the preserve annotation from the node +func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Context, node *v1.Node, removePreserveAnnotation bool) error { // Check if annotation already absent - if node.Annotations == nil || - node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == "" { + if node.Annotations == nil {
return nil } + updateRequired := false nodeCopy := node.DeepCopy() // If CA scale-down disabled annotation was added by MCM, it can be safely removed. // If the annotation was added by some other entity, then it should not be removed. if nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue { delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) - _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("node UPDATE failed for node %q. Retrying, error: %s", node.Name, err) - return err - } + updateRequired = true + } + if removePreserveAnnotation { + delete(nodeCopy.Annotations, machineutils.PreserveMachineAnnotationKey) + updateRequired = true + } + if !updateRequired { + return nil + } + _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("node UPDATE failed for node %q. 
Retrying, error: %s", node.Name, err) + return err } return nil } diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index c12c0d761..86b7ec5fa 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4023,7 +4023,7 @@ var _ = Describe("machine_util", func() { c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.preserveMachine(context.TODO(), machine, tc.setup.preserveValue) + _, err := c.preserveMachine(context.TODO(), machine, tc.setup.preserveValue) if tc.expect.err == nil { Expect(err).To(BeNil()) } else { @@ -4211,7 +4211,8 @@ var _ = Describe("machine_util", func() { }) Describe("#stopMachinePreservationIfPreserved", func() { type setup struct { - nodeName string + nodeName string + removePreserveAnnotation bool } type expect struct { err error @@ -4249,6 +4250,7 @@ var _ = Describe("machine_util", func() { ObjectMeta: metav1.ObjectMeta{ Name: "node-1", Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueNow, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey: autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue, }, }, @@ -4273,7 +4275,7 @@ var _ = Describe("machine_util", func() { c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.stopMachinePreservationIfPreserved(context.TODO(), machine) + _, err := c.stopMachinePreservationIfPreserved(context.TODO(), machine, tc.setup.removePreserveAnnotation) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(tc.expect.err.Error())) @@ -4293,34 +4295,42 @@ var _ = Describe("machine_util", func() { 
Expect(updatedNodeCondition).ToNot(BeNil()) Expect(updatedNodeCondition.Status).To(Equal(corev1.ConditionFalse)) Expect(updatedNodeCondition.Reason).To(Equal(machinev1.PreservationStopped)) + if tc.setup.removePreserveAnnotation { + Expect(updatedNode.Annotations).NotTo(HaveKey(machineutils.PreserveMachineAnnotationKey)) + } else { + Expect(updatedNode.Annotations).To(HaveKey(machineutils.PreserveMachineAnnotationKey)) + } + }, - Entry("when stopping preservation on a preserved machine with backing node", &testCase{ + Entry("when stopping preservation on a preserved machine with backing node and preserve annotation needs to be removed", &testCase{ setup: setup{ - nodeName: "node-1", + nodeName: "node-1", + removePreserveAnnotation: true, }, expect: expect{ err: nil, }, }), - Entry("when stopping preservation on a preserved machine with no backing node", &testCase{ + Entry("when stopping preservation on a preserved machine with backing node and preserve annotation shouldn't be removed", &testCase{ setup: setup{ - nodeName: "", + nodeName: "node-1", + removePreserveAnnotation: false, }, expect: expect{ err: nil, }, }), - Entry("when stopping preservation on a preserved machine, and the backing node is not found", &testCase{ + Entry("when stopping preservation on a preserved machine with no backing node", &testCase{ setup: setup{ - nodeName: "no-backing-node", + nodeName: "", }, expect: expect{ err: nil, }, }), - Entry("when stopping preservation on a preserved machine, but retaining CA annotation", &testCase{ + Entry("when stopping preservation on a preserved machine, and the backing node is not found", &testCase{ setup: setup{ - nodeName: "node-1", + nodeName: "no-backing-node", }, expect: expect{ err: nil, @@ -4551,7 +4561,7 @@ var _ = Describe("machine_util", func() { shouldDrain: false, }, }), - Entry("should return true when machine is Failed and no existing condition", &testCase{ + Entry("should return true when machine is Failed and there is no existing node 
condition", &testCase{ setup: setup{ machinePhase: machinev1.MachineFailed, }, @@ -4559,7 +4569,7 @@ var _ = Describe("machine_util", func() { shouldDrain: true, }, }), - Entry("should return true when machine is Failed and existing condition message is PreservedNodeDrainUnsuccessful", &testCase{ + Entry("should return true when machine is Failed and existing node condition message is PreservedNodeDrainUnsuccessful", &testCase{ setup: setup{ machinePhase: machinev1.MachineFailed, existingCondition: &corev1.NodeCondition{ @@ -4575,4 +4585,124 @@ var _ = Describe("machine_util", func() { }), ) }) + Describe("#removePreservationRelatedAnnotationsOnNode", func() { + type setup struct { + removePreserveAnnotation bool + CAAnnotationPresent bool + CAMCMAnnotationPresent bool + } + type expect struct { + err error + hasAnnotationKeys []string + deletedAnnotationKeys []string + } + type testCase struct { + setup setup + expect expect + } + DescribeTable("##removePreservationRelatedAnnotationsOnNode behaviour scenarios", + func(tc *testCase) { + stop := make(chan struct{}) + defer close(stop) + var targetCoreObjects []runtime.Object + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueNow, + }, + }, + } + if tc.setup.CAAnnotationPresent { + node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + } + if tc.setup.CAMCMAnnotationPresent { + node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue + } + targetCoreObjects = append(targetCoreObjects, node) + c, trackers := createController(stop, testNamespace, nil, nil, targetCoreObjects, nil, false) + defer trackers.Stop() + waitForCacheSync(stop, c) + err := 
c.removePreservationRelatedAnnotationsOnNode(context.TODO(), node, tc.setup.removePreserveAnnotation) + waitForCacheSync(stop, c) + updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + Expect(getErr).To(BeNil()) + if tc.expect.err != nil { + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(Equal(tc.expect.err.Error())) + } else { + Expect(err).To(BeNil()) + } + for _, key := range tc.expect.hasAnnotationKeys { + Expect(updatedNode.Annotations).To(HaveKey(key)) + } + for key := range tc.expect.deletedAnnotationKeys { + Expect(updatedNode.Annotations).NotTo(HaveKey(key)) + } + }, + Entry("when removePreserveAnnotation is true and ClusterAutoscalerScaleDownDisabledAnnotationByMCM annotation is present, should delete all preservation related annotations", &testCase{ + setup: setup{ + removePreserveAnnotation: true, + CAAnnotationPresent: true, + CAMCMAnnotationPresent: true, + }, + expect: expect{ + err: nil, + hasAnnotationKeys: []string{}, + deletedAnnotationKeys: []string{ + machineutils.PreserveMachineAnnotationKey, + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey, + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey, + }, + }, + }), + Entry("when removePreserveAnnotation is false and ClusterAutoscalerScaleDownDisabledAnnotationByMCM annotation is present, should delete only CA annotations ", &testCase{ + setup: setup{ + removePreserveAnnotation: false, + CAAnnotationPresent: true, + CAMCMAnnotationPresent: false, + }, + expect: expect{ + err: nil, + hasAnnotationKeys: []string{ + machineutils.PreserveMachineAnnotationKey, + }, + deletedAnnotationKeys: []string{ + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey, + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey, + }, + }, + }), + Entry("when removePreserveAnnotation is true and ClusterAutoscalerScaleDownDisabledAnnotationByMCM is not present, should delete only preserve annotation", &testCase{ + 
setup: setup{ + removePreserveAnnotation: true, + CAAnnotationPresent: true, + CAMCMAnnotationPresent: false, + }, + expect: expect{ + err: nil, + hasAnnotationKeys: []string{ + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey, + }, + deletedAnnotationKeys: []string{ + machineutils.PreserveMachineAnnotationKey, + }, + }, + }), + Entry("when removePreserveAnnotation is false and ClusterAutoscalerScaleDownDisabledAnnotationByMCM is not present, should not delete any annotations", &testCase{ + setup: setup{ + removePreserveAnnotation: false, + CAAnnotationPresent: true, + CAMCMAnnotationPresent: false, + }, + expect: expect{ + err: nil, + hasAnnotationKeys: []string{ + autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey, + machineutils.PreserveMachineAnnotationKey, + }, + }, + }), + ) + }) }) diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 10ca5ea98..723756406 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -91,6 +91,9 @@ const ( // PreserveMachineAnnotationKey is the annotation used to explicitly request that a Machine be preserved PreserveMachineAnnotationKey = "node.machine.sapcloud.io/preserve" + // LastAppliedNodePreserveValueAnnotationKey is the annotation used to store the last preserve value applied by MCM + LastAppliedNodePreserveValueAnnotationKey = "node.machine.sapcloud.io/last-applied-node-preserve-value" + // PreserveMachineAnnotationValueNow is the annotation value used to explicitly request that // a Machine be preserved immediately in its current phase PreserveMachineAnnotationValueNow = "now" diff --git a/pkg/util/provider/machineutils/utils_test.go b/pkg/util/provider/machineutils/utils_test.go deleted file mode 100644 index 7efa3c740..000000000 --- a/pkg/util/provider/machineutils/utils_test.go +++ /dev/null @@ -1,95 +0,0 @@ -package machineutils - -import ( - "flag" - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - "k8s.io/klog/v2" - "testing" -) - -func TestMachineUtilsSuite(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "Machine Utils Suite") -} - -var _ = BeforeSuite(func() { - klog.SetOutput(GinkgoWriter) - //for filtering out warning logs. Reflector short watch warning logs won't print now - klog.LogToStderr(false) - flags := &flag.FlagSet{} - klog.InitFlags(flags) - Expect(flags.Set("v", "10")).To(Succeed()) - - DeferCleanup(klog.Flush) -}) - -var _ = Describe("utils.go", func() { - Describe("#PreserveAnnotationsChanged", func() { - type setup struct { - oldAnnotations map[string]string - newAnnotations map[string]string - } - type expect struct { - result bool - } - type testCase struct { - setup setup - expect expect - } - DescribeTable("PreserveAnnotationsChanged test cases", func(tc testCase) { - - result := PreserveAnnotationsChanged(tc.setup.oldAnnotations, tc.setup.newAnnotations) - Expect(result).To(Equal(tc.expect.result)) - }, - Entry("should return true if preserve annotation added for the first time", testCase{ - setup: setup{ - oldAnnotations: map[string]string{}, - newAnnotations: map[string]string{ - PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, - }, - }, - expect: expect{ - result: true, - }, - }), - Entry("should return true if preserve annotation is removed", testCase{ - setup: setup{ - oldAnnotations: map[string]string{ - PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, - }, - newAnnotations: map[string]string{}, - }, - expect: expect{ - result: true, - }, - }), - Entry("should return true if preserve annotation value is changed", testCase{ - setup: setup{ - oldAnnotations: map[string]string{ - PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, - }, - newAnnotations: map[string]string{ - PreserveMachineAnnotationKey: PreserveMachineAnnotationValueWhenFailed, - }, - }, - expect: expect{ - result: true, - }, - }), - Entry("should return false if preserve annotation is 
unchanged", testCase{ - setup: setup{ - oldAnnotations: map[string]string{ - PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, - }, - newAnnotations: map[string]string{ - PreserveMachineAnnotationKey: PreserveMachineAnnotationValueNow, - }, - }, - expect: expect{ - result: false, - }, - }), - ) - }) -}) From bcfc56bdb098d98a2e52016566592c373ed889b2 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 18 Feb 2026 09:54:38 +0530 Subject: [PATCH 68/79] Fix bugs introduced by latest changes --- pkg/apis/machine/v1alpha1/machine_types.go | 10 +- pkg/controller/deployment_rolling.go | 5 +- pkg/controller/machineset.go | 34 +++++- pkg/controller/machineset_test.go | 104 +++++++++++++++--- .../provider/machinecontroller/machine.go | 5 +- .../machinecontroller/machine_test.go | 47 +++++++- .../machinecontroller/machine_util.go | 72 +++++------- .../machinecontroller/machine_util_test.go | 26 ++--- 8 files changed, 216 insertions(+), 87 deletions(-) diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go index b55057874..13db2a7d3 100644 --- a/pkg/apis/machine/v1alpha1/machine_types.go +++ b/pkg/apis/machine/v1alpha1/machine_types.go @@ -249,19 +249,19 @@ const ( NodePreserved corev1.NodeConditionType = "Preserved" // PreservedByMCM is a node condition reason for preservation of machines to indicate that the node is auto-preserved by MCM - PreservedByMCM string = "Preserved by MCM" + PreservedByMCM string = "Preserved by MCM." // PreservedByUser is a node condition reason to indicate that a machine/node has been preserved due to explicit annotation by user - PreservedByUser string = "Preserved by user" + PreservedByUser string = "Preserved by user." // PreservationStopped is a node condition reason to indicate that a machine/node preservation has been stopped due to annotation update or timeout - PreservationStopped string = "Preservation stopped" + PreservationStopped string = "Preservation stopped." 
// PreservedNodeDrainSuccessful is a constant for the message in condition that indicates that the preserved node's drain is successful - PreservedNodeDrainSuccessful string = "Preserved node drained successfully" + PreservedNodeDrainSuccessful string = "Preserved node drained successfully." // PreservedNodeDrainUnsuccessful is a constant for the message in condition that indicates that the preserved node's drain was not successful - PreservedNodeDrainUnsuccessful string = "Preserved node could not be drained" + PreservedNodeDrainUnsuccessful string = "Preserved node could not be drained." ) // CurrentStatus contains information about the current status of Machine. diff --git a/pkg/controller/deployment_rolling.go b/pkg/controller/deployment_rolling.go index 033be6c31..325f93193 100644 --- a/pkg/controller/deployment_rolling.go +++ b/pkg/controller/deployment_rolling.go @@ -480,10 +480,13 @@ func (dc *controller) removeAutoscalerAnnotationsIfRequired(ctx context.Context, klog.Warningf("Get annotations failed for node: %s, %s", machine.Labels[v1alpha1.NodeLabelKey], err) return err } - // Remove the autoscaler-related annotation only if the by-mcm annotation is already set. If // by-mcm annotation is not set, the original annotation is likely be put by the end-user for their usecases. if _, exists := nodeAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey]; exists { + // do not remove the autoscaler related annotation if it is added due to ongoing machine preservation. 
+ if !machine.Status.CurrentStatus.PreserveExpiryTime.IsZero() { + return nil + } err = RemoveAnnotationsOffNode( ctx, dc.targetCoreClient, diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 905dba8e6..c4a9825fa 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -339,7 +339,7 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 if machineutils.IsMachineTriggeredForDeletion(m) { staleMachines = append(staleMachines, m) } else if machineutils.IsMachineFailed(m) { - if shouldFailedMachineBeTerminated(m) { + if c.shouldFailedMachineBeTerminated(m) { staleMachines = append(staleMachines, m) } else { activeMachines = append(activeMachines, m) @@ -886,7 +886,7 @@ func isMachineStatusEqual(s1, s2 v1alpha1.MachineStatus) bool { // shouldFailedMachineBeTerminated checks if the failed machine is already preserved, in the process of being preserved // or if it is a candidate for auto-preservation. If none of these conditions are met, it returns true indicating // that the failed machine should be terminated. 
-func shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) bool { +func (c *controller) shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) bool { // if preserve expiry time is set and is in the future, machine is already preserved if machine.Status.CurrentStatus.PreserveExpiryTime != nil { if machine.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { @@ -896,9 +896,13 @@ func shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) bool { klog.V(3).Infof("Preservation of failed machine %q has timed out at %v", machine.Name, machine.Status.CurrentStatus.PreserveExpiryTime) return true } - // if the machine preservation is not complete yet even though the machine is annotated, prevent termination - // so that preservation can complete - switch machine.Annotations[machineutils.PreserveMachineAnnotationKey] { + preserveValue, err := c.findEffectivePreserveValue(machine) + if err != nil { + // in case of error fetching node or annotations, we don't want to block deletion of failed machines, so we return true + klog.V(2).Infof("Error finding effective preserve value for machine %q: %v. 
Proceeding with termination of the machine.", machine.Name, err) + return true + } + switch preserveValue { case machineutils.PreserveMachineAnnotationValueWhenFailed, machineutils.PreserveMachineAnnotationValueNow, machineutils.PreserveMachineAnnotationValuePreservedByMCM: // this is in case preservation process is not complete yet return false case machineutils.PreserveMachineAnnotationValueFalse: @@ -987,3 +991,23 @@ func removeAutoPreserveAnnotationFromMachine(machineToUpdate *v1alpha1.Machine) delete(machineToUpdate.Annotations, machineutils.PreserveMachineAnnotationKey) return nil } + +func (c *controller) findEffectivePreserveValue(machine *v1alpha1.Machine) (string, error) { + var nodeAnnotationValue, machineAnnotationValue, lANodeAnnotationValue string + machineAnnotationValue = machine.Annotations[machineutils.PreserveMachineAnnotationKey] + lANodeAnnotationValue = machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] + nodeName := machine.Labels[v1alpha1.NodeLabelKey] + if nodeName != "" { + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("Error fetching node %q for machine %q: %v", nodeName, machine.Name, err) + return "", err + } + nodeAnnotationValue = node.Annotations[machineutils.PreserveMachineAnnotationKey] + } + if nodeAnnotationValue == "" && lANodeAnnotationValue == "" { + return machineAnnotationValue, nil + } else { + return nodeAnnotationValue, nil + } +} diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 4a0ae4b2f..fe681a9db 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -8,6 +8,7 @@ import ( "context" "errors" "fmt" + corev1 "k8s.io/api/core/v1" "sort" "sync" "time" @@ -2089,10 +2090,12 @@ var _ = Describe("machineset", func() { }) Describe("#shouldFailedMachineBeTerminated", func() { - type setup struct { - preserveExpiryTime *metav1.Time - annotationValue string + preserveExpiryTime *metav1.Time + nodeName string 
+ nodeAnnotationValue string + machineAnnotationValue string + laNodeAnnotationValue string } type expect struct { result bool @@ -2101,13 +2104,24 @@ var _ = Describe("machineset", func() { setup setup expect expect } + DescribeTable("shouldFailedMachineBeTerminated test cases", func(tc testCase) { + stop := make(chan struct{}) + defer close(stop) + + var controlMachineObjects []runtime.Object + var targetCoreObjects []runtime.Object + machine := machinev1.Machine{ ObjectMeta: metav1.ObjectMeta{ Name: "test-machine", Namespace: "default", Annotations: map[string]string{ - machineutils.PreserveMachineAnnotationKey: tc.setup.annotationValue, + machineutils.PreserveMachineAnnotationKey: tc.setup.machineAnnotationValue, + machineutils.LastAppliedNodePreserveValueAnnotationKey: tc.setup.laNodeAnnotationValue, + }, + Labels: map[string]string{ + machinev1.NodeLabelKey: tc.setup.nodeName, }, }, Status: machinev1.MachineStatus{ @@ -2117,13 +2131,33 @@ var _ = Describe("machineset", func() { }, }, } - result := shouldFailedMachineBeTerminated(&machine) + controlMachineObjects = append(controlMachineObjects, &machine) + if tc.setup.nodeName != "" { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: tc.setup.nodeName, + Annotations: map[string]string{ + machineutils.PreserveMachineAnnotationKey: tc.setup.nodeAnnotationValue, + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{}, + }, + } + targetCoreObjects = append(targetCoreObjects, node) + } + c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects) + defer trackers.Stop() + waitForCacheSync(stop, c) + result := c.shouldFailedMachineBeTerminated(&machine) + Expect(result).To(Equal(tc.expect.result)) }, Entry("should return false if preserve expiry time is in the future", testCase{ setup: setup{ - preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, - annotationValue: machineutils.PreserveMachineAnnotationValueNow, + 
preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(1 * time.Hour)}, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "test-node", }, expect: expect{ result: false, @@ -2131,23 +2165,53 @@ var _ = Describe("machineset", func() { }), Entry("should return true if machine is annotated with preserve=false", testCase{ setup: setup{ - annotationValue: machineutils.PreserveMachineAnnotationValueFalse, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueFalse, + nodeName: "test-node", }, expect: expect{ result: true, }, }), - Entry("should return false if machine is annotated with preserve=now", testCase{ + Entry("should return true if node is annotated with preserve=false", testCase{ setup: setup{ - annotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueFalse, + nodeName: "test-node", + }, + expect: expect{ + result: true, + }, + }), + Entry("should return false if machine is annotated with preserve=now, and node has not been annotated, and preserveExpiryTime is not yet set", testCase{ + setup: setup{ + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "test-node", + }, + expect: expect{ + result: false, + }, + }), + Entry("should return false if node is annotated with preserve=now, and preserveExpiryTime is not yet set", testCase{ + setup: setup{ + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "test-node", + }, + expect: expect{ + result: false, + }, + }), + Entry("should return false if machine is annotated with preserve=when-failed, and node has not been annotated", testCase{ + setup: setup{ + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "test-node", }, expect: expect{ result: false, }, }), - Entry("should return false if machine is annotated with preserve=when-failed", testCase{ + Entry("should return false if node is 
annotated with preserve=when-failed", testCase{ setup: setup{ - annotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "test-node", }, expect: expect{ result: false, @@ -2155,8 +2219,20 @@ var _ = Describe("machineset", func() { }), Entry("should return true if preservation has timed out", testCase{ setup: setup{ - preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Second)}, - annotationValue: machineutils.PreserveMachineAnnotationValueNow, + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Second)}, + nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "test-node", + }, + expect: expect{ + result: true, + }, + }), + Entry("should return true if laNodePreserveValue is not empty, machineAnnotationValue is not empty and nodeAnnotationValue is empty, indicating that node Annotation Value was deleted", testCase{ + setup: setup{ + laNodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, + nodeName: "test-node", + nodeAnnotationValue: "", }, expect: expect{ result: true, diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 1a4766e58..0cf2ef625 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -812,8 +812,6 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } else if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueNow { if clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { // on timing out, remove preserve annotation to prevent incorrect re-preservation - delete(clone.Annotations, machineutils.PreserveMachineAnnotationValuePreservedByMCM) - 
delete(clone.Annotations, machineutils.LastAppliedNodePreserveValueAnnotationKey) machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, true) } else { machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) @@ -826,8 +824,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a // To prevent incorrect re-preservation of a recovered, previously auto-preserved machine on future failures // (since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), // in addition to stopping preservation, we also remove the preservation annotation on the machine. - delete(clone.Annotations, machineutils.PreserveMachineAnnotationValuePreservedByMCM) - machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) + machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, true) } else { machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 53d57216e..299eb8037 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -4326,7 +4326,7 @@ var _ = Describe("machine", func() { machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, }, }), - Entry("when preservation times out, should stop preservation", testCase{ + Entry("when node is annotated and preservation times out, should stop preservation", testCase{ setup: setup{ nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, nodeName: "node-1", @@ -4339,6 +4339,19 @@ var _ = Describe("machine", func() { retry: machineutils.LongRetry, }, }), + Entry("when machine is annotated and preservation times out, should stop preservation", testCase{ + setup: setup{ + machineAnnotationValue: 
machineutils.PreserveMachineAnnotationValueNow, + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Minute)}, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, + retry: machineutils.LongRetry, + }, + }), Entry("when invalid preserve annotation is added on node of un-preserved machine, should do nothing ", testCase{ setup: setup{ nodeAnnotationValue: "invalidValue", @@ -4353,6 +4366,20 @@ var _ = Describe("machine", func() { err: nil, }, }), + Entry("when invalid preserve annotation is added on machine of un-preserved machine, and node is not annotated should do nothing ", testCase{ + setup: setup{ + machineAnnotationValue: "invalidValue", + nodeName: "node-1", + machinePhase: v1alpha1.MachineRunning, + }, + expect: expect{ + machineAnnotationValue: "invalidValue", + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + err: nil, + }, + }), Entry("when a machine is annotated with preserve=now, but has no backing node, should start preservation", testCase{ setup: setup{ machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, @@ -4368,6 +4395,20 @@ var _ = Describe("machine", func() { err: nil, }, }), + Entry("when preservation times out for a machine annotated with preserve=now, but has no backing node, should stop preservation", testCase{ + setup: setup{ + machineAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, + nodeName: "", + machinePhase: v1alpha1.MachineUnknown, + preserveExpiryTime: &metav1.Time{Time: metav1.Now().Add(-1 * time.Minute)}, + }, + expect: expect{ + preserveExpiryTimeIsSet: false, + nodeCondition: nil, + retry: machineutils.LongRetry, + err: nil, + }, + }), Entry("when a machine has a backing node, but node retrieval fails", testCase{ setup: setup{ machineAnnotationValue: 
machineutils.PreserveMachineAnnotationValueNow, @@ -4393,12 +4434,12 @@ var _ = Describe("machine", func() { expect: expect{ preserveExpiryTimeIsSet: false, nodeCondition: &corev1.NodeCondition{Type: v1alpha1.NodePreserved, Status: corev1.ConditionFalse}, - machineAnnotationValue: machineutils.PreserveMachineAnnotationValuePreservedByMCM, + machineAnnotationValue: "", retry: machineutils.LongRetry, err: nil, }, }), - Entry("when node is annotated with 'now' and machine is annotated with 'when-failed', should start preservation and remove annotation from machine", testCase{ + Entry("when node is annotated with 'now' and machine is annotated with 'when-failed', should start preservation and remove preserve annotation from machine", testCase{ setup: setup{ nodeAnnotationValue: machineutils.PreserveMachineAnnotationValueNow, machineAnnotationValue: machineutils.PreserveMachineAnnotationValueWhenFailed, diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index c0535f51e..a02865315 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2417,15 +2417,23 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach } // stopMachinePreservationIfPreserved stops the preservation of the machine and node, if preserved, and returns true if machine object has been updated -func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine, removePreservationAnnotations bool) (bool, error) { +func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine, deletePreservationAnnotations bool) (bool, error) { // removal of preserveExpiryTime is the last step of stopping preservation // therefore, if preserveExpiryTime is not set, machine is not preserved nodeName := machine.Labels[v1alpha1.NodeLabelKey] if 
machine.Status.CurrentStatus.PreserveExpiryTime == nil { return false, nil } - // if there is no backing node, then preservation can be stopped by just removing preserveExpiryTime from machine status + // if there is no backing node if nodeName == "" { + // remove annotation from machine if needed + if deletePreservationAnnotations { + var err error + machine, err = c.deletePreserveAnnotationOnMachine(ctx, machine) + if err != nil { + return false, err + } + } err := c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { return false, err @@ -2465,11 +2473,18 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac return false, err } // Step 2: remove annotations from node - err = c.removePreservationRelatedAnnotationsOnNode(ctx, updatedNode, removePreservationAnnotations) + err = c.deletePreservationRelatedAnnotationsOnNode(ctx, updatedNode, deletePreservationAnnotations) if err != nil { return false, err } - // Step 3: update machine status to set preserve expiry time to nil + // Step 3: remove annotation from machine if needed + if deletePreservationAnnotations { + machine, err = c.deletePreserveAnnotationOnMachine(ctx, machine) + if err != nil { + return false, err + } + } + // Step 4: update machine status to set preserve expiry time to nil err = c.clearMachinePreserveExpiryTime(ctx, machine) if err != nil { return false, err @@ -2516,7 +2531,7 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, } // removePreserveAnnotationsOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node -func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Context, node *v1.Node, removePreserveAnnotation bool) error { +func (c *controller) deletePreservationRelatedAnnotationsOnNode(ctx context.Context, node *v1.Node, removePreserveAnnotation bool) error { // Check if annotation already absent if node.Annotations == nil { return nil @@ -2564,7 +2579,7 @@ func 
(c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string // computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, preserveValue string, drainErr error, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { - const preserveExpiryMessageSuffix = "Machine preserved until " + const preserveExpiryMessageSuffix = "Machine preserved until" var newNodePreservedCondition *v1.NodeCondition var needsUpdate bool if existingNodeCondition == nil { @@ -2582,18 +2597,21 @@ func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, pres if drainErr == nil { if !strings.Contains(newNodePreservedCondition.Message, v1alpha1.PreservedNodeDrainSuccessful) { - newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful + ". " + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() + newNodePreservedCondition.Message = fmt.Sprintf("%s %s %v.", v1alpha1.PreservedNodeDrainSuccessful, preserveExpiryMessageSuffix, currentStatus.PreserveExpiryTime) + //newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful + ". " + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() newNodePreservedCondition.Status = v1.ConditionTrue needsUpdate = true } } else if !strings.Contains(newNodePreservedCondition.Message, v1alpha1.PreservedNodeDrainUnsuccessful) { - newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful + ". " + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() + newNodePreservedCondition.Message = fmt.Sprintf("%s %s %v.", v1alpha1.PreservedNodeDrainUnsuccessful, preserveExpiryMessageSuffix, currentStatus.PreserveExpiryTime) + //newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful + ". 
" + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() newNodePreservedCondition.Status = v1.ConditionFalse needsUpdate = true } } else if newNodePreservedCondition.Status != v1.ConditionTrue { newNodePreservedCondition.Status = v1.ConditionTrue - newNodePreservedCondition.Message = preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() + newNodePreservedCondition.Message = fmt.Sprintf("%s %v.", preserveExpiryMessageSuffix, currentStatus.PreserveExpiryTime) + //newNodePreservedCondition.Message = preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() needsUpdate = true } if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { @@ -2630,30 +2648,14 @@ func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine return nil } -func (c *controller) deletePreserveAnnotation(ctx context.Context, machine *v1alpha1.Machine) error { - nodeName := machine.Labels[v1alpha1.NodeLabelKey] - if nodeName == "" { - _, err := c.deletePreserveAnnotationValueOnMachine(ctx, machine) - return err - } - node, err := c.nodeLister.Get(nodeName) - if err != nil { - if apierrors.IsNotFound(err) { - return nil - } - klog.Errorf("error trying to get node %q of machine %q: %v. 
Retrying.", nodeName, machine.Name, err) - return err - } - return c.deletePreserveAnnotationValueOnNode(ctx, node) -} - -func (c *controller) deletePreserveAnnotationValueOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { +func (c *controller) deletePreserveAnnotationOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { - if machine.Annotations == nil || machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" { + if machine.Annotations == nil || (machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" && machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "") { return machine, nil } clone := machine.DeepCopy() delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) + delete(clone.Annotations, machineutils.LastAppliedNodePreserveValueAnnotationKey) updatedClone, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("failed to delete preserve annotation on machine %q. error: %v", machine.Name, err) @@ -2662,20 +2664,6 @@ func (c *controller) deletePreserveAnnotationValueOnMachine(ctx context.Context, return updatedClone, nil } -func (c *controller) deletePreserveAnnotationValueOnNode(ctx context.Context, node *v1.Node) error { - nodeClone := node.DeepCopy() - if nodeClone.Annotations == nil || nodeClone.Annotations[machineutils.PreserveMachineAnnotationKey] == "" { - return nil - } - delete(nodeClone.Annotations, machineutils.PreserveMachineAnnotationKey) - _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeClone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("failed to delete preserve annotation on node %q. 
error : %v", node.Name, err) - return err - } - return nil -} - // drainPreservedNode attempts to drain the node backing a preserved machine func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.Machine) error { var ( diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index 86b7ec5fa..a093800eb 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4387,7 +4387,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByUser, - Message: "Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("Machine preserved until %v.", preserveExpiryTime), }, needsUpdate: true, }, @@ -4408,7 +4408,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainSuccessful + ". Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainSuccessful, preserveExpiryTime), }, needsUpdate: true, }, @@ -4429,7 +4429,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful + ". Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainUnsuccessful, preserveExpiryTime), }, needsUpdate: true, }, @@ -4450,7 +4450,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByMCM, - Message: machinev1.PreservedNodeDrainSuccessful + ". 
Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainSuccessful, preserveExpiryTime), }, needsUpdate: true, }, @@ -4476,7 +4476,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful + ". Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainUnsuccessful, preserveExpiryTime), }, needsUpdate: true, }, @@ -4494,7 +4494,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful + ". Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainUnsuccessful, preserveExpiryTime), }, }, expect: expect{ @@ -4502,7 +4502,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionFalse, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainUnsuccessful + ". Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainUnsuccessful, preserveExpiryTime), }, needsUpdate: false, }, @@ -4520,7 +4520,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainSuccessful + ". 
Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainSuccessful, preserveExpiryTime), }, }, expect: expect{ @@ -4528,7 +4528,7 @@ var _ = Describe("machine_util", func() { Type: machinev1.NodePreserved, Status: corev1.ConditionTrue, Reason: machinev1.PreservedByUser, - Message: machinev1.PreservedNodeDrainSuccessful + ". Machine preserved until " + preserveExpiryTime.String(), + Message: fmt.Sprintf("%s Machine preserved until %v.", machinev1.PreservedNodeDrainSuccessful, preserveExpiryTime), }, needsUpdate: false, }, @@ -4548,7 +4548,7 @@ var _ = Describe("machine_util", func() { expect expect } - DescribeTable("##shouldPreservedNodeBeDrained behaviour scenarios", + DescribeTable("#shouldPreservedNodeBeDrained behaviour scenarios", func(tc *testCase) { shouldDrain := shouldPreservedNodeBeDrained(tc.setup.existingCondition, tc.setup.machinePhase) Expect(shouldDrain).To(Equal(tc.expect.shouldDrain)) @@ -4585,7 +4585,7 @@ var _ = Describe("machine_util", func() { }), ) }) - Describe("#removePreservationRelatedAnnotationsOnNode", func() { + Describe("#deletePreservationRelatedAnnotationsOnNode", func() { type setup struct { removePreserveAnnotation bool CAAnnotationPresent bool @@ -4600,7 +4600,7 @@ var _ = Describe("machine_util", func() { setup setup expect expect } - DescribeTable("##removePreservationRelatedAnnotationsOnNode behaviour scenarios", + DescribeTable("##deletePreservationRelatedAnnotationsOnNode behaviour scenarios", func(tc *testCase) { stop := make(chan struct{}) defer close(stop) @@ -4623,7 +4623,7 @@ var _ = Describe("machine_util", func() { c, trackers := createController(stop, testNamespace, nil, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.removePreservationRelatedAnnotationsOnNode(context.TODO(), node, tc.setup.removePreserveAnnotation) + err := c.deletePreservationRelatedAnnotationsOnNode(context.TODO(), 
node, tc.setup.removePreserveAnnotation) waitForCacheSync(stop, c) updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) Expect(getErr).To(BeNil()) From 87e14829f68ee7d3f60eb4dc5596eba5c494740f Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 18 Feb 2026 16:18:05 +0530 Subject: [PATCH 69/79] Sync MCD's value of AutoPreserveFailedMachineMax to MCS on change --- pkg/controller/deployment_sync.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/controller/deployment_sync.go b/pkg/controller/deployment_sync.go index e5079c393..82481065a 100644 --- a/pkg/controller/deployment_sync.go +++ b/pkg/controller/deployment_sync.go @@ -250,12 +250,14 @@ func (dc *controller) getNewMachineSet(ctx context.Context, d *v1alpha1.MachineD // Set existing new machine set's annotation annotationsUpdated := SetNewMachineSetAnnotations(d, isCopy, newRevision, true) minReadySecondsNeedsUpdate := isCopy.Spec.MinReadySeconds != d.Spec.MinReadySeconds + autoPreserveFailedMachineMaxNeedsUpdate := isCopy.Spec.AutoPreserveFailedMachineMax != d.Spec.AutoPreserveFailedMachineMax nodeTemplateUpdated := SetNewMachineSetNodeTemplate(d, isCopy, newRevision, true) machineConfigUpdated := SetNewMachineSetConfig(d, isCopy, newRevision, true) updateMachineSetClassKind := UpdateMachineSetClassKind(d, isCopy, newRevision, true) - if annotationsUpdated || minReadySecondsNeedsUpdate || nodeTemplateUpdated || machineConfigUpdated || updateMachineSetClassKind { + if annotationsUpdated || minReadySecondsNeedsUpdate || nodeTemplateUpdated || machineConfigUpdated || updateMachineSetClassKind || autoPreserveFailedMachineMaxNeedsUpdate { isCopy.Spec.MinReadySeconds = d.Spec.MinReadySeconds + isCopy.Spec.AutoPreserveFailedMachineMax = d.Spec.AutoPreserveFailedMachineMax return dc.controlMachineClient.MachineSets(isCopy.Namespace).Update(ctx, isCopy, metav1.UpdateOptions{}) } From 0d385e7521e7dbd318bed6beeee72f717e6f6bca Mon Sep 17 
00:00:00 2001 From: thiyyakat Date: Fri, 20 Feb 2026 09:48:21 +0530 Subject: [PATCH 70/79] Make changes to machineset controller's preservation logic to sync with bugfix on master --- pkg/controller/machineset.go | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index c4a9825fa..f18d15fd3 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -336,17 +336,6 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 if m.Labels[v1alpha1.LabelKeyNodeUpdateResult] != v1alpha1.LabelValueNodeUpdateSuccessful { machinesWithoutUpdateSuccessfulLabel = append(machinesWithoutUpdateSuccessfulLabel, m) } - if machineutils.IsMachineTriggeredForDeletion(m) { - staleMachines = append(staleMachines, m) - } else if machineutils.IsMachineFailed(m) { - if c.shouldFailedMachineBeTerminated(m) { - staleMachines = append(staleMachines, m) - } else { - activeMachines = append(activeMachines, m) - } - } else if machineutils.IsMachineActive(m) { - activeMachines = append(activeMachines, m) - } } allMachinesDiff := len(allMachines) - int(machineSet.Spec.Replicas) machinesWithoutUpdateSuccessfulLabelDiff := len(machinesWithoutUpdateSuccessfulLabel) - int(machineSet.Spec.Replicas) @@ -438,18 +427,6 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 logMachinesWithPriority1(machinesWithoutUpdateSuccessfulLabel) machinesToDelete := getMachinesToDelete(machinesWithoutUpdateSuccessfulLabel, machinesWithoutUpdateSuccessfulLabelDiff) logMachinesToDelete(machinesToDelete) - //if machines are preserved, stop preservation - //for _, mc := range machinesToDelete { - // if machineutils.IsMachinePreserved(mc) { - // - // } - // - //}for _, mc := range machinesToDelete { - // if machineutils.IsMachinePreserved(mc) { - // - // } - // - //} // Snapshot the UIDs (ns/name) of the machines we're expecting to see // deleted, 
so we know to record their expectations exactly once either @@ -471,7 +448,9 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 var staleMachines []*v1alpha1.Machine for _, m := range machinesWithoutUpdateSuccessfulLabel { if machineutils.IsMachineFailed(m) { - staleMachines = append(staleMachines, m) + if c.shouldFailedMachineBeTerminated(m) { + staleMachines = append(staleMachines, m) + } } } @@ -683,7 +662,6 @@ func slowStartBatch(count int, initialBatchSize int, fn func() error) (int, erro return successes, nil } -// TODO@thiyyakat: ensure preserved machines are the last to be deleted func getMachinesToDelete(filteredMachines []*v1alpha1.Machine, diff int) []*v1alpha1.Machine { // No need to sort machines if we are about to delete all of them. // diff will always be <= len(filteredMachines), so not need to handle > case. From a9dbc21b525398dc7caa204534e237f0efcd768b Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 20 Feb 2026 15:10:05 +0530 Subject: [PATCH 71/79] Change proposal to reflect changes in design --- docs/proposals/machine-preservation.md | 77 +++++++++++++------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/docs/proposals/machine-preservation.md b/docs/proposals/machine-preservation.md index bf92d3c0b..1ab02cdab 100644 --- a/docs/proposals/machine-preservation.md +++ b/docs/proposals/machine-preservation.md @@ -29,17 +29,18 @@ Related Issue: https://github.com/gardener/machine-controller-manager/issues/100 ## Proposal In order to achieve the objectives mentioned, the following are proposed: -1. Enhance `machineControllerManager` configuration in the `ShootSpec`, to specify the max number of machines to be auto-preserved, -and the time duration for which these machines will be preserved. - ``` - machineControllerManager: - autoPreserveFailedMax: 0 - machinePreserveTimeout: 72h - ``` - * This configuration will be set per worker pool. 
- * Since gardener worker pool can correspond to `1..N` MachineDeployments depending on number of zones, `autoPreserveFailedMax` will be distributed across N machine deployments. - * `autoPreserveFailedMax` must be chosen such that it can be appropriately distributed across the MachineDeployments. - * Example: if `autoPreserveFailedMax` is set to 2, and the worker pool has 2 zones, then the maximum number of machines that will be preserved per zone is 1. +1. Enhance `worker` configuration in the `ShootSpec`, to specify the maximum number of failed machines that will be auto-preserved and the time duration for which machines will be preserved. +``` + workers: + - name: example-worker + autoPreserveFailedMachineMax: 2 + machineControllerManager: + machinePreserveTimeout: 72h +``` + * This configuration will be set per worker pool. + * Since gardener worker pool can correspond to `1..N` MachineDeployments depending on number of zones, `autoPreserveFailedMachineMax` will be distributed across N machine deployments. + * `autoPreserveFailedMachineMax` must be chosen such that it can be appropriately distributed across the MachineDeployments. + * Example: if `autoPreserveFailedMachineMax` is set to 2, and the worker pool has 2 zones, then the maximum number of machines that will be preserved per zone is 1. 2. MCM will be modified to include a new sub-phase `Preserved` to indicate that the machine has been preserved by MCM. 3. Allow user/operator to request for preservation of a specific machine/node with the use of annotations : `node.machine.sapcloud.io/preserve=now` and `node.machine.sapcloud.io/preserve=when-failed`. 4. When annotation `node.machine.sapcloud.io/preserve=now` is added to a `Running` machine, the following will take place: @@ -49,29 +50,28 @@ and the time duration for which these machines will be preserved. - After timeout, the `node.machine.sapcloud.io/preserve=now` and `cluster-autoscaler.kubernetes.io/scale-down-disabled: "true"` are deleted. 
The `machine.CurrentStatus.PreserveExpiryTime` is set to `nil`. The machine phase is changed to `Running` and the CA may delete the node. - If a machine in `Running:Preserved` fails, it is moved to `Failed:Preserved`. 5. When annotation `node.machine.sapcloud.io/preserve=when-failed` is added to a `Running` machine and the machine goes to `Failed`, the following will take place: - - The machine is drained of pods except for Daemonset pods. + - Pods (other than DaemonSet pods) are drained. - The machine phase is changed to `Failed:Preserved`. - `cluster-autoscaler.kubernetes.io/scale-down-disabled: "true"` is added to the node to prevent CA from scaling it down. - `machine.CurrentStatus.PreserveExpiryTime` is updated by MCM as $machine.CurrentStatus.PreserveExpiryTime = currentTime+machinePreserveTimeout$. - After timeout, the annotations `node.machine.sapcloud.io/preserve=when-failed` and `cluster-autoscaler.kubernetes.io/scale-down-disabled: "true"` are deleted. `machine.CurrentStatus.PreserveExpiryTime` is set to `nil`. The phase is changed to `Terminating`. -6. When an un-annotated machine goes to `Failed` phase and `autoPreserveFailedMax` is not breached: +6. When an un-annotated machine goes to `Failed` phase and `autoPreserveFailedMachineMax` is not breached: - Pods (other than DaemonSet pods) are drained. - The machine's phase is changed to `Failed:Preserved`. - `cluster-autoscaler.kubernetes.io/scale-down-disabled: "true"` is added to the node to prevent CA from scaling it down. - `machine.CurrentStatus.PreserveExpiryTime` is updated by MCM as $machine.CurrentStatus.PreserveExpiryTime = currentTime+machinePreserveTimeout$. - After timeout, the annotation `cluster-autoscaler.kubernetes.io/scale-down-disabled: "true"` is deleted. `machine.CurrentStatus.PreserveExpiryTime` is set to `nil`. The phase is changed to `Terminating`. - - Number of machines in `Failed:Preserved` phase count towards enforcing `autoPreserveFailedMax`. -7. 
If a failed machine is currently in `Failed:Preserved` and before timeout its VM/node is found to be Healthy, the machine will be moved to `Running:Preserved`. After the timeout, it will be moved to `Running`. -The rationale behind moving the machine to `Running:Preserved` rather than `Running`, is to allow pods to get scheduled on to the healthy node again without the autoscaler scaling it down due to under-utilization. -8. A user/operator can request MCM to stop preserving a machine/node in `Running:Preserved` or `Failed:Preserved` phase using the annotation: `node.machine.sapcloud.io/preserve=false`. + - Number of machines in `Failed:Preserved` phase count towards enforcing `autoPreserveFailedMachineMax`. +. +7. A user/operator can request MCM to stop preserving a machine/node in `Running:Preserved` or `Failed:Preserved` phase by deleting the annotation: `node.machine.sapcloud.io/preserve`. * MCM will move a machine thus annotated either to `Running` phase or `Terminating` depending on the phase of the machine before it was preserved. 9. Machines of a MachineDeployment in `Preserved` sub-phase will also be counted towards the replica count and in the enforcement of maximum machines allowed for the MachineDeployment. -10. MCM will be modified to perform drain in `Failed` phase rather than `Terminating`. +10. MCM will be modified to perform drain in `Failed` phase for preserved machines. ## State Diagrams: 1. State Diagram for when a machine or its node is explicitly annotated for preservation: - ```mermaid +```mermaid stateDiagram-v2 state "Running" as R state "Running + Requested" as RR @@ -86,27 +86,26 @@ The rationale behind moving the machine to `Running:Preserved` rather than `Runn RR --> F: on failure F --> FP FP --> T: on timeout or preserve=false - FP --> RP: if node Healthy before timeout + FP --> R: if node Healthy before timeout T --> [*] R-->RP: annotated with preserve=now RP-->F: if node/VM not healthy - ``` +``` + 2. 
State Diagram for when an un-annotated `Running` machine fails (Auto-preservation): ```mermaid stateDiagram-v2 state "Running" as R - state "Running:Preserved" as RP state "Failed (node drained)" as F state "Failed:Preserved" as FP state "Terminating" as T [*] --> R R-->F: on failure - F --> FP: if autoPreserveFailedMax not breached - F --> T: if autoPreserveFailedMax breached + F --> FP: if autoPreserveFailedMachineMax not breached + F --> T: if autoPreserveFailedMachineMax breached FP --> T: on timeout or value=false - FP --> RP : if node Healthy before timeout - RP --> R: on timeout or preserve=false + FP --> R : if node Healthy before timeout T --> [*] ``` @@ -128,21 +127,22 @@ The rationale behind moving the machine to `Running:Preserved` rather than `Runn 4. Operator analyzes the VM. -### Use Case 3: Auto-Preservation +### Use Case 3: Auto-Preservation of Failed Machine aiding in Failure Analysis and Recovery **Scenario:** Machine fails unexpectedly, no prior annotation. #### Steps: 1. Machine transitions to `Failed` phase. 2. Machine is drained. -3. If `autoPreserveFailedMax` is not breached, machine moved to `Failed:Preserved` phase by MCM. +3. If `autoPreserveFailedMachineMax` is not breached, machine moved to `Failed:Preserved` phase by MCM. 4. After `machinePreserveTimeout`, machine is terminated by MCM. +5. If machine is brought back to `Running` phase before timeout, pods can be scheduled on it again. ### Use Case 4: Early Release **Scenario:** Operator has performed his analysis and no longer requires machine to be preserved. #### Steps: 1. Machine is in `Running:Preserved` or `Failed:Preserved` phase. -2. Operator adds: `node.machine.sapcloud.io/preserve=false` to node. +2. Operator removes `node.machine.sapcloud.io/preserve` from node. 3. MCM transitions machine to `Running` or `Terminating` for `Running:Preserved` or `Failed:Preserved` respectively, even though `machinePreserveTimeout` has not expired. -4. 
If machine was in `Failed:Preserved`, capacity becomes available for auto-preservation. +4. If machine was auto-preserved, capacity becomes available for auto-preservation. ## Points to Note @@ -151,13 +151,12 @@ The rationale behind moving the machine to `Running:Preserved` rather than `Runn 3. Consumers (with access to shoot cluster) can annotate Nodes they would like to preserve. 4. Operators (with access to control plane) can additionally annotate Machines that they would like to preserve. This feature can be used when a Machine does not have a backing Node and the operator wishes to preserve the backing VM. 5. If the backing Node object exists but does not have the preservation annotation, preservation annotations added on the Machine will be honoured. -6. However, if a backing Node exists for a Machine and has the preservation annotation, the Node's annotation value will override the Machine annotation value, and be synced to the Machine object. -7. If `autoPreserveFailedMax` is reduced in the Shoot Spec, older machines are moved to `Terminating` phase before newer ones. -8. In case of a scale down of an MCD's replica count, `Preserved` machines will be the last to be scaled down. Replica count will always be honoured. -9. If the value for annotation key `cluster-autoscaler.kubernetes.io/scale-down-disabled` for a machine in `Running:Preserved` is changed to `false` by a user, the value will be overwritten to `true` by MCM. -10. On increase/decrease of timeout, the new value will only apply to machines that go into `Preserved` phase after the change. Operators can always edit `machine.CurrentStatus.PreserveExpiryTime` to prolong the expiry time of existing `Preserved` machines. +6. However, if a backing Node exists for a Machine and has the preservation annotation, the Node's annotation value will override the Machine annotation value. +7. 
If `autoPreserveFailedMachineMax` is reduced in the Shoot Spec, older machines are moved to `Terminating` phase before newer ones. +8. In case of a scale down of an MCD's replica count, `Preserved` machines will be the last to be scaled down. Replica count will always be honoured. +9. On increase/decrease of `machinePreserveTimeout`, the new value will only apply to machines that go into `Preserved` phase after the change. Operators can always edit `machine.CurrentStatus.PreserveExpiryTime` to prolong the expiry time of existing `Preserved` machines. 11. [Modify CA FAQ](https://github.com/gardener/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-prevent-cluster-autoscaler-from-scaling-down-a-particular-node) once feature is developed to use `node.machine.sapcloud.io/preserve=now` instead of the `cluster-autoscaler.kubernetes.io/scale-down-disabled=true` currently suggested. This would: - - harmonise machine flow - - shield from CA's internals - - make it generic and no longer CA specific - - allow a timeout to be specified \ No newline at end of file + - harmonise machine flow + - shield from CA's internals + - make it generic and no longer CA specific + - allow a timeout to be specified \ No newline at end of file From b7415e75e84808ba21694c772897f4c0587bbb2d Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 20 Feb 2026 15:35:34 +0530 Subject: [PATCH 72/79] Update usage doc.
--- docs/usage/machine-preservation.md | 47 ++++++++++++++++++------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/docs/usage/machine-preservation.md b/docs/usage/machine-preservation.md index 3b13f8dc2..bb5029bf3 100644 --- a/docs/usage/machine-preservation.md +++ b/docs/usage/machine-preservation.md @@ -7,14 +7,14 @@ This document explains how to **use machine preservation** to retain machines an A machine and its backing node can be preserved by an end-user/SRE/operator to retain machines and their backing VMs for debugging, analysis, or operational safety. A preserved machine/node has the following properties: -- In case Node is `Unhealthy` for duration longer than `machineHealthTimeout` and the machine moves to `Failed` state, the machine stays in `Failed` state until `machinePreserveTimeout` runs out, without getting terminated.This allows end-users and SREs to debug the machine and backing node, and take necessary actions to recover the machine if needed. +- When a machine in `Failed` phase is preserved, it continues to stay in `Failed` state until `machinePreserveTimeout` runs out, without getting terminated. This allows end-users and SREs to debug the machine and backing node, and take necessary actions to recover the machine if needed. - If a machine is in its `Failed` phase and is preserved, on recovering from failure, the machine can be moved to `Running` phase and the backing node can be uncordoned to allow scheduling of pods again. -- When the machineset is scaled down, machines in the machineset marked for preservation are de-prioritized for deletion. -- If a machine is preserved in its `Running` phase, the MCM adds the CA scale-down-disabled annotation to prevent the CA from scaling down the machine in case of underutilization. - If a machine is preserved and is in its `Failed` phase, MCM drains the backing node of all pods, but the daemonset pods remain on the node. 
- +- If a machine is preserved in its `Running` phase, the MCM adds the CA scale-down-disabled annotation to prevent the CA from scaling down the machine in case of underutilization. +- When the machineset is scaled down, machines in the machineset marked for preservation are de-prioritized for deletion. + -> Note: If a user sets a deletion timestamp (by using tools such as kubectl), the machine and backing node will be deleted. Preservation will not prevent this. +> Note: If a user sets a deletion timestamp on the machine/node (e.g., by using kubectl delete machine/node), the machine and backing node will be deleted. Preservation will not prevent this. > Note: If the desired replica count for a machineset cannot be met without scaling down preserved machines, the required number of preserved machines will be scaled-down. @@ -26,9 +26,9 @@ A preserved machine/node has the following properties: - The `NodeCondition` of `Type=Preserved` is updated to show that the preservation was successful. - If a machine has no backing node, only `PreserveExpiryTime` is set. - If the machine is preserved and in `Failed` phase: - - The `PreserveExpiryTime` is set in the machine's status to indicate when preservation will end. + - The `PreserveExpiryTime` is set in the machine's status to indicate when preservation will end. - The CA scale-down-disabled annotation is added. - - The backing node is drained of all pods but the daemonset pdos remain. + - The backing node is drained of all pods but the daemonset pods remain. - The `NodeCondition` of `Type=Preserved` is updated to show that the preservation was successful. - If a machine has no backing node, only `PreserveExpiryTime` is set. @@ -51,8 +51,8 @@ A preserved machine/node has the following properties: - The preservation feature offers two modes of preservation: - Manual preservation by adding annotations - Auto-preservation by MCM by specifying `AutoPreserveFailedMachineMax` for a workerpool. 
This value is distributed evenly across zones (MCD). -- For manual preservation, the end-user and operators must annotate the backing node of a machine. -- If there is no backing node, the machine object can be annotated. +- For manual preservation, the end-user and operators must annotate either the node or the machine objects with annotation key: `node.machine.sapcloud.io/preserve` and the desired value, as described in the table below. +- If there is no backing node, the machine object can still be annotated. #### Configuration (Shoot Spec) @@ -89,7 +89,7 @@ annotation key: `node.machine.sapcloud.io/preserve` | ---------------- |-----------------------------------------------------------------------------| | when-failed | To be added when the machine/node needs to be preserved **only on failure** | | now | To be added when the machine/node needs to be preserved **now** | -| false | To be added if a machine should not be auto-preserved by MCM on failure | +| false | To be added if a machine should not be auto-preserved by MCM | **Auto-preservation Annotation values added by MCM:** @@ -98,21 +98,33 @@ annotation key: `node.machine.sapcloud.io/preserve` | auto-preserve | Added by MCM to indicate that a machine has been **auto-preserved** on failure. This machine will be counted towards **AutoPreserveFailedMachineMax** | ### ⚠️ Preservation Annotation semantics: + Both node and machine objects can be annotated for preservation. However, if both machine and node have the preservation annotation, the node's annotation value (even if set to "") is honoured and the machine's annotation is deleted. To prevent confusion and unintended behaviour, it is advised to use the feature by annotating only the node or the machine, and not both. + +When the `PreserveExpiryTime` of a preserved machine is reached, the preservation will be stopped. Additionally, the preservation annotation is removed to prevent undesired re-preservation of the same machine. 
This is applicable for both manual and auto-preservation. + +When a machine is annotated with value `false`, the annotation and its value are not removed by MCM. This is to explicitly indicate that the machine should not be auto-preserved by MCM. If the annotation value is set to empty or the annotation is deleted, MCM will again auto-preserve the machine if it is in `Failed` phase and the `AutoPreserveFailedMachineMax` limit is not reached. + --- ### How to manually stop preservation before PreserveExpiryTime: +**In the case of manual preservation:** +To manually stop preservation, the preservation annotation must be deleted from whichever object (node/machine) is annotated for preservation. -To manually stop preservation, the preservation annotation must be deleted from whichever object (node/machine) is annotated for preservation. +**In the case of auto-preservation:** +To stop auto-preservation, the machine should be annotated with `node.machine.sapcloud.io/preserve=false`. +Deleting the annotation or setting its value to empty will not stop preservation since MCM will again auto-preserve the machine if it is in `Failed` phase and the `AutoPreserveFailedMachineMax` limit is not reached. +Setting the annotation value to `false` explicitly indicates that the machine should not be auto-preserved by MCM anymore, and the preservation will be stopped. --- ### How to prevent a machine from being auto-preserved by MCM: -To prevent a machine from being auto-preserved on moving to `Failed` phase, the node/machine object must be annotated with the value `false`. If a currently preserved machine is annotated with `false`, the preservation will be stopped. +To prevent a `Failed` machine from being auto-preserved, the node/machine object must be annotated with `node.machine.sapcloud.io/preserve=false`. If a currently preserved machine is annotated with `false`, the preservation will be stopped.
Here too, the preservation annotation semantics from above applies - if both machine and node are annotated, the node's annotation value is honoured and the machine's annotation is deleted. +If the preserve annotation value is set to empty or the annotation is deleted, MCM will again auto-preserve the machine if it is in `Failed` phase and the `AutoPreserveFailedMachineMax` limit is not reached. --- ### What happens when a machine recovers from failure and moves to `Running` during preservation? @@ -120,19 +132,18 @@ Here too, the preservation annotation semantics from above applies - if both mac Depending on the annotation value - (`now/when-failed/auto-preserve`), the behaviour differs. This is to reflect the meaning behind the annotation value. 1. `now`: on recovery from failure, machine preservation continues until `PreserveExpiryTime` -2. `when-failed`: on recovery from failure, machine preservation stops. This is because the annotation value clearly expresses that a machine must be preserved only when `Failed`. If the annotation is not explicitly changed, and the machine fails again, the machine is preserved again. -3. `auto-preserve`: since MCM performs auto-preservation of `Failed` machines only, on recovery, the machine preservation is stopped. +2. `when-failed`: on recovery from failure, machine preservation stops. This is because the annotation value clearly expresses that a machine must be preserved only when `Failed`. If the annotation is not changed, and the machine fails again, the machine is preserved again. +3. `auto-preserve`: since MCM performs auto-preservation of machines only when they are `Failed`, on recovery, the machine preservation is stopped. In all the cases, when the machine moves to `Running` during preservation, the backing node is uncordoned to allow pods to be scheduled on it again. ->Note: When a machine recovers to `Running` and preservation is stopped, CA's `scale-down-unneeded-time` comes into play. 
If the node's utilization is below the utilization threshold configured after `scale-down-unneeded-time`, CA will scale down the machine. +>Note: When a machine recovers to `Running` and preservation is stopped, CA's `scale-down-unneeded-time` comes into play. If the node's utilization is below the utilization threshold configured even after `scale-down-unneeded-time`, CA will scale down the machine. --- ### Important Notes & Limitations - Rolling updates: Preservation is ignored; Failed machines are replaced as usual. - Shoot hibernation overrides preservation. -- Replica enforcement: Preserved machines count towards MachineDeployment replicas. +- Replica enforcement: Preserved machines count towards MachineDeployment replicas, and can be scaled down if the desired replica count is reduced below the number of preserved machines. - Scale-down preference: Preserved machines are the last to be scaled down. - Preservation status is visible via Node Conditions and Machine Status fields. -- Timeout changes: Do not affect existing preserved machines. -- Manual extension: Operators may edit PreserveExpiryTime directly if required. +- machinePreserveTimeout changes do not affect existing preserved machines. Operators may edit PreserveExpiryTime directly if required to extend preservation. 
From e7fef733d7e52705e3a4b644762910b35e56c375 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 24 Feb 2026 11:59:52 +0530 Subject: [PATCH 73/79] Clean up comments --- pkg/controller/machineset.go | 6 +----- pkg/util/provider/machinecontroller/machine.go | 6 +----- pkg/util/provider/machinecontroller/machine_util.go | 3 --- pkg/util/provider/machineutils/utils.go | 8 ++------ 4 files changed, 4 insertions(+), 19 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index f18d15fd3..dba58413f 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -552,10 +552,6 @@ func (c *controller) reconcileClusterMachineSet(key string) error { return err } - // triggerAutoPreservation adds the preserve=PreserveMachineAnnotationValuePreservedByMCM annotation - // to Failed machines to trigger auto-preservation, if applicable. - // We do not update machineSet.Status.AutoPreserveFailedMachineCount in the function, as it will be calculated - // and updated in the succeeding calls to calculateMachineSetStatus() and updateMachineSetStatus() filteredMachines = c.manageAutoPreservationOfFailedMachines(ctx, filteredMachines, machineSet) // TODO: Fix working of expectations to reflect correct behaviour @@ -892,7 +888,7 @@ func (c *controller) shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) // manageAutoPreservationOfFailedMachines annotates failed machines with preserve=auto-preserved annotation // to trigger preservation of the machines, by the machine controller, up to the limit defined in the -// MachineSet's AutoPreserveFailedMachineMax field. +// MachineSet's AutoPreserveFailedMachineMax field. If the AutoPreserveFailedMachineMax limit is breached, it removes the preserve=auto-preserved annotation from the oldest annotated machines. 
func (c *controller) manageAutoPreservationOfFailedMachines(ctx context.Context, machines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) []*v1alpha1.Machine { autoPreservationCapacityRemaining := machineSet.Spec.AutoPreserveFailedMachineMax - machineSet.Status.AutoPreserveFailedMachineCount if autoPreservationCapacityRemaining == 0 { diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 0cf2ef625..658b25eab 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -779,7 +779,7 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } effectivePreserveValue := reconcilePreservationAnnotations(nodeAnnotationValue, clone.Annotations) // The annotation has either been deleted, set to empty or no preserve annotation exists. - // in all these cases, machine preservation should not be done. If machine is preserved, stop preservation. + // in all these cases, the machine should not be preserved. If machine is preserved, stop preservation. if effectivePreserveValue == "" { machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) return @@ -790,8 +790,6 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } else { klog.Warningf("Preserve annotation value %q on machine %q is invalid", effectivePreserveValue, clone.Name) } - // invalid annotation value will be synced to lastAppliedNodePreserveValue in the defer() call. 
- // This is to prevent MCM from missing updates on the node object in case of crashes return } if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueFalse { @@ -801,8 +799,6 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { if !machineutils.IsMachineFailed(clone) || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) - // if not preserved, and lastAppliedNodePreserveValue is different from current preserve annotation value on node, - // the defer() call will update the lastAppliedNodePreserveValue } else { machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index a02865315..21a43b00e 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2598,20 +2598,17 @@ func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, pres if !strings.Contains(newNodePreservedCondition.Message, v1alpha1.PreservedNodeDrainSuccessful) { newNodePreservedCondition.Message = fmt.Sprintf("%s %s %v.", v1alpha1.PreservedNodeDrainSuccessful, preserveExpiryMessageSuffix, currentStatus.PreserveExpiryTime) - //newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainSuccessful + ". 
" + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() newNodePreservedCondition.Status = v1.ConditionTrue needsUpdate = true } } else if !strings.Contains(newNodePreservedCondition.Message, v1alpha1.PreservedNodeDrainUnsuccessful) { newNodePreservedCondition.Message = fmt.Sprintf("%s %s %v.", v1alpha1.PreservedNodeDrainUnsuccessful, preserveExpiryMessageSuffix, currentStatus.PreserveExpiryTime) - //newNodePreservedCondition.Message = v1alpha1.PreservedNodeDrainUnsuccessful + ". " + preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() newNodePreservedCondition.Status = v1.ConditionFalse needsUpdate = true } } else if newNodePreservedCondition.Status != v1.ConditionTrue { newNodePreservedCondition.Status = v1.ConditionTrue newNodePreservedCondition.Message = fmt.Sprintf("%s %v.", preserveExpiryMessageSuffix, currentStatus.PreserveExpiryTime) - //newNodePreservedCondition.Message = preserveExpiryMessageSuffix + currentStatus.PreserveExpiryTime.String() needsUpdate = true } if preserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 723756406..949f16e8a 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -107,12 +107,8 @@ const ( // The AutoPreserveFailedMachineMax, set on the MCD, is enforced based on the number of machines annotated with this value. PreserveMachineAnnotationValuePreservedByMCM = "auto-preserved" - //// PreserveMachineAnnotationValuePreserveStoppedByMCM is the annotation value used to indicate that - //// the auto-preservation of a Machine was stopped. 
- //PreserveMachineAnnotationValuePreserveStoppedByMCM = "auto-preserve-stopped" - - //PreserveMachineAnnotationValueFalse is the annotation value used to explicitly request that - // a Machine should not be preserved any longer, even if the expiry timeout has not been reached + // PreserveMachineAnnotationValueFalse is the annotation value used to indicate to MCM that a machine must not be auto-preserved + // on failure. PreserveMachineAnnotationValueFalse = "false" ) From 4d5e455cdaf20fa691d5776fd70112dabecee167 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Thu, 26 Feb 2026 13:10:42 +0530 Subject: [PATCH 74/79] Clean up manageMachinePreservation --- pkg/controller/machineset.go | 2 +- .../provider/machinecontroller/machine.go | 79 +++++++++---------- .../machinecontroller/machine_test.go | 11 +-- .../machinecontroller/machine_util.go | 20 ++--- .../machinecontroller/machine_util_test.go | 12 +-- pkg/util/provider/machineutils/utils.go | 11 ++- 6 files changed, 71 insertions(+), 64 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index dba58413f..d46df3bd0 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -904,7 +904,7 @@ func (c *controller) manageAutoPreservationOfFailedMachines(ctx context.Context, for index, m := range machines { if machineutils.IsMachineFailed(m) { // check if machine is already annotated for preservation, if yes, skip. Machine controller will take care of the rest. 
- if machineutils.AllowedPreserveAnnotationValues.Has(m.Annotations[machineutils.PreserveMachineAnnotationKey]) { + if machineutils.PreventAutoPreserveAnnotationValues.Has(m.Annotations[machineutils.PreserveMachineAnnotationKey]) { continue } if autoPreservationCapacityRemaining == 0 { diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 658b25eab..5683f757e 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -748,11 +748,13 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // manageMachinePreservation manages machine preservation based on the preserve annotation values on the node and machine objects. func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { - machineObjectUpdated := false + machineAnnotationsUpdated := false clone := machine.DeepCopy() defer func() { - // this needs to be done for cases when machine is neither preserved nor un-preserved, but the LastAppliedNodePreserveValueAnnotation needs to be updated - if err == nil && !machineObjectUpdated && clone.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] != machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] { + // This needs to be done for cases when machine is neither preserved nor un-preserved (e.g. when preserve changes from now to when-failed on a Failed machine), + // but the LastAppliedNodePreserveValueAnnotation needs to be updated. 
+ // We compare annotation value in the clone with the original machine object to see if an update is required + if err == nil && !machineAnnotationsUpdated && clone.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] != machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] { _, err = c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("error updating LastAppliedNodePreserveValueAnnotation value on machine %q: %v", machine.Name, err) @@ -777,13 +779,8 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } } - effectivePreserveValue := reconcilePreservationAnnotations(nodeAnnotationValue, clone.Annotations) - // The annotation has either been deleted, set to empty or no preserve annotation exists. - // in all these cases, the machine should not be preserved. If machine is preserved, stop preservation. - if effectivePreserveValue == "" { - machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) - return - } + var effectivePreserveValue string + effectivePreserveValue, clone.Annotations = getEffectivePreservationAnnotations(nodeAnnotationValue, clone.Annotations) if !machineutils.AllowedPreserveAnnotationValues.Has(effectivePreserveValue) { if effectivePreserveValue == nodeAnnotationValue { klog.Warningf("Preserve annotation value %q on node %q is invalid", effectivePreserveValue, nodeName) @@ -792,42 +789,35 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } return } - if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueFalse { - machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) - return - } - if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueWhenFailed { - if !machineutils.IsMachineFailed(clone) || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && 
!clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { - machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, false) + switch effectivePreserveValue { + case "", machineutils.PreserveMachineAnnotationValueFalse: + machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, false) + case machineutils.PreserveMachineAnnotationValueWhenFailed: + if !machineutils.IsMachineFailed(clone) || machineutils.IsMachinePreservationExpired(clone) { + machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, false) } else { - machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + machineAnnotationsUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } - if err != nil { - return - } - } else if effectivePreserveValue == machineutils.PreserveMachineAnnotationValueNow { - if clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now()) { + case machineutils.PreserveMachineAnnotationValueNow: + if machineutils.IsMachinePreservationExpired(clone) { // on timing out, remove preserve annotation to prevent incorrect re-preservation - machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, true) + machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, true) } else { - machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + machineAnnotationsUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } - if err != nil { - return - } - } else if effectivePreserveValue == machineutils.PreserveMachineAnnotationValuePreservedByMCM { - if !machineutils.IsMachineFailed(clone) || (clone.Status.CurrentStatus.PreserveExpiryTime != nil && !clone.Status.CurrentStatus.PreserveExpiryTime.After(time.Now())) { + case machineutils.PreserveMachineAnnotationValuePreservedByMCM: + if !machineutils.IsMachineFailed(clone) || machineutils.IsMachinePreservationExpired(clone) 
{ // To prevent incorrect re-preservation of a recovered, previously auto-preserved machine on future failures // (since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), // in addition to stopping preservation, we also remove the preservation annotation on the machine. - machineObjectUpdated, err = c.stopMachinePreservationIfPreserved(ctx, clone, true) + machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, true) } else { - machineObjectUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) - } - if err != nil { - return + machineAnnotationsUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } } + if err != nil { + return + } // This is to handle the case where a preserved machine recovers from Failed to Running // in which case, pods should be allowed to be scheduled onto the node if machineutils.IsMachineActive(clone) && nodeName != "" { @@ -836,14 +826,21 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a return } -// reconcilePreservationAnnotations returns the effective preservation value, and updates the machine Annotations related to preservation -func reconcilePreservationAnnotations(nodeAnnotationValue string, machineAnnotations map[string]string) string { +// getEffectivePreservationAnnotations returns the effective preservation value, and updates the machine Annotations related to preservation +func getEffectivePreservationAnnotations(nodeAnnotationValue string, machineAnnotations map[string]string) (string, map[string]string) { + // If there is no active node annotation AND no previously-applied node annotation, + // enforce machine's preserve annotation. + // Otherwise, the node annotation takes precedence (even if now empty/removed). 
if nodeAnnotationValue == "" && machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "" { - return machineAnnotations[machineutils.PreserveMachineAnnotationKey] + return machineAnnotations[machineutils.PreserveMachineAnnotationKey], machineAnnotations + } + clonedMachineAnnotations := make(map[string]string) + for k, v := range machineAnnotations { + clonedMachineAnnotations[k] = v } - delete(machineAnnotations, machineutils.PreserveMachineAnnotationKey) - machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] = nodeAnnotationValue - return nodeAnnotationValue + delete(clonedMachineAnnotations, machineutils.PreserveMachineAnnotationKey) + clonedMachineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] = nodeAnnotationValue + return nodeAnnotationValue, clonedMachineAnnotations } func (c *controller) getNodePreserveAnnotationValue(nodeName string) (string, error) { diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 299eb8037..94ae90c59 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -3998,7 +3998,7 @@ var _ = Describe("machine", func() { }), ) }) - Describe("#reconcilePreservationAnnotations", func() { + Describe("#getEffectivePreservationAnnotations", func() { type setup struct { nodeAnnotationValue string machineAnnotations map[string]string @@ -4013,12 +4013,13 @@ var _ = Describe("machine", func() { expect expect } - DescribeTable("reconcilePreservationAnnotations scenarios", + DescribeTable("getEffectivePreservationAnnotations scenarios", func(tc testCase) { - preserveValue := reconcilePreservationAnnotations(tc.setup.nodeAnnotationValue, tc.setup.machineAnnotations) + + preserveValue, updatedMachineAnnotations := getEffectivePreservationAnnotations(tc.setup.nodeAnnotationValue, tc.setup.machineAnnotations) 
Expect(preserveValue).To(Equal(tc.expect.effectivePreserveValue)) - Expect(tc.setup.machineAnnotations[machineutils.PreserveMachineAnnotationKey]).To(Equal(tc.expect.machineAnnotations[machineutils.PreserveMachineAnnotationKey])) - Expect(tc.setup.machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey]).To(Equal(tc.expect.machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey])) + Expect(updatedMachineAnnotations[machineutils.PreserveMachineAnnotationKey]).To(Equal(tc.expect.machineAnnotations[machineutils.PreserveMachineAnnotationKey])) + Expect(updatedMachineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey]).To(Equal(tc.expect.machineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey])) }, Entry("when node is not annotated and laNodeAnnotationValue is empty, should return machine's annotation value and empty string", testCase{ setup: setup{ diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 21a43b00e..2bdd81e1b 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2416,8 +2416,8 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach return machineObjectUpdated, nil } -// stopMachinePreservationIfPreserved stops the preservation of the machine and node, if preserved, and returns true if machine object has been updated -func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, machine *v1alpha1.Machine, deletePreservationAnnotations bool) (bool, error) { +// stopPreservationIfActive stops the preservation of the machine and node, if preserved, and returns true if machine object has been updated +func (c *controller) stopPreservationIfActive(ctx context.Context, machine *v1alpha1.Machine, removePreservationAnnotations bool) (bool, error) { // removal of preserveExpiryTime is the last step of stopping 
preservation // therefore, if preserveExpiryTime is not set, machine is not preserved nodeName := machine.Labels[v1alpha1.NodeLabelKey] @@ -2427,9 +2427,9 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac // if there is no backing node if nodeName == "" { // remove annotation from machine if needed - if deletePreservationAnnotations { + if removePreservationAnnotations { var err error - machine, err = c.deletePreserveAnnotationOnMachine(ctx, machine) + machine, err = c.removePreserveAnnotationOnMachine(ctx, machine) if err != nil { return false, err } @@ -2473,13 +2473,13 @@ func (c *controller) stopMachinePreservationIfPreserved(ctx context.Context, mac return false, err } // Step 2: remove annotations from node - err = c.deletePreservationRelatedAnnotationsOnNode(ctx, updatedNode, deletePreservationAnnotations) + err = c.removePreservationRelatedAnnotationsOnNode(ctx, updatedNode, removePreservationAnnotations) if err != nil { return false, err } // Step 3: remove annotation from machine if needed - if deletePreservationAnnotations { - machine, err = c.deletePreserveAnnotationOnMachine(ctx, machine) + if removePreservationAnnotations { + machine, err = c.removePreserveAnnotationOnMachine(ctx, machine) if err != nil { return false, err } @@ -2531,7 +2531,7 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, } // removePreserveAnnotationsOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node -func (c *controller) deletePreservationRelatedAnnotationsOnNode(ctx context.Context, node *v1.Node, removePreserveAnnotation bool) error { +func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Context, node *v1.Node, removePreserveAnnotation bool) error { // Check if annotation already absent if node.Annotations == nil { return nil @@ -2625,7 +2625,7 @@ func shouldPreservedNodeBeDrained(existingCondition *v1.NodeCondition, machinePh if existingCondition 
== nil { return true } - return existingCondition.Message != v1alpha1.PreservedNodeDrainSuccessful + return !strings.Contains(existingCondition.Message, v1alpha1.PreservedNodeDrainSuccessful) } return false } @@ -2645,7 +2645,7 @@ func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine return nil } -func (c *controller) deletePreserveAnnotationOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { +func (c *controller) removePreserveAnnotationOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { if machine.Annotations == nil || (machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" && machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "") { return machine, nil diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go index a093800eb..4c3051139 100644 --- a/pkg/util/provider/machinecontroller/machine_util_test.go +++ b/pkg/util/provider/machinecontroller/machine_util_test.go @@ -4209,7 +4209,7 @@ var _ = Describe("machine_util", func() { ), ) }) - Describe("#stopMachinePreservationIfPreserved", func() { + Describe("#stopPreservationIfActive", func() { type setup struct { nodeName string removePreserveAnnotation bool @@ -4221,7 +4221,7 @@ var _ = Describe("machine_util", func() { setup setup expect expect } - DescribeTable("##stopMachinePreservationIfPreserved behaviour scenarios", + DescribeTable("##stopPreservationIfActive behaviour scenarios", func(tc *testCase) { stop := make(chan struct{}) defer close(stop) @@ -4275,7 +4275,7 @@ var _ = Describe("machine_util", func() { c, trackers := createController(stop, testNamespace, controlMachineObjects, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - _, err := c.stopMachinePreservationIfPreserved(context.TODO(), machine, tc.setup.removePreserveAnnotation) + _, err := 
c.stopPreservationIfActive(context.TODO(), machine, tc.setup.removePreserveAnnotation) if tc.expect.err != nil { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(tc.expect.err.Error())) @@ -4585,7 +4585,7 @@ var _ = Describe("machine_util", func() { }), ) }) - Describe("#deletePreservationRelatedAnnotationsOnNode", func() { + Describe("#removePreservationRelatedAnnotationsOnNode", func() { type setup struct { removePreserveAnnotation bool CAAnnotationPresent bool @@ -4600,7 +4600,7 @@ var _ = Describe("machine_util", func() { setup setup expect expect } - DescribeTable("##deletePreservationRelatedAnnotationsOnNode behaviour scenarios", + DescribeTable("##removePreservationRelatedAnnotationsOnNode behaviour scenarios", func(tc *testCase) { stop := make(chan struct{}) defer close(stop) @@ -4623,7 +4623,7 @@ var _ = Describe("machine_util", func() { c, trackers := createController(stop, testNamespace, nil, nil, targetCoreObjects, nil, false) defer trackers.Stop() waitForCacheSync(stop, c) - err := c.deletePreservationRelatedAnnotationsOnNode(context.TODO(), node, tc.setup.removePreserveAnnotation) + err := c.removePreservationRelatedAnnotationsOnNode(context.TODO(), node, tc.setup.removePreserveAnnotation) waitForCacheSync(stop, c) updatedNode, getErr := c.targetCoreClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) Expect(getErr).To(BeNil()) diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 949f16e8a..8a60dd799 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -113,7 +113,10 @@ const ( ) // AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation -var AllowedPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) +var AllowedPreserveAnnotationValues = 
sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse, "") + +// PreventAutoPreserveAnnotationValues contains the values for which a machine will not be auto-preserved on failure +var PreventAutoPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) // RetryPeriod is an alias for specifying the retry period type RetryPeriod time.Duration @@ -157,6 +160,12 @@ func IsMachineTriggeredForDeletion(m *v1alpha1.Machine) bool { return m.Annotations[MachinePriority] == "1" } +// IsMachinePreservationExpired checks if the preserve expiry time has passed for a machine +func IsMachinePreservationExpired(m *v1alpha1.Machine) bool { + t := m.Status.CurrentStatus.PreserveExpiryTime + return t != nil && !t.After(time.Now()) +} + // see https://github.com/kubernetes/kubernetes/issues/21479 type updateMachineFunc func(machine *v1alpha1.Machine) error From dcf0573ef2546822dc503fcf4576b6c8b8153e02 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Mon, 2 Mar 2026 11:35:08 +0530 Subject: [PATCH 75/79] Address review comments --- pkg/controller/machineset.go | 4 +- pkg/controller/machineset_test.go | 9 +--- .../provider/machinecontroller/machine.go | 10 +--- .../machinecontroller/machine_util.go | 47 +++---------------- pkg/util/provider/machinecontroller/node.go | 42 +++++++++++++++++ 5 files changed, 53 insertions(+), 59 deletions(-) diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index d46df3bd0..781177740 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -873,7 +873,7 @@ func (c *controller) shouldFailedMachineBeTerminated(machine *v1alpha1.Machine) preserveValue, err := c.findEffectivePreserveValue(machine) if err != nil { // in case of error fetching node or annotations, we don't 
want to block deletion of failed machines, so we return true - klog.V(2).Infof("Error finding effective preserve value for machine %q: %v. Proceeding with termination of the machine.", machine.Name, err) + klog.Errorf("error finding effective preserve value for machine %q: %v. Proceeding with termination of the machine.", machine.Name, err) return true } switch preserveValue { @@ -974,7 +974,7 @@ func (c *controller) findEffectivePreserveValue(machine *v1alpha1.Machine) (stri if nodeName != "" { node, err := c.nodeLister.Get(nodeName) if err != nil { - klog.Errorf("Error fetching node %q for machine %q: %v", nodeName, machine.Name, err) + klog.Errorf("error fetching node %q for machine %q: %v", nodeName, machine.Name, err) return "", err } nodeAnnotationValue = node.Annotations[machineutils.PreserveMachineAnnotationKey] diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index fe681a9db..39368a2cb 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -1379,8 +1379,6 @@ var _ = Describe("machineset", func() { } }) It("should return the Failed machines first.", func() { - stop := make(chan struct{}) - defer close(stop) diff = 1 filteredMachines := []*machinev1.Machine{testActiveMachine1, testFailedMachine1} machinesToDelete := getMachinesToDelete(filteredMachines, diff) @@ -1389,8 +1387,6 @@ var _ = Describe("machineset", func() { Expect(machinesToDelete[0].Name).To(Equal(testFailedMachine1.Name)) }) It("should prioritise non-preserved machines for deletion.", func() { - stop := make(chan struct{}) - defer close(stop) diff = 2 testPreservedFailedMachine := testFailedMachine1.DeepCopy() testPreservedFailedMachine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: time.Now().Add(1 * time.Hour)} @@ -1401,8 +1397,6 @@ var _ = Describe("machineset", func() { Expect(machinesToDelete).ToNot(ContainElement(testPreservedFailedMachine)) }) It("should include preserved machine when needed to maintain 
replica count", func() { - stop := make(chan struct{}) - defer close(stop) diff = 2 testPreservedFailedMachine := testFailedMachine1.DeepCopy() testPreservedFailedMachine.Status.CurrentStatus.PreserveExpiryTime = &metav1.Time{Time: time.Now().Add(1 * time.Hour)} @@ -1932,8 +1926,7 @@ var _ = Describe("machineset", func() { }, }, } - objects := []runtime.Object{} - objects = append(objects, testMachineSet, testMachine1, testMachine2, testMachine3, testMachine4) + objects := []runtime.Object{testMachineSet, testMachine1, testMachine2, testMachine3, testMachine4} for _, m := range tc.setup.additionalMachines { objects = append(objects, m) } diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index 5683f757e..f0298db51 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -70,6 +70,7 @@ func (c *controller) updateMachine(oldObj, newObj any) { _, exists := newMachine.Annotations[machineutils.PreserveMachineAnnotationKey] if exists && oldMachine.Status.CurrentStatus.Phase != newMachine.Status.CurrentStatus.Phase { c.enqueueMachine(newObj, "handling preserved machine phase update") + return } if oldMachine.Generation == newMachine.Generation { @@ -842,12 +843,3 @@ func getEffectivePreservationAnnotations(nodeAnnotationValue string, machineAnno clonedMachineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] = nodeAnnotationValue return nodeAnnotationValue, clonedMachineAnnotations } - -func (c *controller) getNodePreserveAnnotationValue(nodeName string) (string, error) { - node, err := c.nodeLister.Get(nodeName) - if err != nil { - klog.Errorf("error fetching node %q: %v", nodeName, err) - return "", err - } - return node.Annotations[machineutils.PreserveMachineAnnotationKey], nil -} diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 2bdd81e1b..6554ede46 100644 --- 
a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -1656,7 +1656,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver if forceDeleteMachine { klog.Warningf("Failed to update node conditions: %v. However, since it's a force deletion shall continue deletion of VM.", err) } else { - klog.Errorf("Drain failed due to failure in update of node conditions: %v", err) + klog.Errorf("drain failed due to failure in update of node conditions: %v", err) description = fmt.Sprintf("Drain failed due to failure in update of node conditions - %s. Will retry in next sync. %s", err.Error(), machineutils.InitiateDrain) state = v1alpha1.MachineStateFailed @@ -1715,7 +1715,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver description = fmt.Sprintf("Drain failed due to - %s. However, since it's a force deletion shall continue deletion of VM. %s", err.Error(), machineutils.DelVolumesAttachments) state = v1alpha1.MachineStateProcessing } else { - klog.Warningf("Drain failed for machine %q , providerID %q ,backing node %q. \nBuf:%v \nErrBuf:%v \nErr-Message:%v", machine.Name, getProviderID(machine), getNodeName(machine), buf, errBuf, err) + klog.Errorf("drain failed for machine %q , providerID %q ,backing node %q. \nBuf:%v \nErrBuf:%v \nErr-Message:%v", machine.Name, getProviderID(machine), getNodeName(machine), buf, errBuf, err) description = fmt.Sprintf("Drain failed due to - %s. Will retry in next sync. 
%s", err.Error(), machineutils.InitiateDrain) state = v1alpha1.MachineStateFailed @@ -2489,7 +2489,7 @@ func (c *controller) stopPreservationIfActive(ctx context.Context, machine *v1al if err != nil { return false, err } - klog.V(2).Infof("Preservation of machine %q has stopped.", machine.Name) + klog.V(2).Infof("Preservation of machine %q and backing node %q has stopped.", machine.Name, nodeName) return true, nil } @@ -2545,7 +2545,7 @@ func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Cont delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) updateRequired = true } - if removePreserveAnnotation { + if removePreserveAnnotation && nodeCopy.Annotations[machineutils.PreserveMachineAnnotationKey] != "" { delete(nodeCopy.Annotations, machineutils.PreserveMachineAnnotationKey) updateRequired = true } @@ -2560,23 +2560,6 @@ func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Cont return nil } -func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string) error { - node, err := c.nodeLister.Get(nodeName) - if err != nil { - return err - } - if !node.Spec.Unschedulable { - return nil - } - nodeClone := node.DeepCopy() - nodeClone.Spec.Unschedulable = false - _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeClone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error uncordoning node %q: %v", nodeName, err) - } - return err -} - // computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, preserveValue string, drainErr error, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { const preserveExpiryMessageSuffix = "Machine preserved until" @@ -2645,22 +2628,6 @@ func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine return nil } -func (c 
*controller) removePreserveAnnotationOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { - - if machine.Annotations == nil || (machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" && machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "") { - return machine, nil - } - clone := machine.DeepCopy() - delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) - delete(clone.Annotations, machineutils.LastAppliedNodePreserveValueAnnotationKey) - updatedClone, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("failed to delete preserve annotation on machine %q. error: %v", machine.Name, err) - return nil, err - } - return updatedClone, nil -} - // drainPreservedNode attempts to drain the node backing a preserved machine func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.Machine) error { var ( @@ -2692,7 +2659,7 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M // verify and log node object's existence _, err = c.nodeLister.Get(nodeName) if err == nil { - klog.V(3).Infof("(drainNode) For node %q, machine %q", nodeName, machine.Name) + klog.V(3).Infof("(drainNode) For node %q, machine %q, nodeReadyCondition: %s, readOnlyFileSystemCondition: %s", nodeName, machine.Name, nodeReadyCondition, readOnlyFileSystemCondition) } else if apierrors.IsNotFound(err) { klog.Warningf("(drainNode) Node %q for machine %q doesn't exist, so drain will finish instantly", nodeName, machine.Name) } @@ -2716,7 +2683,7 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M timeOutDuration = 1 * time.Minute maxEvictRetries = 1 klog.V(2).Infof( - "Force delete/drain has been triggerred for machine %q with providerID %q and backing node %q due to timeout:%t", + "Force delete/drain has been triggerred for machine %q with providerID %q and backing 
node %q. Timeout Occurred:%t", machine.Name, getProviderID(machine), getNodeName(machine), @@ -2763,7 +2730,7 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M klog.V(3).Infof("(drainNode) Invoking RunDrain, timeOutDuration: %s", timeOutDuration) err = drainOptions.RunDrain(ctx) if err != nil { - klog.Warningf("Drain failed for machine %q , providerID %q ,backing node %q. \nBuf:%v \nErrBuf:%v \nErr-Message:%v", machine.Name, getProviderID(machine), getNodeName(machine), buf, errBuf, err) + klog.Errorf("drain failed for machine %q , providerID %q ,backing node %q. \nBuf:%v \nErrBuf:%v \nErr-Message:%v", machine.Name, getProviderID(machine), getNodeName(machine), buf, errBuf, err) return err } if forceDeletePods { diff --git a/pkg/util/provider/machinecontroller/node.go b/pkg/util/provider/machinecontroller/node.go index 51da3c7e8..1a8258b6f 100644 --- a/pkg/util/provider/machinecontroller/node.go +++ b/pkg/util/provider/machinecontroller/node.go @@ -338,3 +338,45 @@ func addedOrRemovedEssentialTaints(oldNode, node *corev1.Node, taintKeys []strin } return false } + +func (c *controller) getNodePreserveAnnotationValue(nodeName string) (string, error) { + node, err := c.nodeLister.Get(nodeName) + if err != nil { + klog.Errorf("error fetching node %q: %v", nodeName, err) + return "", err + } + return node.Annotations[machineutils.PreserveMachineAnnotationKey], nil +} + +func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string) error { + node, err := c.nodeLister.Get(nodeName) + if err != nil { + return err + } + if !node.Spec.Unschedulable { + return nil + } + nodeClone := node.DeepCopy() + nodeClone.Spec.Unschedulable = false + _, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeClone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error uncordoning node %q: %v", nodeName, err) + } + return err +} + +func (c *controller) removePreserveAnnotationOnMachine(ctx context.Context, machine 
*v1alpha1.Machine) (*v1alpha1.Machine, error) { + + if machine.Annotations == nil || (machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" && machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "") { + return machine, nil + } + clone := machine.DeepCopy() + delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) + delete(clone.Annotations, machineutils.LastAppliedNodePreserveValueAnnotationKey) + updatedClone, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("failed to delete preserve annotation on machine %q. error: %v", machine.Name, err) + return nil, err + } + return updatedClone, nil +} From 84a42a4610958a3229aaa301f9e5f01a53ea77d1 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Tue, 3 Mar 2026 09:53:58 +0530 Subject: [PATCH 76/79] Address review comments given by gagan16k --- docs/proposals/machine-preservation.md | 8 ++++---- pkg/controller/machineset_test.go | 9 --------- pkg/util/provider/machinecontroller/machine_util.go | 5 ++++- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/docs/proposals/machine-preservation.md b/docs/proposals/machine-preservation.md index 1ab02cdab..111b7ed64 100644 --- a/docs/proposals/machine-preservation.md +++ b/docs/proposals/machine-preservation.md @@ -65,8 +65,8 @@ In order to achieve the objectives mentioned, the following are proposed: . 7. A user/operator can request MCM to stop preserving a machine/node in `Running:Preserved` or `Failed:Preserved` phase by deleting the annotation: `node.machine.sapcloud.io/preserve`. * MCM will move a machine thus annotated either to `Running` phase or `Terminating` depending on the phase of the machine before it was preserved. -9. Machines of a MachineDeployment in `Preserved` sub-phase will also be counted towards the replica count and in the enforcement of maximum machines allowed for the MachineDeployment. -10. 
MCM will be modified to perform drain in `Failed` phase for preserved machines. +8. Machines of a MachineDeployment in `Preserved` sub-phase will also be counted towards the replica count and in the enforcement of maximum machines allowed for the MachineDeployment. +9. MCM will be modified to perform drain in `Failed` phase for preserved machines. ## State Diagrams: @@ -153,9 +153,9 @@ In order to achieve the objectives mentioned, the following are proposed: 5. If the backing Node object exists but does not have the preservation annotation, preservation annotations added on the Machine will be honoured. 6. However, if a backing Node exists for a Machine and has the preservation annotation, the Node's annotation value will override the Machine annotation value. 7. If `autoPreserveFailedMachineMax` is reduced in the Shoot Spec, older machines are moved to `Terminating` phase before newer ones. -8. In case of a scale down of an MCD's replica count, `Preserved` machines will be the last to be scaled down. Replica count will always be honoured. 9 +8. In case of a scale down of an MCD's replica count, `Preserved` machines will be the last to be scaled down. Replica count will always be honoured. 9. On increase/decrease of `machinePreserveTimeout`, the new value will only apply to machines that go into `Preserved` phase after the change. Operators can always edit `machine.CurrentStatus.PreserveExpiryTime` to prolong the expiry time of existing `Preserved` machines. -11. [Modify CA FAQ](https://github.com/gardener/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-prevent-cluster-autoscaler-from-scaling-down-a-particular-node) once feature is developed to use `node.machine.sapcloud.io/preserve=now` instead of the `cluster-autoscaler.kubernetes.io/scale-down-disabled=true` currently suggested. This would: +10. 
[Modify CA FAQ](https://github.com/gardener/autoscaler/blob/master/cluster-autoscaler/FAQ.md#how-can-i-prevent-cluster-autoscaler-from-scaling-down-a-particular-node) once feature is developed to use `node.machine.sapcloud.io/preserve=now` instead of the `cluster-autoscaler.kubernetes.io/scale-down-disabled=true` currently suggested. This would: - harmonise machine flow - shield from CA's internals - make it generic and no longer CA specific diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 39368a2cb..7ca81763b 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -2006,15 +2006,6 @@ var _ = Describe("machineset", func() { preservedMachineCount: 2, }, }), - Entry("should not trigger auto preservation of failed machine annotated with preserve=false even if AutoPreserveFailedMachineCount < AutoPreserveFailedMachineMax", testCase{ - setup: setup{ - autoPreserveFailedMachineCount: 0, - autoPreserveFailedMachineMax: 3, - }, - expect: expect{ - preservedMachineCount: 2, - }, - }), Entry("should stop auto preservation of machines annotated with preserve=auto-preserve if AutoPreserveFailedMachineCount > AutoPreserveFailedMachineMax", testCase{ setup: setup{ autoPreserveFailedMachineCount: 1, diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 6554ede46..bb77e4abe 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2520,6 +2520,9 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, // Add annotation to disable CA scale down. // Also add annotation expressing that MCM is the one who added this annotation, so that it can be removed safely when preservation is stopped. 
nodeCopy := node.DeepCopy() + if node.Annotations == nil { + nodeCopy.Annotations = make(map[string]string) + } nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) @@ -2530,7 +2533,7 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, return updatedNode, nil } -// removePreserveAnnotationsOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node +// removePreservationRelatedAnnotationsOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Context, node *v1.Node, removePreserveAnnotation bool) error { // Check if annotation already absent if node.Annotations == nil { From b00ca030081c384c100246e3b325a630d77e52d5 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Fri, 6 Mar 2026 13:56:50 +0530 Subject: [PATCH 77/79] Address review comments given by aaronfern --- .../machinecontroller/machine_util.go | 35 ++----------------- pkg/util/provider/machinecontroller/node.go | 31 ++++++++++++++++ 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index bb77e4abe..a75e978ce 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -2489,7 +2489,7 @@ func (c *controller) stopPreservationIfActive(ctx context.Context, machine *v1al if err != nil { return false, err } - klog.V(2).Infof("Preservation of machine %q and backing node %q has stopped.", machine.Name, nodeName) + 
klog.V(2).Infof("Preservation of machine %q with backing node %q has stopped.", machine.Name, nodeName) return true, nil } @@ -2533,36 +2533,6 @@ func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, return updatedNode, nil } -// removePreservationRelatedAnnotationsOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node -func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Context, node *v1.Node, removePreserveAnnotation bool) error { - // Check if annotation already absent - if node.Annotations == nil { - return nil - } - updateRequired := false - nodeCopy := node.DeepCopy() - // If CA scale-down disabled annotation was added by MCM, it can be safely removed. - // If the annotation was added by some other entity, then it should not be removed. - if nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue { - delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) - delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) - updateRequired = true - } - if removePreserveAnnotation && nodeCopy.Annotations[machineutils.PreserveMachineAnnotationKey] != "" { - delete(nodeCopy.Annotations, machineutils.PreserveMachineAnnotationKey) - updateRequired = true - } - if !updateRequired { - return nil - } - _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("node UPDATE failed for node %q. 
Retrying, error: %s", node.Name, err) - return err - } - return nil -} - // computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, preserveValue string, drainErr error, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { const preserveExpiryMessageSuffix = "Machine preserved until" @@ -2686,11 +2656,12 @@ func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.M timeOutDuration = 1 * time.Minute maxEvictRetries = 1 klog.V(2).Infof( - "Force delete/drain has been triggerred for machine %q with providerID %q and backing node %q. Timeout Occurred:%t", + "Force delete/drain has been triggerred for machine %q with providerID %q and backing node %q. Timeout Occurred:%t, force-drain label present:%t", machine.Name, getProviderID(machine), getNodeName(machine), timeOutOccurred, + forceDrainLabelPresent, ) } else { klog.V(2).Infof( diff --git a/pkg/util/provider/machinecontroller/node.go b/pkg/util/provider/machinecontroller/node.go index 1a8258b6f..da953e1cd 100644 --- a/pkg/util/provider/machinecontroller/node.go +++ b/pkg/util/provider/machinecontroller/node.go @@ -10,6 +10,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" "time" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" @@ -380,3 +381,33 @@ func (c *controller) removePreserveAnnotationOnMachine(ctx context.Context, mach } return updatedClone, nil } + +// removePreservationRelatedAnnotationsOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node +func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Context, node *corev1.Node, removePreserveAnnotation bool) error { + // Check if annotation already absent + if node.Annotations == nil { + return nil + } + 
updateRequired := false + nodeCopy := node.DeepCopy() + // If CA scale-down disabled annotation was added by MCM, it can be safely removed. + // If the annotation was added by some other entity, then it should not be removed. + if nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue { + delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey) + delete(nodeCopy.Annotations, autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey) + updateRequired = true + } + if removePreserveAnnotation && nodeCopy.Annotations[machineutils.PreserveMachineAnnotationKey] != "" { + delete(nodeCopy.Annotations, machineutils.PreserveMachineAnnotationKey) + updateRequired = true + } + if !updateRequired { + return nil + } + _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("node UPDATE failed for node %q. 
Retrying, error: %s", node.Name, err) + return err + } + return nil +} From 1538501d2ceefeee47776946e147f3913fa0a6ae Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 11 Mar 2026 12:41:13 +0530 Subject: [PATCH 78/79] Address review comments by takoverflow - part 1 --- docs/usage/machine-preservation.md | 16 ++++---- pkg/controller/controller_utils.go | 13 ------ pkg/controller/machineset.go | 4 +- pkg/controller/machineset_test.go | 39 ------------------ .../provider/machinecontroller/machine.go | 24 +++++------ .../machinecontroller/machine_util.go | 41 +++++++------------ pkg/util/provider/machinecontroller/node.go | 38 +++++++++-------- pkg/util/provider/machineutils/utils.go | 17 ++++---- 8 files changed, 70 insertions(+), 122 deletions(-) diff --git a/docs/usage/machine-preservation.md b/docs/usage/machine-preservation.md index bb5029bf3..6e0400c44 100644 --- a/docs/usage/machine-preservation.md +++ b/docs/usage/machine-preservation.md @@ -27,7 +27,8 @@ A preserved machine/node has the following properties: - If a machine has no backing node, only `PreserveExpiryTime` is set. - If the machine is preserved and in `Failed` phase: - The `PreserveExpiryTime` is set in the machine's status to indicate when preservation will end. - - The CA scale-down-disabled annotation is added. + - The CA scale-down-disabled annotation is added on the backing node. + - The backing node is cordoned to prevent scheduling of new pods on it. - The backing node is drained of all pods but the daemonset pods remain. - The `NodeCondition` of `Type=Preserved` is updated to show that the preservation was successful. - If a machine has no backing node, only `PreserveExpiryTime` is set. 
@@ -65,12 +66,11 @@ kind: Shoot spec: workers: - cri: - name: containerd + name: containerd name: worker1 autoPreserveFailedMachineMax: 1 machineControllerManager: - machinePreserveTimeout: 72h - + machinePreserveTimeout: 72h ``` #### Configuration Semantics @@ -100,9 +100,8 @@ annotation key: `node.machine.sapcloud.io/preserve` ### ⚠️ Preservation Annotation semantics: Both node and machine objects can be annotated for preservation. -However, if both machine and node have the preservation annotation, the node's annotation value (even if set to "") is honoured and the machine's annotation is deleted. -To prevent confusion and unintended behaviour, it is advised to use the feature by annotating only the node or the machine, and not both. - +However, if both machine and node have the preservation annotation, the node's annotation value (even if set to "") is honoured and the machine's annotation is deleted. +To prevent confusion and unintended behaviour, it is recommended to use preservation by annotating the node object, if it exists and can be accessed. When the `PreserveExpiryTime` of a preserved machine is reached, the preservation will be stopped. Additionally, the preservation annotation is removed to prevent undesired re-preservation of the same machine. This is applicable for both manual and auto-preservation. When a machine is annotated with value `false`, the annotation and value is not removed by MCM. This is to explicitly indicate that the machine should not be auto-preserved by MCM. If the annotation value is set to empty or the annotation is deleted, MCM will again auto-preserve the machine if it is in `Failed` phase and the `AutoPreserveFailedMachineMax` limit is not reached. @@ -147,3 +146,6 @@ In all the cases, when the machine moves to `Running` during preservation, the b - Scale-down preference: Preserved machines are the last to be scaled down. - Preservation status is visible via Node Conditions and Machine Status fields. 
- machinePreserveTimeout changes do not affect existing preserved machines. Operators may edit PreserveExpiryTime directly if required to extend preservation. + + +> NOTE: To prevent confusion and unintended behaviour, it is recommended to use preservation by annotating the node object, if it exists and can be accessed. diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 2bb9d1c21..acd4df5c2 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -797,19 +797,6 @@ func (s ActiveMachines) Less(i, j int) bool { return false } -// AutoPreservedMachines type allows custom sorting of machines so a controller can pick the best ones to delete. -type AutoPreservedMachines []*v1alpha1.Machine - -func (s AutoPreservedMachines) Len() int { return len(s) } -func (s AutoPreservedMachines) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - -func (s AutoPreservedMachines) Less(i, j int) bool { - if s[i].CreationTimestamp != s[j].CreationTimestamp { - return s[i].CreationTimestamp.Before(&s[j].CreationTimestamp) - } - return false -} - // MachineKey is the function used to get the machine name from machine object // ToCheck : as machine-namespace does not matter func MachineKey(machine *v1alpha1.Machine) string { diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 781177740..4f30bb29d 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -936,7 +936,9 @@ func (c *controller) stopAutoPreservationForMachines(ctx context.Context, machin return numToStop } if numOfAutoPreservedMachines > numToStop { - sort.Sort(AutoPreservedMachines(autoPreservedMachines)) + sort.Slice(autoPreservedMachines, func(i, j int) bool { + return autoPreservedMachines[i].CreationTimestamp.Before(&autoPreservedMachines[j].CreationTimestamp) + }) } for index, m := range autoPreservedMachines { if numToStop == 0 { diff --git a/pkg/controller/machineset_test.go 
b/pkg/controller/machineset_test.go index 7ca81763b..08365e98d 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" corev1 "k8s.io/api/core/v1" - "sort" "sync" "time" @@ -2034,44 +2033,6 @@ var _ = Describe("machineset", func() { }), ) }) - Describe("#AutoPreservedMachinesSorting ", func() { - It("should sort auto-preserved failed machines in the order of increasing creation timestamp", func() { - machines := []*machinev1.Machine{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, - CreationTimestamp: metav1.Time{Time: time.Now().Add(-1 * time.Hour)}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-2", - Namespace: testNamespace, - CreationTimestamp: metav1.Time{Time: time.Now().Add(-4 * time.Hour)}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-3", - Namespace: testNamespace, - CreationTimestamp: metav1.Time{Time: time.Now().Add(-3 * time.Hour)}, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-4", - Namespace: testNamespace, - CreationTimestamp: metav1.Time{Time: time.Now().Add(-5 * time.Hour)}, - }, - }, - } - sort.Sort(AutoPreservedMachines(machines)) - for index := range machines[:len(machines)-1] { - Expect(machines[index].CreationTimestamp.Time.Before(machines[index+1].CreationTimestamp.Time)).To(BeTrue()) - } - }) - }) Describe("#shouldFailedMachineBeTerminated", func() { type setup struct { diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index f0298db51..1a4b23494 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -749,13 +749,13 @@ func (c *controller) isCreationProcessing(machine *v1alpha1.Machine) bool { // manageMachinePreservation manages machine preservation based on the preserve annotation values on the node and machine objects. 
func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1alpha1.Machine) (retry machineutils.RetryPeriod, err error) { - machineAnnotationsUpdated := false + machineAnnotationsSynced := false clone := machine.DeepCopy() defer func() { // This needs to be done for cases when machine is neither preserved nor un-preserved (e.g. when preserve changes from now to when-failed on a Failed machine), - // but the LastAppliedNodePreserveValueAnnotation needs to be updated. + // but the LastAppliedNodePreserveValueAnnotation needs to be synced to the machine object. // We compare annotation value in the clone with the original machine object to see if an update is required - if err == nil && !machineAnnotationsUpdated && clone.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] != machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] { + if err == nil && !machineAnnotationsSynced && clone.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] != machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] { _, err = c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) if err != nil { klog.Errorf("error updating LastAppliedNodePreserveValueAnnotation value on machine %q: %v", machine.Name, err) @@ -792,28 +792,28 @@ func (c *controller) manageMachinePreservation(ctx context.Context, machine *v1a } switch effectivePreserveValue { case "", machineutils.PreserveMachineAnnotationValueFalse: - machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, false) + machineAnnotationsSynced, err = c.stopPreservationIfActive(ctx, clone, false) case machineutils.PreserveMachineAnnotationValueWhenFailed: if !machineutils.IsMachineFailed(clone) || machineutils.IsMachinePreservationExpired(clone) { - machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, false) + machineAnnotationsSynced, err = c.stopPreservationIfActive(ctx, clone, 
false) } else { - machineAnnotationsUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + machineAnnotationsSynced, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } case machineutils.PreserveMachineAnnotationValueNow: if machineutils.IsMachinePreservationExpired(clone) { // on timing out, remove preserve annotation to prevent incorrect re-preservation - machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, true) + machineAnnotationsSynced, err = c.stopPreservationIfActive(ctx, clone, true) } else { - machineAnnotationsUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + machineAnnotationsSynced, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } case machineutils.PreserveMachineAnnotationValuePreservedByMCM: if !machineutils.IsMachineFailed(clone) || machineutils.IsMachinePreservationExpired(clone) { // To prevent incorrect re-preservation of a recovered, previously auto-preserved machine on future failures // (since the autoPreserveFailedMachineCount maintained by the machineSetController, may have changed), // in addition to stopping preservation, we also remove the preservation annotation on the machine. 
- machineAnnotationsUpdated, err = c.stopPreservationIfActive(ctx, clone, true) + machineAnnotationsSynced, err = c.stopPreservationIfActive(ctx, clone, true) } else { - machineAnnotationsUpdated, err = c.preserveMachine(ctx, clone, effectivePreserveValue) + machineAnnotationsSynced, err = c.preserveMachine(ctx, clone, effectivePreserveValue) } } if err != nil { @@ -836,9 +836,7 @@ func getEffectivePreservationAnnotations(nodeAnnotationValue string, machineAnno return machineAnnotations[machineutils.PreserveMachineAnnotationKey], machineAnnotations } clonedMachineAnnotations := make(map[string]string) - for k, v := range machineAnnotations { - clonedMachineAnnotations[k] = v - } + maps.Copy(clonedMachineAnnotations, machineAnnotations) delete(clonedMachineAnnotations, machineutils.PreserveMachineAnnotationKey) clonedMachineAnnotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] = nodeAnnotationValue return nodeAnnotationValue, clonedMachineAnnotations diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index a75e978ce..969e85797 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -28,7 +28,6 @@ import ( "encoding/json" "errors" "fmt" - "github.com/gardener/machine-controller-manager/pkg/controller/autoscaler" "maps" "math" "runtime" @@ -2387,8 +2386,6 @@ func (c *controller) preserveMachine(ctx context.Context, machine *v1alpha1.Mach if existingNodePreservedCondition != nil && existingNodePreservedCondition.Status == v1.ConditionTrue { return machineObjectUpdated, nil } - // Preservation incomplete - either the flow is being run for the first time, or previous attempt failed midway - // Step 2: Add annotations to prevent scale down of node by CA updatedNode, err := c.addCAScaleDownDisabledAnnotationOnNode(ctx, node) if err != nil { @@ -2511,28 +2508,6 @@ func (c *controller) setPreserveExpiryTimeOnMachine(ctx 
context.Context, machine return updatedMachine, nil } -// addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node -func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *v1.Node) (*v1.Node, error) { - // Check if annotation already exists with correct value - if node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { - return node, nil - } - // Add annotation to disable CA scale down. - // Also add annotation expressing that MCM is the one who added this annotation, so that it can be removed safely when preservation is stopped. - nodeCopy := node.DeepCopy() - if node.Annotations == nil { - nodeCopy.Annotations = make(map[string]string) - } - nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue - nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue - updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("error trying to update CA annotation on node %q: %v", node.Name, err) - return nil, err - } - return updatedNode, nil -} - // computeNewNodePreservedCondition returns the NodeCondition with the values set according to the preserveValue and the stage of Preservation func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, preserveValue string, drainErr error, existingNodeCondition *v1.NodeCondition) (*v1.NodeCondition, bool) { const preserveExpiryMessageSuffix = "Machine preserved until" @@ -2551,7 +2526,6 @@ func computeNewNodePreservedCondition(currentStatus v1alpha1.CurrentStatus, pres machinePhase := currentStatus.Phase if machinePhase == v1alpha1.MachineFailed { if drainErr == nil { - if 
!strings.Contains(newNodePreservedCondition.Message, v1alpha1.PreservedNodeDrainSuccessful) { newNodePreservedCondition.Message = fmt.Sprintf("%s %s %v.", v1alpha1.PreservedNodeDrainSuccessful, preserveExpiryMessageSuffix, currentStatus.PreserveExpiryTime) newNodePreservedCondition.Status = v1.ConditionTrue @@ -2601,6 +2575,21 @@ func (c *controller) clearMachinePreserveExpiryTime(ctx context.Context, machine return nil } +func (c *controller) removePreserveAnnotationOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { + if machine.Annotations == nil || (machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" && machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "") { + return machine, nil + } + clone := machine.DeepCopy() + delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) + delete(clone.Annotations, machineutils.LastAppliedNodePreserveValueAnnotationKey) + updatedClone, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("failed to delete preserve annotation on machine %q. 
error: %v", machine.Name, err) + return nil, err + } + return updatedClone, nil +} + // drainPreservedNode attempts to drain the node backing a preserved machine func (c *controller) drainPreservedNode(ctx context.Context, machine *v1alpha1.Machine) error { var ( diff --git a/pkg/util/provider/machinecontroller/node.go b/pkg/util/provider/machinecontroller/node.go index da953e1cd..e701bc146 100644 --- a/pkg/util/provider/machinecontroller/node.go +++ b/pkg/util/provider/machinecontroller/node.go @@ -366,22 +366,6 @@ func (c *controller) uncordonNodeIfCordoned(ctx context.Context, nodeName string return err } -func (c *controller) removePreserveAnnotationOnMachine(ctx context.Context, machine *v1alpha1.Machine) (*v1alpha1.Machine, error) { - - if machine.Annotations == nil || (machine.Annotations[machineutils.PreserveMachineAnnotationKey] == "" && machine.Annotations[machineutils.LastAppliedNodePreserveValueAnnotationKey] == "") { - return machine, nil - } - clone := machine.DeepCopy() - delete(clone.Annotations, machineutils.PreserveMachineAnnotationKey) - delete(clone.Annotations, machineutils.LastAppliedNodePreserveValueAnnotationKey) - updatedClone, err := c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{}) - if err != nil { - klog.Errorf("failed to delete preserve annotation on machine %q. 
error: %v", machine.Name, err) - return nil, err - } - return updatedClone, nil -} - // removePreservationRelatedAnnotationsOnNode removes the cluster-autoscaler annotation that disables scale down of preserved node func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Context, node *corev1.Node, removePreserveAnnotation bool) error { // Check if annotation already absent @@ -411,3 +395,25 @@ func (c *controller) removePreservationRelatedAnnotationsOnNode(ctx context.Cont } return nil } + +// addCAScaleDownDisabledAnnotationOnNode adds the cluster-autoscaler annotation to disable scale down of preserved node +func (c *controller) addCAScaleDownDisabledAnnotationOnNode(ctx context.Context, node *corev1.Node) (*corev1.Node, error) { + // Check if annotation already exists with correct value + if node.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] == autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue { + return node, nil + } + // Add annotation to disable CA scale down. + // Also add annotation expressing that MCM is the one who added this annotation, so that it can be removed safely when preservation is stopped. 
+ nodeCopy := node.DeepCopy() + if node.Annotations == nil { + nodeCopy.Annotations = make(map[string]string) + } + nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationValue + nodeCopy.Annotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey] = autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMValue + updatedNode, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{}) + if err != nil { + klog.Errorf("error trying to update CA annotation on node %q: %v", node.Name, err) + return nil, err + } + return updatedNode, nil +} diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 8a60dd799..6316e2982 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -95,27 +95,30 @@ const ( LastAppliedNodePreserveValueAnnotationKey = "node.machine.sapcloud.io/last-applied-node-preserve-value" // PreserveMachineAnnotationValueNow is the annotation value used to explicitly request that - // a Machine be preserved immediately in its current phase + // a Machine be preserved immediately, irrespective of its current phase, and its phase is not changed + // on preservation PreserveMachineAnnotationValueNow = "now" // PreserveMachineAnnotationValueWhenFailed is the annotation value used to explicitly request that - // a Machine be preserved if and when in it enters Failed phase + // a Machine be preserved if and when it enters Failed phase PreserveMachineAnnotationValueWhenFailed = "when-failed" - // PreserveMachineAnnotationValuePreservedByMCM is the annotation value used to explicitly request that - // a Machine be preserved if and when in it enters Failed phase. + // PreserveMachineAnnotationValuePreservedByMCM is the annotation value used by the machineset controller to + // indicate to the machine controller that the machine must be auto-preserved. 
// The AutoPreserveFailedMachineMax, set on the MCD, is enforced based on the number of machines annotated with this value. PreserveMachineAnnotationValuePreservedByMCM = "auto-preserved" - // PreserveMachineAnnotationValueFalse is the annotation value used to indicate to MCM that a machine must not be auto-preserved - // on failure. + // PreserveMachineAnnotationValueFalse is the annotation value used to + // 1) indicate to MCM that a machine must not be auto-preserved on failure + // and, 2) to stop auto-preservation of a machine that is already auto-preserved by MCM. PreserveMachineAnnotationValueFalse = "false" ) // AllowedPreserveAnnotationValues contains the allowed values for the preserve annotation var AllowedPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse, "") -// PreventAutoPreserveAnnotationValues contains the values for which a machine will not be auto-preserved on failure +// PreventAutoPreserveAnnotationValues contains the values to check if a machine is already annotated for preservation, +// in which case it should not be auto-preserved. 
var PreventAutoPreserveAnnotationValues = sets.New(PreserveMachineAnnotationValueNow, PreserveMachineAnnotationValueWhenFailed, PreserveMachineAnnotationValuePreservedByMCM, PreserveMachineAnnotationValueFalse) // RetryPeriod is an alias for specifying the retry period From 9abff82f42627e72d63f87017bec7528f28d4bf0 Mon Sep 17 00:00:00 2001 From: thiyyakat Date: Wed, 11 Mar 2026 20:09:15 +0530 Subject: [PATCH 79/79] Address review comments by gagan16k - part 2 --- docs/usage/machine-preservation.md | 4 ++- pkg/controller/deployment_rolling.go | 4 +-- pkg/controller/machineset.go | 46 +++++++++++++++---------- pkg/controller/machineset_test.go | 20 ++++++----- pkg/util/provider/machineutils/utils.go | 1 - 5 files changed, 44 insertions(+), 31 deletions(-) diff --git a/docs/usage/machine-preservation.md b/docs/usage/machine-preservation.md index 6e0400c44..01301e9bf 100644 --- a/docs/usage/machine-preservation.md +++ b/docs/usage/machine-preservation.md @@ -78,6 +78,8 @@ spec: - `machinePreserveTimeout` : Duration after which preserved machines are automatically released > Note: ⚠️ Changes to `machinePreserveTimeout` apply only to preservation done after the change. +> If `AutoPreserveFailedMachineMax` is decreased, preservation is stopped for older auto-preserved machines (earlier creationTimestamp) until the number of preserved machines is within the new limit. +> If the number of failed machines exceeds the `AutoPreserveFailedMachineMax` limit at any given time, machines with more recent creationTimestamp are auto-preserved first. ### Preservation annotations @@ -148,4 +150,4 @@ In all the cases, when the machine moves to `Running` during preservation, the b - machinePreserveTimeout changes do not affect existing preserved machines. Operators may edit PreserveExpiryTime directly if required to extend preservation. 
-> NOTE: To prevent confusion and unintended behaviour, it is recommended to use preservation by annotating the node object, if it exists and can be accessed. +> NOTE: To prevent confusion and unintended behaviour, it is recommended to use preservation by annotating the node object if it exists and can be accessed. diff --git a/pkg/controller/deployment_rolling.go b/pkg/controller/deployment_rolling.go index 325f93193..3a6a14e5d 100644 --- a/pkg/controller/deployment_rolling.go +++ b/pkg/controller/deployment_rolling.go @@ -481,11 +481,11 @@ func (dc *controller) removeAutoscalerAnnotationsIfRequired(ctx context.Context, return err } // Remove the autoscaler-related annotation only if the by-mcm annotation is already set. If - // by-mcm annotation is not set, the original annotation is likely be put by the end-user for their usecases. + // by-mcm annotation is not set, the original annotation was likely put by the end-user for their use cases. if _, exists := nodeAnnotations[autoscaler.ClusterAutoscalerScaleDownDisabledAnnotationByMCMKey]; exists { // do not remove the autoscaler related annotation if it is added due to ongoing machine preservation. if !machine.Status.CurrentStatus.PreserveExpiryTime.IsZero() { - return nil + continue } err = RemoveAnnotationsOffNode( ctx, diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index 4f30bb29d..cf4abbf20 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -901,27 +901,34 @@ func (c *controller) manageAutoPreservationOfFailedMachines(ctx context.Context, } return machines } - for index, m := range machines { - if machineutils.IsMachineFailed(m) { - // check if machine is already annotated for preservation, if yes, skip. Machine controller will take care of the rest. 
- if machineutils.PreventAutoPreserveAnnotationValues.Has(m.Annotations[machineutils.PreserveMachineAnnotationKey]) { - continue - } - if autoPreservationCapacityRemaining == 0 { - break - } - klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) - updatedMachine, err := machineutils.UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, addAutoPreserveAnnotationOnMachine) - if err != nil { - klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) - // since addAutoPreserveAnnotation uses retries internally, on error we can continue with other machines - continue - } - machines[index] = updatedMachine - autoPreservationCapacityRemaining-- + var autoPreservationCandidates []*v1alpha1.Machine + var others []*v1alpha1.Machine + for _, m := range machines { + // check if machine is already annotated for preservation, if yes, skip. Machine controller will take care of the rest. 
+ if machineutils.IsMachineFailed(m) && !machineutils.PreventAutoPreserveAnnotationValues.Has(m.Annotations[machineutils.PreserveMachineAnnotationKey]) { + autoPreservationCandidates = append(autoPreservationCandidates, m) + } else { + others = append(others, m) + } + } + sort.Slice(autoPreservationCandidates, func(i, j int) bool { + return autoPreservationCandidates[i].CreationTimestamp.After(autoPreservationCandidates[j].CreationTimestamp.Time) + }) + for index, m := range autoPreservationCandidates { + if autoPreservationCapacityRemaining == 0 { + break + } + klog.V(2).Infof("Annotating failed machine %q for auto-preservation as part of machine set %q", m.Name, machineSet.Name) + updatedMachine, err := machineutils.UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, addAutoPreserveAnnotationOnMachine) + if err != nil { + klog.V(2).Infof("Error annotating machine %q for auto-preservation: %v", m.Name, err) + // since addAutoPreserveAnnotation uses retries internally, on error we can continue with other machines + continue } + autoPreservationCandidates[index] = updatedMachine + autoPreservationCapacityRemaining-- } - return machines + return append(autoPreservationCandidates, others...) 
} func (c *controller) stopAutoPreservationForMachines(ctx context.Context, machines []*v1alpha1.Machine, numToStop int) int { @@ -947,6 +954,7 @@ func (c *controller) stopAutoPreservationForMachines(ctx context.Context, machin klog.V(2).Infof("Removing auto-preservation annotation from machine %q as AutoPreserveFailedMachineMax is breached", m.Name) updatedMachine, err := machineutils.UpdateMachineWithRetries(ctx, c.controlMachineClient.Machines(m.Namespace), c.machineLister, m.Namespace, m.Name, removeAutoPreserveAnnotationFromMachine) if err != nil { + klog.Warningf("Error removing %q=%q annotation from machine %q: %v.", machineutils.PreserveMachineAnnotationKey, machineutils.PreserveMachineAnnotationValuePreservedByMCM, m.Name, err) continue } autoPreservedMachines[index] = updatedMachine diff --git a/pkg/controller/machineset_test.go b/pkg/controller/machineset_test.go index 08365e98d..7f807b352 100644 --- a/pkg/controller/machineset_test.go +++ b/pkg/controller/machineset_test.go @@ -1880,8 +1880,9 @@ var _ = Describe("machineset", func() { } testMachine1 := &machinev1.Machine{ ObjectMeta: metav1.ObjectMeta{ - Name: "machine-1", - Namespace: testNamespace, + Name: "machine-1", + Namespace: testNamespace, + CreationTimestamp: metav1.Time{Time: time.Now().Add(-2 * time.Hour)}, }, Status: machinev1.MachineStatus{ CurrentStatus: machinev1.CurrentStatus{ @@ -1891,8 +1892,9 @@ var _ = Describe("machineset", func() { } testMachine2 := &machinev1.Machine{ ObjectMeta: metav1.ObjectMeta{ - Name: "machine-2", - Namespace: testNamespace, + Name: "machine-2", + Namespace: testNamespace, + CreationTimestamp: metav1.Time{Time: time.Now().Add(-1 * time.Hour)}, }, Status: machinev1.MachineStatus{ CurrentStatus: machinev1.CurrentStatus{ @@ -1902,8 +1904,9 @@ var _ = Describe("machineset", func() { } testMachine3 := &machinev1.Machine{ ObjectMeta: metav1.ObjectMeta{ - Name: "machine-3", - Namespace: testNamespace, + Name: "machine-3", + Namespace: testNamespace, + 
CreationTimestamp: metav1.Time{Time: time.Now().Add(-2 * time.Hour)}, }, Status: machinev1.MachineStatus{ CurrentStatus: machinev1.CurrentStatus{ @@ -1913,8 +1916,9 @@ var _ = Describe("machineset", func() { } testMachine4 := &machinev1.Machine{ ObjectMeta: metav1.ObjectMeta{ - Name: "machine-4", - Namespace: testNamespace, + Name: "machine-4", + Namespace: testNamespace, + CreationTimestamp: metav1.Time{Time: time.Now().Add(-2 * time.Hour)}, Annotations: map[string]string{ machineutils.PreserveMachineAnnotationKey: machineutils.PreserveMachineAnnotationValueFalse, }, diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 6316e2982..9755d1c0e 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -173,7 +173,6 @@ func IsMachinePreservationExpired(m *v1alpha1.Machine) bool { type updateMachineFunc func(machine *v1alpha1.Machine) error // UpdateMachineWithRetries updates a machine with given applyUpdate function. Note that machine not found error is ignored. -// The returned bool value can be used to tell if the machine is actually updated. func UpdateMachineWithRetries(ctx context.Context, machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, namespace, name string, applyUpdate updateMachineFunc) (*v1alpha1.Machine, error) { var machine *v1alpha1.Machine