From 42575813b01d2cbacfec6cdd532458fe98090d17 Mon Sep 17 00:00:00 2001 From: vvoeroes Date: Tue, 3 Mar 2026 15:42:06 +0100 Subject: [PATCH 1/4] Feature: Make Retry Period configurable --- cmd/machine-controller-manager/app/controllermanager.go | 9 +++++++++ cmd/machine-controller-manager/app/options/options.go | 3 +++ pkg/util/provider/machineutils/utils.go | 4 +++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cmd/machine-controller-manager/app/controllermanager.go b/cmd/machine-controller-manager/app/controllermanager.go index ba8adf232..92ce6a3fe 100644 --- a/cmd/machine-controller-manager/app/controllermanager.go +++ b/cmd/machine-controller-manager/app/controllermanager.go @@ -37,6 +37,7 @@ import ( mcmcontroller "github.com/gardener/machine-controller-manager/pkg/controller" corecontroller "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/core" machinecontroller "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/machine" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" kubernetesinformers "k8s.io/client-go/informers" kubescheme "k8s.io/client-go/kubernetes/scheme" @@ -218,6 +219,14 @@ func StartControllers(s *options.MCMServer, recorder record.EventRecorder, stop <-chan struct{}) error { + if s.LongRetryOverride != "" { + d, err := time.ParseDuration(s.LongRetryOverride) + if err != nil { + return fmt.Errorf("invalid --long-retry %q: %w", s.LongRetryOverride, err) + } + machineutils.LongRetry = machineutils.RetryPeriod(d) + } + klog.V(4).Infof("Configured LongRetry=%s", time.Duration(machineutils.LongRetry)) klog.V(4).Info("Getting available resources") availableResources, err := getAvailableResources(controlCoreClientBuilder) if err != nil { diff --git a/cmd/machine-controller-manager/app/options/options.go b/cmd/machine-controller-manager/app/options/options.go index 2fb51d051..71525e8f3 100644 --- a/cmd/machine-controller-manager/app/options/options.go +++ b/cmd/machine-controller-manager/app/options/options.go @@ -46,6 +46,7 @@ type MCMServer struct { ControlKubeconfig string TargetKubeconfig string + LongRetryOverride string } // NewMCMServer creates a new MCMServer with a default config. @@ -101,6 +102,8 @@ func (s *MCMServer) AddFlags(fs *pflag.FlagSet) { fs.BoolVar(&s.AutoscalerScaleDownAnnotationDuringRollout, "autoscaler-scaledown-annotation-during-rollout", true, "Add cluster autoscaler scale-down disabled annotation during roll-out.") + fs.StringVar(&s.LongRetryOverride, "long-retry", "", "Override machineutils.LongRetry.") + logs.AddFlags(fs) // Here `logs` is `k8s.io/component-base/logs`. leaderelectionconfig.BindFlags(&s.LeaderElection, fs) diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 141e3a09c..31ea59b29 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -95,9 +95,11 @@ const ( // MediumRetry tells the controller to retry after a medium duration - 3 minutes MediumRetry RetryPeriod = RetryPeriod(3 * time.Minute) // LongRetry tells the controller to retry after a long duration - 10 minutes - LongRetry RetryPeriod = RetryPeriod(10 * time.Minute) + DefaultLongRetry RetryPeriod = RetryPeriod(10 * time.Minute) ) +var LongRetry RetryPeriod = DefaultLongRetry + // EssentialTaints are taints on node object which if added/removed, require an immediate reconcile by machine controller // TODO: update this when taints for ALT updation and PostCreate operations is introduced. var EssentialTaints = []string{TaintNodeCriticalComponentsNotReady} From 6ccfa21c4053ad4af40d0024acf347902d79e7a0 Mon Sep 17 00:00:00 2001 From: vvoeroes Date: Fri, 6 Mar 2026 14:54:30 +0100 Subject: [PATCH 2/4] make it a provider-scope retry period --- .../app/controllermanager.go | 9 --------- .../app/options/options.go | 3 --- pkg/util/provider/app/app.go | 12 ++++++++++++ pkg/util/provider/app/options/options.go | 7 +++++-- pkg/util/provider/machinecontroller/controller.go | 5 +++++ pkg/util/provider/machinecontroller/machine_util.go | 2 +- pkg/util/provider/machineutils/utils.go | 4 +--- 7 files changed, 24 insertions(+), 18 deletions(-) diff --git a/cmd/machine-controller-manager/app/controllermanager.go b/cmd/machine-controller-manager/app/controllermanager.go index 92ce6a3fe..ba8adf232 100644 --- a/cmd/machine-controller-manager/app/controllermanager.go +++ b/cmd/machine-controller-manager/app/controllermanager.go @@ -37,7 +37,6 @@ import ( mcmcontroller "github.com/gardener/machine-controller-manager/pkg/controller" corecontroller "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/core" machinecontroller "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/machine" - "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" kubernetesinformers "k8s.io/client-go/informers" kubescheme "k8s.io/client-go/kubernetes/scheme" @@ -219,14 +218,6 @@ func StartControllers(s *options.MCMServer, recorder record.EventRecorder, stop <-chan struct{}) error { - if s.LongRetryOverride != "" { - d, err := time.ParseDuration(s.LongRetryOverride) - if err != nil { - return fmt.Errorf("invalid --long-retry %q: %w", s.LongRetryOverride, err) - } - machineutils.LongRetry = machineutils.RetryPeriod(d) - } - klog.V(4).Infof("Configured LongRetry=%s", time.Duration(machineutils.LongRetry)) klog.V(4).Info("Getting available resources") availableResources, err := getAvailableResources(controlCoreClientBuilder) if err != nil { diff --git a/cmd/machine-controller-manager/app/options/options.go b/cmd/machine-controller-manager/app/options/options.go index 71525e8f3..2fb51d051 100644 --- a/cmd/machine-controller-manager/app/options/options.go +++ b/cmd/machine-controller-manager/app/options/options.go @@ -46,7 +46,6 @@ type MCMServer struct { ControlKubeconfig string TargetKubeconfig string - LongRetryOverride string } // NewMCMServer creates a new MCMServer with a default config. @@ -102,8 +101,6 @@ func (s *MCMServer) AddFlags(fs *pflag.FlagSet) { fs.BoolVar(&s.AutoscalerScaleDownAnnotationDuringRollout, "autoscaler-scaledown-annotation-during-rollout", true, "Add cluster autoscaler scale-down disabled annotation during roll-out.") - fs.StringVar(&s.LongRetryOverride, "long-retry", "", "Override machineutils.LongRetry.") - logs.AddFlags(fs) // Here `logs` is `k8s.io/component-base/logs`. leaderelectionconfig.BindFlags(&s.LeaderElection, fs) diff --git a/pkg/util/provider/app/app.go b/pkg/util/provider/app/app.go index 054976f9c..44070bcaa 100644 --- a/pkg/util/provider/app/app.go +++ b/pkg/util/provider/app/app.go @@ -38,6 +38,7 @@ import ( coreclientbuilder "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/core" machineclientbuilder "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/machine" machinecontroller "github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" kubernetesinformers "k8s.io/client-go/informers" kubescheme "k8s.io/client-go/kubernetes/scheme" @@ -216,6 +217,16 @@ func StartControllers(s *options.MCServer, recorder record.EventRecorder, stop <-chan struct{}) error { + ResourceExhaustedRetryPeriod := machineutils.LongRetry + if s.ResourceExhaustedRetry != "" { + d, err := time.ParseDuration(s.ResourceExhaustedRetry) + if err != nil { + return fmt.Errorf("invalid --resource-exhausted-retry %q: %w", s.ResourceExhaustedRetry, err) + } + ResourceExhaustedRetryPeriod = machineutils.RetryPeriod(d) + } + klog.V(4).Infof("Configured ResourceExhaustedRetryPeriod=%s", time.Duration(ResourceExhaustedRetryPeriod)) + klog.V(4).Info("Getting available resources") availableResources, err := getAvailableResources(controlCoreClientBuilder) if err != nil { @@ -297,6 +308,7 @@ func StartControllers(s *options.MCServer, s.NodeConditions, s.BootstrapTokenAuthExtraGroups, targetKubernetesVersion, + ResourceExhaustedRetryPeriod, ) if err != nil { return err diff --git a/pkg/util/provider/app/options/options.go b/pkg/util/provider/app/options/options.go index 9e66b4b27..07cd70eb0 100644 --- a/pkg/util/provider/app/options/options.go +++ b/pkg/util/provider/app/options/options.go @@ -45,8 +45,9 @@ import ( type MCServer struct { machineconfig.MachineControllerConfiguration - ControlKubeconfig string - TargetKubeconfig string + ControlKubeconfig string + TargetKubeconfig string + ResourceExhaustedRetry string } // NewMCServer creates a new MCServer with a default config. @@ -116,6 +117,8 @@ func (s *MCServer) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&s.NodeConditions, "node-conditions", s.NodeConditions, "List of comma-separated/case-sensitive node-conditions which when set to True will change machine to a failed state after MachineHealthTimeout duration. It may further be replaced with a new machine if the machine is backed by a machine-set object.") fs.StringVar(&s.BootstrapTokenAuthExtraGroups, "bootstrap-token-auth-extra-groups", s.BootstrapTokenAuthExtraGroups, "Comma-separated list of groups to set bootstrap token's \"auth-extra-groups\" field to") + fs.StringVar(&s.ResourceExhaustedRetry, "resource-exhausted-retry", "", "Retry duration used when machinecreation fails due to ResourceExhausted. Default to LongRetry.") + logs.AddFlags(fs) // adds --v flag for log level. leaderelectionconfig.BindFlags(&s.LeaderElection, fs) diff --git a/pkg/util/provider/machinecontroller/controller.go b/pkg/util/provider/machinecontroller/controller.go index 76cbed046..3f3a28351 100644 --- a/pkg/util/provider/machinecontroller/controller.go +++ b/pkg/util/provider/machinecontroller/controller.go @@ -16,6 +16,7 @@ import ( "github.com/gardener/machine-controller-manager/pkg/util/permits" "github.com/gardener/machine-controller-manager/pkg/util/provider/drain" "github.com/gardener/machine-controller-manager/pkg/util/provider/driver" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" "github.com/gardener/machine-controller-manager/pkg/util/provider/options" "github.com/gardener/machine-controller-manager/pkg/util/worker" @@ -73,6 +74,7 @@ func NewController( nodeConditions string, bootstrapTokenAuthExtraGroups string, targetKubernetesVersion *semver.Version, + resourceExhaustedRetryPeriod machineutils.RetryPeriod, ) (Controller, error) { const ( permitGiverStaleEntryTimeout = 1 * time.Hour @@ -121,6 +123,7 @@ func NewController( volumeAttachmentHandler: nil, permitGiver: permits.NewPermitGiver(permitGiverStaleEntryTimeout, janitorFreq), targetKubernetesVersion: targetKubernetesVersion, + resourceExhaustedRetryPeriod: resourceExhaustedRetryPeriod, } controller.internalExternalScheme = runtime.NewScheme() @@ -298,6 +301,8 @@ type controller struct { machineClassSynced cache.InformerSynced machineSynced cache.InformerSynced podSynced cache.InformerSynced + + resourceExhaustedRetryPeriod machineutils.RetryPeriod } func (dc *controller) Run(workers int, stopCh <-chan struct{}) { diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 36fa6f414..844e2f090 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -804,7 +804,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a if ok { switch machineErr.Code() { case codes.ResourceExhausted: - retryRequired = machineutils.LongRetry + retryRequired = c.resourceExhaustedRetryPeriod lastKnownState = machine.Status.LastKnownState case codes.Unknown, codes.DeadlineExceeded, codes.Aborted, codes.Unavailable: retryRequired = machineutils.ShortRetry diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 31ea59b29..141e3a09c 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -95,11 +95,9 @@ const ( // MediumRetry tells the controller to retry after a medium duration - 3 minutes MediumRetry RetryPeriod = RetryPeriod(3 * time.Minute) // LongRetry tells the controller to retry after a long duration - 10 minutes - DefaultLongRetry RetryPeriod = RetryPeriod(10 * time.Minute) + LongRetry RetryPeriod = RetryPeriod(10 * time.Minute) ) -var LongRetry RetryPeriod = DefaultLongRetry - // EssentialTaints are taints on node object which if added/removed, require an immediate reconcile by machine controller // TODO: update this when taints for ALT updation and PostCreate operations is introduced. var EssentialTaints = []string{TaintNodeCriticalComponentsNotReady} From d56bfbb76c7cce807c2e06c19357d6689e29666b Mon Sep 17 00:00:00 2001 From: vvoeroes Date: Mon, 9 Mar 2026 15:18:10 +0100 Subject: [PATCH 3/4] fix tests and make naming more consistent --- pkg/util/provider/app/app.go | 8 +-- pkg/util/provider/app/options/options.go | 2 +- .../provider/machinecontroller/controller.go | 6 +- .../machinecontroller/machine_test.go | 70 +++++++++++++++++-- .../machinecontroller/machine_util.go | 5 +- 5 files changed, 76 insertions(+), 15 deletions(-) diff --git a/pkg/util/provider/app/app.go b/pkg/util/provider/app/app.go index 44070bcaa..3af57d02d 100644 --- a/pkg/util/provider/app/app.go +++ b/pkg/util/provider/app/app.go @@ -217,15 +217,15 @@ func StartControllers(s *options.MCServer, recorder record.EventRecorder, stop <-chan struct{}) error { - ResourceExhaustedRetryPeriod := machineutils.LongRetry + resourceExhaustedRetryPeriod := machineutils.LongRetry if s.ResourceExhaustedRetry != "" { d, err := time.ParseDuration(s.ResourceExhaustedRetry) if err != nil { return fmt.Errorf("invalid --resource-exhausted-retry %q: %w", s.ResourceExhaustedRetry, err) } - ResourceExhaustedRetryPeriod = machineutils.RetryPeriod(d) + resourceExhaustedRetryPeriod = machineutils.RetryPeriod(d) } - klog.V(4).Infof("Configured ResourceExhaustedRetryPeriod=%s", time.Duration(ResourceExhaustedRetryPeriod)) + klog.V(4).Infof("Configured ResourceExhaustedRetryPeriod=%s", time.Duration(resourceExhaustedRetryPeriod)) klog.V(4).Info("Getting available resources") availableResources, err := getAvailableResources(controlCoreClientBuilder) @@ -308,7 +308,7 @@ func StartControllers(s *options.MCServer, s.NodeConditions, s.BootstrapTokenAuthExtraGroups, targetKubernetesVersion, - ResourceExhaustedRetryPeriod, + resourceExhaustedRetryPeriod, ) if err != nil { return err diff --git a/pkg/util/provider/app/options/options.go b/pkg/util/provider/app/options/options.go index 07cd70eb0..5d043de84 100644 --- a/pkg/util/provider/app/options/options.go +++ b/pkg/util/provider/app/options/options.go @@ -117,7 +117,7 @@ func (s *MCServer) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&s.NodeConditions, "node-conditions", s.NodeConditions, "List of comma-separated/case-sensitive node-conditions which when set to True will change machine to a failed state after MachineHealthTimeout duration. It may further be replaced with a new machine if the machine is backed by a machine-set object.") fs.StringVar(&s.BootstrapTokenAuthExtraGroups, "bootstrap-token-auth-extra-groups", s.BootstrapTokenAuthExtraGroups, "Comma-separated list of groups to set bootstrap token's \"auth-extra-groups\" field to") - fs.StringVar(&s.ResourceExhaustedRetry, "resource-exhausted-retry", "", "Retry duration used when machinecreation fails due to ResourceExhausted. Default to LongRetry.") + fs.StringVar(&s.ResourceExhaustedRetry, "resource-exhausted-retry", "", "Retry duration used when machine creation fails with ResourceExhausted. Defaults to LongRetry.") logs.AddFlags(fs) // adds --v flag for log level. diff --git a/pkg/util/provider/machinecontroller/controller.go b/pkg/util/provider/machinecontroller/controller.go index 3f3a28351..896e43446 100644 --- a/pkg/util/provider/machinecontroller/controller.go +++ b/pkg/util/provider/machinecontroller/controller.go @@ -74,7 +74,7 @@ func NewController( nodeConditions string, bootstrapTokenAuthExtraGroups string, targetKubernetesVersion *semver.Version, - resourceExhaustedRetryPeriod machineutils.RetryPeriod, + resourceExhaustedRetry machineutils.RetryPeriod, ) (Controller, error) { const ( permitGiverStaleEntryTimeout = 1 * time.Hour @@ -123,7 +123,7 @@ func NewController( volumeAttachmentHandler: nil, permitGiver: permits.NewPermitGiver(permitGiverStaleEntryTimeout, janitorFreq), targetKubernetesVersion: targetKubernetesVersion, - resourceExhaustedRetryPeriod: resourceExhaustedRetryPeriod, + resourceExhaustedRetry: resourceExhaustedRetry, } controller.internalExternalScheme = runtime.NewScheme() @@ -302,7 +302,7 @@ type controller struct { machineSynced cache.InformerSynced podSynced cache.InformerSynced - resourceExhaustedRetryPeriod machineutils.RetryPeriod + resourceExhaustedRetry machineutils.RetryPeriod } func (dc *controller) Run(workers int, stopCh <-chan struct{}) { diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index 76214eac7..4c328a38a 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -478,12 +478,13 @@ var _ = Describe("machine", func() { Describe("#triggerCreationFlow", func() { type setup struct { - machineClasses []*v1alpha1.MachineClass - machines []*v1alpha1.Machine - secrets []*corev1.Secret - nodes []*corev1.Node - fakeResourceActions *customfake.ResourceActions - noTargetCluster bool + machineClasses []*v1alpha1.MachineClass + machines []*v1alpha1.Machine + secrets []*corev1.Secret + nodes []*corev1.Node + fakeResourceActions *customfake.ResourceActions + noTargetCluster bool + resourceExhaustedRetry machineutils.RetryPeriod } type action struct { machine string @@ -542,6 +543,10 @@ var _ = Describe("machine", func() { waitForCacheSync(stop, controller) + if data.setup.resourceExhaustedRetry != 0 { + controller.resourceExhaustedRetry = data.setup.resourceExhaustedRetry + } + action := data.action machine, err := controller.controlMachineClient.Machines(objMeta.Namespace).Get(context.TODO(), action.machine, metav1.GetOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -916,6 +921,59 @@ var _ = Describe("machine", func() { retry: machineutils.LongRetry, }, }), + Entry("Machine creation fails with CrashLoopBackOff due to resource exhaustion with configured retry period", &data{ + setup: setup{ + secrets: []*corev1.Secret{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + Data: map[string][]byte{"userData": []byte("test")}, + }, + }, + machineClasses: []*v1alpha1.MachineClass{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + SecretRef: newSecretReference(objMeta, 0), + }, + }, + machines: newMachines(1, &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + }, + }, nil, nil, nil, nil, true, metav1.Now()), + resourceExhaustedRetry: machineutils.RetryPeriod(30 * time.Minute), + }, + action: action{ + machine: "machine-0", + fakeDriver: &driver.FakeDriver{ + VMExists: false, + Err: status.Error(codes.ResourceExhausted, "Provider does not have capacity to create VM"), + }, + }, + expect: expect{ + machine: newMachine(&v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machineClass", + }, + }, + }, &v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineCrashLoopBackOff, + }, + LastOperation: v1alpha1.LastOperation{ + ErrorCode: codes.ResourceExhausted.String(), + }, + }, nil, nil, nil, true, metav1.Now()), + err: status.Error(codes.ResourceExhausted, "Provider does not have capacity to create VM"), + retry: machineutils.RetryPeriod(30 * time.Minute), + }, + }), Entry("Machine creation fails with Failure due to timeout", &data{ setup: setup{ secrets: []*corev1.Secret{ diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 844e2f090..b081979c9 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -804,7 +804,10 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a if ok { switch machineErr.Code() { case codes.ResourceExhausted: - retryRequired = c.resourceExhaustedRetryPeriod + if c.resourceExhaustedRetry == 0 { + c.resourceExhaustedRetry = machineutils.LongRetry + } + retryRequired = c.resourceExhaustedRetry lastKnownState = machine.Status.LastKnownState case codes.Unknown, codes.DeadlineExceeded, codes.Aborted, codes.Unavailable: retryRequired = machineutils.ShortRetry From 167d66865d97f15e9821c2506e9321c045ca12ba Mon Sep 17 00:00:00 2001 From: vvoeroes Date: Fri, 13 Mar 2026 11:14:47 +0100 Subject: [PATCH 4/4] add review --- pkg/util/provider/app/app.go | 9 +-------- pkg/util/provider/app/options/options.go | 12 +++++++----- pkg/util/provider/options/types.go | 2 ++ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pkg/util/provider/app/app.go b/pkg/util/provider/app/app.go index 3af57d02d..a4372d02e 100644 --- a/pkg/util/provider/app/app.go +++ b/pkg/util/provider/app/app.go @@ -217,14 +217,7 @@ func StartControllers(s *options.MCServer, recorder record.EventRecorder, stop <-chan struct{}) error { - resourceExhaustedRetryPeriod := machineutils.LongRetry - if s.ResourceExhaustedRetry != "" { - d, err := time.ParseDuration(s.ResourceExhaustedRetry) - if err != nil { - return fmt.Errorf("invalid --resource-exhausted-retry %q: %w", s.ResourceExhaustedRetry, err) - } - resourceExhaustedRetryPeriod = machineutils.RetryPeriod(d) - } + resourceExhaustedRetryPeriod := machineutils.RetryPeriod(s.ResourceExhaustedRetry.Duration) klog.V(4).Infof("Configured ResourceExhaustedRetryPeriod=%s", time.Duration(resourceExhaustedRetryPeriod)) klog.V(4).Info("Getting available resources") diff --git a/pkg/util/provider/app/options/options.go b/pkg/util/provider/app/options/options.go index 5d043de84..93ae6bf75 100644 --- a/pkg/util/provider/app/options/options.go +++ b/pkg/util/provider/app/options/options.go @@ -28,6 +28,7 @@ import ( "time" drain "github.com/gardener/machine-controller-manager/pkg/util/provider/drain" + "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" machineconfig "github.com/gardener/machine-controller-manager/pkg/util/provider/options" "github.com/spf13/pflag" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -45,9 +46,8 @@ import ( type MCServer struct { machineconfig.MachineControllerConfiguration - ControlKubeconfig string - TargetKubeconfig string - ResourceExhaustedRetry string + ControlKubeconfig string + TargetKubeconfig string } // NewMCServer creates a new MCServer with a default config. @@ -117,8 +117,7 @@ func (s *MCServer) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&s.NodeConditions, "node-conditions", s.NodeConditions, "List of comma-separated/case-sensitive node-conditions which when set to True will change machine to a failed state after MachineHealthTimeout duration. It may further be replaced with a new machine if the machine is backed by a machine-set object.") fs.StringVar(&s.BootstrapTokenAuthExtraGroups, "bootstrap-token-auth-extra-groups", s.BootstrapTokenAuthExtraGroups, "Comma-separated list of groups to set bootstrap token's \"auth-extra-groups\" field to") - fs.StringVar(&s.ResourceExhaustedRetry, "resource-exhausted-retry", "", "Retry duration used when machine creation fails with ResourceExhausted. Defaults to LongRetry.") - + fs.DurationVar(&s.ResourceExhaustedRetry.Duration, "resource-exhausted-retry", time.Duration(machineutils.LongRetry), "Retry duration used when machine creation fails with ResourceExhausted. Defaults to LongRetry.") logs.AddFlags(fs) // adds --v flag for log level. leaderelectionconfig.BindFlags(&s.LeaderElection, fs) @@ -193,6 +192,9 @@ func (s *MCServer) Validate() error { if s.ControlKubeconfig == "" && s.TargetKubeconfig == constants.TargetKubeconfigDisabledValue { errs = append(errs, fmt.Errorf("--control-kubeconfig cannot be empty if --target-kubeconfig=%s is specified", constants.TargetKubeconfigDisabledValue)) } + if s.ResourceExhaustedRetry.Duration < 0 { + errs = append(errs, fmt.Errorf("resource exhausted retry duration should be a non negative value: got: %v", s.ResourceExhaustedRetry.Duration)) + } return utilerrors.NewAggregate(errs) } diff --git a/pkg/util/provider/options/types.go b/pkg/util/provider/options/types.go index d1be4c2c9..2c0172f9e 100644 --- a/pkg/util/provider/options/types.go +++ b/pkg/util/provider/options/types.go @@ -73,6 +73,8 @@ type MachineControllerConfiguration struct { //BootstrapTokenAuthExtraGroups is a comma-separated string of groups to set bootstrap token's "auth-extra-groups" field to. BootstrapTokenAuthExtraGroups string + + ResourceExhaustedRetry metav1.Duration } // SafetyOptions are used to configure the upper-limit and lower-limit