Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pkg/util/provider/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
coreclientbuilder "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/core"
machineclientbuilder "github.com/gardener/machine-controller-manager/pkg/util/clientbuilder/machine"
machinecontroller "github.com/gardener/machine-controller-manager/pkg/util/provider/machinecontroller"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils"
kubernetesinformers "k8s.io/client-go/informers"
kubescheme "k8s.io/client-go/kubernetes/scheme"

Expand Down Expand Up @@ -216,6 +217,9 @@ func StartControllers(s *options.MCServer,
recorder record.EventRecorder,
stop <-chan struct{}) error {

resourceExhaustedRetryPeriod := machineutils.RetryPeriod(s.ResourceExhaustedRetry.Duration)
klog.V(4).Infof("Configured ResourceExhaustedRetryPeriod=%s", time.Duration(resourceExhaustedRetryPeriod))

klog.V(4).Info("Getting available resources")
availableResources, err := getAvailableResources(controlCoreClientBuilder)
if err != nil {
Expand Down Expand Up @@ -297,6 +301,7 @@ func StartControllers(s *options.MCServer,
s.NodeConditions,
s.BootstrapTokenAuthExtraGroups,
targetKubernetesVersion,
resourceExhaustedRetryPeriod,
)
if err != nil {
return err
Expand Down
5 changes: 5 additions & 0 deletions pkg/util/provider/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"time"

drain "github.com/gardener/machine-controller-manager/pkg/util/provider/drain"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils"
machineconfig "github.com/gardener/machine-controller-manager/pkg/util/provider/options"
"github.com/spf13/pflag"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -116,6 +117,7 @@ func (s *MCServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.NodeConditions, "node-conditions", s.NodeConditions, "List of comma-separated/case-sensitive node-conditions which when set to True will change machine to a failed state after MachineHealthTimeout duration. It may further be replaced with a new machine if the machine is backed by a machine-set object.")
fs.StringVar(&s.BootstrapTokenAuthExtraGroups, "bootstrap-token-auth-extra-groups", s.BootstrapTokenAuthExtraGroups, "Comma-separated list of groups to set bootstrap token's \"auth-extra-groups\" field to")

fs.DurationVar(&s.ResourceExhaustedRetry.Duration, "resource-exhausted-retry", time.Duration(machineutils.LongRetry), "Retry duration used when machine creation fails with ResourceExhausted. Defaults to LongRetry.")
logs.AddFlags(fs) // adds --v flag for log level.

leaderelectionconfig.BindFlags(&s.LeaderElection, fs)
Expand Down Expand Up @@ -190,6 +192,9 @@ func (s *MCServer) Validate() error {
if s.ControlKubeconfig == "" && s.TargetKubeconfig == constants.TargetKubeconfigDisabledValue {
errs = append(errs, fmt.Errorf("--control-kubeconfig cannot be empty if --target-kubeconfig=%s is specified", constants.TargetKubeconfigDisabledValue))
}
if s.ResourceExhaustedRetry.Duration < 0 {
errs = append(errs, fmt.Errorf("resource exhausted retry duration should be a non negative value: got: %v", s.ResourceExhaustedRetry.Duration))
}

return utilerrors.NewAggregate(errs)
}
5 changes: 5 additions & 0 deletions pkg/util/provider/machinecontroller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/gardener/machine-controller-manager/pkg/util/permits"
"github.com/gardener/machine-controller-manager/pkg/util/provider/drain"
"github.com/gardener/machine-controller-manager/pkg/util/provider/driver"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils"
"github.com/gardener/machine-controller-manager/pkg/util/provider/options"
"github.com/gardener/machine-controller-manager/pkg/util/worker"

Expand Down Expand Up @@ -73,6 +74,7 @@ func NewController(
nodeConditions string,
bootstrapTokenAuthExtraGroups string,
targetKubernetesVersion *semver.Version,
resourceExhaustedRetry machineutils.RetryPeriod,
) (Controller, error) {
const (
permitGiverStaleEntryTimeout = 1 * time.Hour
Expand Down Expand Up @@ -121,6 +123,7 @@ func NewController(
volumeAttachmentHandler: nil,
permitGiver: permits.NewPermitGiver(permitGiverStaleEntryTimeout, janitorFreq),
targetKubernetesVersion: targetKubernetesVersion,
resourceExhaustedRetry: resourceExhaustedRetry,
}

controller.internalExternalScheme = runtime.NewScheme()
Expand Down Expand Up @@ -298,6 +301,8 @@ type controller struct {
machineClassSynced cache.InformerSynced
machineSynced cache.InformerSynced
podSynced cache.InformerSynced

resourceExhaustedRetry machineutils.RetryPeriod
}

func (dc *controller) Run(workers int, stopCh <-chan struct{}) {
Expand Down
70 changes: 64 additions & 6 deletions pkg/util/provider/machinecontroller/machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,12 +478,13 @@ var _ = Describe("machine", func() {

Describe("#triggerCreationFlow", func() {
type setup struct {
machineClasses []*v1alpha1.MachineClass
machines []*v1alpha1.Machine
secrets []*corev1.Secret
nodes []*corev1.Node
fakeResourceActions *customfake.ResourceActions
noTargetCluster bool
machineClasses []*v1alpha1.MachineClass
machines []*v1alpha1.Machine
secrets []*corev1.Secret
nodes []*corev1.Node
fakeResourceActions *customfake.ResourceActions
noTargetCluster bool
resourceExhaustedRetry machineutils.RetryPeriod
}
type action struct {
machine string
Expand Down Expand Up @@ -542,6 +543,10 @@ var _ = Describe("machine", func() {

waitForCacheSync(stop, controller)

if data.setup.resourceExhaustedRetry != 0 {
controller.resourceExhaustedRetry = data.setup.resourceExhaustedRetry
}

action := data.action
machine, err := controller.controlMachineClient.Machines(objMeta.Namespace).Get(context.TODO(), action.machine, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
Expand Down Expand Up @@ -916,6 +921,59 @@ var _ = Describe("machine", func() {
retry: machineutils.LongRetry,
},
}),
Entry("Machine creation fails with CrashLoopBackOff due to resource exhaustion with configured retry period", &data{
setup: setup{
secrets: []*corev1.Secret{
{
ObjectMeta: *newObjectMeta(objMeta, 0),
Data: map[string][]byte{"userData": []byte("test")},
},
},
machineClasses: []*v1alpha1.MachineClass{
{
ObjectMeta: *newObjectMeta(objMeta, 0),
SecretRef: newSecretReference(objMeta, 0),
},
},
machines: newMachines(1, &v1alpha1.MachineTemplateSpec{
ObjectMeta: *newObjectMeta(objMeta, 0),
Spec: v1alpha1.MachineSpec{
Class: v1alpha1.ClassSpec{
Kind: "MachineClass",
Name: "machine-0",
},
},
}, nil, nil, nil, nil, true, metav1.Now()),
resourceExhaustedRetry: machineutils.RetryPeriod(30 * time.Minute),
},
action: action{
machine: "machine-0",
fakeDriver: &driver.FakeDriver{
VMExists: false,
Err: status.Error(codes.ResourceExhausted, "Provider does not have capacity to create VM"),
},
},
expect: expect{
machine: newMachine(&v1alpha1.MachineTemplateSpec{
ObjectMeta: *newObjectMeta(objMeta, 0),
Spec: v1alpha1.MachineSpec{
Class: v1alpha1.ClassSpec{
Kind: "MachineClass",
Name: "machineClass",
},
},
}, &v1alpha1.MachineStatus{
CurrentStatus: v1alpha1.CurrentStatus{
Phase: v1alpha1.MachineCrashLoopBackOff,
},
LastOperation: v1alpha1.LastOperation{
ErrorCode: codes.ResourceExhausted.String(),
},
}, nil, nil, nil, true, metav1.Now()),
err: status.Error(codes.ResourceExhausted, "Provider does not have capacity to create VM"),
retry: machineutils.RetryPeriod(30 * time.Minute),
},
}),
Entry("Machine creation fails with Failure due to timeout", &data{
setup: setup{
secrets: []*corev1.Secret{
Expand Down
5 changes: 4 additions & 1 deletion pkg/util/provider/machinecontroller/machine_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,10 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
if ok {
switch machineErr.Code() {
case codes.ResourceExhausted:
retryRequired = machineutils.LongRetry
if c.resourceExhaustedRetry == 0 {
c.resourceExhaustedRetry = machineutils.LongRetry
}
retryRequired = c.resourceExhaustedRetry
lastKnownState = machine.Status.LastKnownState
case codes.Unknown, codes.DeadlineExceeded, codes.Aborted, codes.Unavailable:
retryRequired = machineutils.ShortRetry
Expand Down
2 changes: 2 additions & 0 deletions pkg/util/provider/options/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ type MachineControllerConfiguration struct {

//BootstrapTokenAuthExtraGroups is a comma-separated string of groups to set bootstrap token's "auth-extra-groups" field to.
BootstrapTokenAuthExtraGroups string

// ResourceExhaustedRetry is the retry duration used when machine creation fails with ResourceExhausted.
// NOTE(review): presumably populated from the --resource-exhausted-retry flag — confirm.
ResourceExhaustedRetry metav1.Duration
}

// SafetyOptions are used to configure the upper-limit and lower-limit
Expand Down