diff --git a/pkg/operator/apiserver/controller/workload/workload.go b/pkg/operator/apiserver/controller/workload/workload.go index 7d031f5eda..b7a1b08a5a 100644 --- a/pkg/operator/apiserver/controller/workload/workload.go +++ b/pkg/operator/apiserver/controller/workload/workload.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "strings" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -19,7 +18,6 @@ import ( operatorv1 "github.com/openshift/api/operator/v1" applyoperatorv1 "github.com/openshift/client-go/operator/applyconfigurations/operator/v1" - "github.com/openshift/library-go/pkg/apps/deployment" "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/events" "github.com/openshift/library-go/pkg/operator/status" @@ -291,7 +289,7 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o deploymentAvailableCondition = deploymentAvailableCondition. WithStatus(operatorv1.ConditionFalse). WithReason("NoPod"). - WithMessage(fmt.Sprintf("no %s.%s pods available on any node.", workload.Name, c.targetNamespace)) + WithMessage(fmt.Sprintf("no %s.%s pods available on any node", workload.Name, c.targetNamespace)) } else { deploymentAvailableCondition = deploymentAvailableCondition. WithStatus(operatorv1.ConditionTrue). 
@@ -303,23 +301,24 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o desiredReplicas = *(workload.Spec.Replicas) } - // If the workload is up to date, then we are no longer progressing - workloadAtHighestGeneration := workload.ObjectMeta.Generation == workload.Status.ObservedGeneration - // Update is done when all pods have been updated to the latest revision - // and the deployment controller has reported NewReplicaSetAvailable - workloadIsBeingUpdated := !workloadAtHighestGeneration || !hasDeploymentProgressed(workload.Status) - workloadIsBeingUpdatedTooLong := v1helpers.IsUpdatingTooLong(previousStatus, *deploymentProgressingCondition.Type) - if !workloadAtHighestGeneration { - deploymentProgressingCondition = deploymentProgressingCondition. - WithStatus(operatorv1.ConditionTrue). - WithReason("NewGeneration"). - WithMessage(fmt.Sprintf("deployment/%s.%s: observed generation is %d, desired generation is %d.", workload.Name, c.targetNamespace, workload.Status.ObservedGeneration, workload.ObjectMeta.Generation)) - } else if workloadIsBeingUpdated { + // Update is done when the deployment controller has reported NewReplicaSetAvailable. + // Checking the current vs. observed generation is deliberately avoided here, since we don't want to report Progressing on scaling (which also bumps the generation). + progressTimedOutMessage, workloadIsBeingUpdatedTooLong := hasDeploymentTimedOutProgressing(workload.Status) + workloadIsBeingUpdated := !hasDeploymentProgressed(workload.Status) && !workloadIsBeingUpdatedTooLong + switch { + case workloadIsBeingUpdated: deploymentProgressingCondition = deploymentProgressingCondition. WithStatus(operatorv1.ConditionTrue). WithReason("PodsUpdating"). 
- WithMessage(fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest generation and %d/%d pods are available", workload.Name, c.targetNamespace, workload.Status.UpdatedReplicas, desiredReplicas, workload.Status.AvailableReplicas, desiredReplicas)) - } else { + WithMessage(fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest revision and %d/%d pods are available", workload.Name, c.targetNamespace, workload.Status.UpdatedReplicas, desiredReplicas, workload.Status.AvailableReplicas, desiredReplicas)) + + case workloadIsBeingUpdatedTooLong: + deploymentProgressingCondition = deploymentProgressingCondition. + WithStatus(operatorv1.ConditionFalse). + WithReason("ProgressDeadlineExceeded"). + WithMessage(fmt.Sprintf("deployment/%s.%s has timed out progressing: %s", workload.Name, c.targetNamespace, progressTimedOutMessage)) + + default: // Terminating pods don't account for any of the other status fields but // still can exist in a state when they are accepting connections and would // contribute to unexpected behavior when we report Progressing=False. @@ -332,22 +331,22 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o WithReason("AsExpected") } - // During a rollout the default maxSurge (25%) will allow the available - // replicas to temporarily exceed the desired replica count. If this were - // to occur, the operator should not report degraded. - workloadHasAllPodsAvailable := workload.Status.AvailableReplicas >= desiredReplicas - if !workloadHasAllPodsAvailable && (!workloadIsBeingUpdated || workloadIsBeingUpdatedTooLong) { - numNonAvailablePods := desiredReplicas - workload.Status.AvailableReplicas + // Degraded is set when the deployment is not Available or timed out progressing. + // This will cause the operator to go Degraded during the initial rollout. + switch { + case workload.Status.AvailableReplicas == 0: deploymentDegradedCondition = deploymentDegradedCondition. 
WithStatus(operatorv1.ConditionTrue). - WithReason("UnavailablePod") - podContainersStatus, err := deployment.PodContainersStatus(workload, c.podsLister) - if err != nil { - podContainersStatus = []string{fmt.Sprintf("failed to get pod containers details: %v", err)} - } + WithReason("Unavailable"). + WithMessage(fmt.Sprintf("no %s.%s pods available on any node", workload.Name, c.targetNamespace)) + + case workloadIsBeingUpdatedTooLong: deploymentDegradedCondition = deploymentDegradedCondition. - WithMessage(fmt.Sprintf("%v of %v requested instances are unavailable for %s.%s (%s)", numNonAvailablePods, desiredReplicas, workload.Name, c.targetNamespace, strings.Join(podContainersStatus, ", "))) - } else { + WithStatus(operatorv1.ConditionTrue). + WithReason("ProgressDeadlineExceeded"). + WithMessage(fmt.Sprintf("deployment/%s.%s has timed out progressing: %s", workload.Name, c.targetNamespace, progressTimedOutMessage)) + + default: deploymentDegradedCondition = deploymentDegradedCondition. WithStatus(operatorv1.ConditionFalse). WithReason("AsExpected") @@ -356,8 +355,11 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o // if the deployment is all available and at the expected generation, then update the version to the latest // when we update, the image pull spec should immediately be different, which should immediately cause a deployment rollout // which should immediately result in a deployment generation diff, which should cause this block to be skipped until it is ready. 
- workloadHasAllPodsUpdated := workload.Status.UpdatedReplicas == desiredReplicas - if workloadAtHighestGeneration && workloadHasAllPodsAvailable && workloadHasAllPodsUpdated && operatorConfigAtHighestGeneration { + if operatorConfigAtHighestGeneration && + workload.ObjectMeta.Generation == workload.Status.ObservedGeneration && + workload.Status.AvailableReplicas == desiredReplicas && + workload.Status.UpdatedReplicas == desiredReplicas { + c.versionRecorder.SetVersion(c.constructOperandNameFor(workload.Name), c.targetOperandVersion) } @@ -386,6 +388,17 @@ func hasDeploymentProgressed(status appsv1.DeploymentStatus) bool { return false } +// hasDeploymentTimedOutProgressing returns true if the deployment reports ProgressDeadlineExceeded. +// The function returns the Progressing condition message as the first return value. +func hasDeploymentTimedOutProgressing(status appsv1.DeploymentStatus) (string, bool) { + for _, cond := range status.Conditions { + if cond.Type == appsv1.DeploymentProgressing { + return cond.Message, cond.Status == corev1.ConditionFalse && cond.Reason == "ProgressDeadlineExceeded" + } + } + return "", false +} + // EnsureAtMostOnePodPerNode updates the deployment spec to prevent more than // one pod of a given replicaset from landing on a node. It accomplishes this // by adding a label on the template and updates the pod anti-affinity term to include that label. 
diff --git a/pkg/operator/apiserver/controller/workload/workload_test.go b/pkg/operator/apiserver/controller/workload/workload_test.go index fb6d1b8c97..4ae0a24cb5 100644 --- a/pkg/operator/apiserver/controller/workload/workload_test.go +++ b/pkg/operator/apiserver/controller/workload/workload_test.go @@ -66,7 +66,7 @@ func TestUpdateOperatorStatus(t *testing.T) { validateOperatorStatus func(*operatorv1.OperatorStatus) error }{ { - name: "scenario: no workload, no errors thus we are degraded and we are progressing", + name: "no workload", validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ { @@ -99,7 +99,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: no workload but errors thus we are degraded and we are progressing", + name: "no workload with sync errors", errors: []error{fmt.Errorf("nasty error")}, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ @@ -133,7 +133,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: we have an unavailable workload being updated for too long and no errors thus we are degraded", + name: "unavailable workload with progress deadline exceeded", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -184,7 +184,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), Status: operatorv1.ConditionFalse, Reason: "NoPod", - Message: "no apiserver.openshift-apiserver pods available on any node.", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), @@ -193,21 +193,21 @@ func TestUpdateOperatorStatus(t *testing.T) { { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), Status: operatorv1.ConditionTrue, - 
Reason: "UnavailablePod", - Message: "3 of 3 requested instances are unavailable for apiserver.openshift-apiserver (container is waiting in pending apiserver pod)", + Reason: "Unavailable", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest generation and 0/3 pods are available", + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: timed out", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "scenario: we have an unavailable workload being updated for a short time and no errors so we are progressing", + name: "unavailable workload progressing normally", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -258,29 +258,78 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), Status: operatorv1.ConditionFalse, Reason: "NoPod", - Message: "no apiserver.openshift-apiserver pods available on any node.", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), Status: operatorv1.ConditionFalse, }, { - Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "Unavailable", + Message: "no apiserver.openshift-apiserver pods available on any node", }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), Status: 
operatorv1.ConditionTrue, Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest generation and 0/3 pods are available", + Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest revision and 0/3 pods are available", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "unavailable workload that previously progressed successfully", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 0, + UpdatedReplicas: 3, + ObservedGeneration: 5, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, + }, + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionFalse, + Reason: "NoPod", + Message: "no apiserver.openshift-apiserver pods available on any node", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "Unavailable", + Message: "no apiserver.openshift-apiserver pods available on any node", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return 
areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "scenario: we have an incomplete workload and no errors thus we are available and degraded (missing 1 replica)", + name: "partially available workload (2 of 3 replicas)", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -332,9 +381,9 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionTrue, - Reason: "UnavailablePod", - Message: "1 of 3 requested instances are unavailable for apiserver.openshift-apiserver ()", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), @@ -347,7 +396,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: we have a complete workload and no errors thus we are available", + name: "fully available workload", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -393,7 +442,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: we have an outdated (generation) workload and no errors thus we are available and we are progressing", + name: "workload scaling with generation mismatch", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -401,7 +450,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Generation: 100, }, Spec: appsv1.DeploymentSpec{ - Replicas: ptr.To[int32](3), + Replicas: ptr.To[int32](5), }, Status: appsv1.DeploymentStatus{ Replicas: 3, @@ -434,9 +483,9 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "NewGeneration", - Message: "deployment/apiserver.openshift-apiserver: observed generation is 99, desired generation is 100.", + Status: 
operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) @@ -444,7 +493,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { - name: "scenario: rare case when we have an outdated (generation) workload and one old replica failing is but it will be picked up soon by the new rollout thus we are available and we are progressing", + name: "workload with generation mismatch and pod failure", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -485,9 +534,9 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "NewGeneration", - Message: "deployment/apiserver.openshift-apiserver: observed generation is 99, desired generation is 100.", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) @@ -527,7 +576,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "the deployment is progressing to rollout pods, but not all replicas have been updated yet", + name: "workload rollout in progress", operatorPreconditionsNotReady: false, workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ @@ -570,79 +619,132 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), Status: operatorv1.ConditionTrue, Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 1/3 pods have been updated to the latest generation and 2/3 pods are available", + Message: "deployment/apiserver.openshift-apiserver: 1/3 pods have been updated to the latest revision and 2/3 pods are available", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "progressing==false for a longer time 
shouldn't make the otherwise fine workload degraded", + name: "workload with terminating pods during rollout", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", Namespace: "openshift-apiserver", }, Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}}, Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 3, + AvailableReplicas: 2, + ReadyReplicas: 2, UpdatedReplicas: 3, Conditions: []appsv1.DeploymentCondition{ - {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, }, }, }, - previousConditions: []operatorv1.OperatorCondition{ - { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)), + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, 
operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/apiserver.openshift-apiserver: 3/3 pods have been updated to the latest revision and 2/3 pods are available", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "available workload with progress deadline exceeded", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 2, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 1, + ObservedGeneration: 2, + Conditions: []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentProgressing, + Status: corev1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment has timed out", + LastUpdateTime: metav1.Now(), + LastTransitionTime: metav1.Now(), + }, + }, }, }, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), - Status: operatorv1.ConditionTrue, - Reason: "AsExpected", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), Status: operatorv1.ConditionFalse, }, { - Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: deployment has timed out", }, { - Type: fmt.Sprintf("%sDeployment%s", 
defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: deployment has timed out", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "some pods rolled out and waiting for old terminating pod before we can progress further", + name: "workload rollout with maxSurge (4 of 3 replicas available)", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "apiserver", - Namespace: "openshift-apiserver", + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, }, Spec: appsv1.DeploymentSpec{ - Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, - Template: corev1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "bar"}}}, Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 2, - ReadyReplicas: 2, - UpdatedReplicas: 3, + Replicas: 4, + AvailableReplicas: 4, + UpdatedReplicas: 2, + ObservedGeneration: 5, Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, }, @@ -670,7 +772,70 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), Status: operatorv1.ConditionTrue, Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 3/3 pods have been updated to the latest generation and 2/3 pods are available", + Message: "deployment/apiserver.openshift-apiserver: 2/3 pods have been updated to the latest revision and 4/3 
pods are available", + }, + } + return areCondidtionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "workload recovering from progress deadline exceeded", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 3, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 3, + UpdatedReplicas: 3, + ObservedGeneration: 3, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, + }, + }, + }, + previousConditions: []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment has timed out", + LastTransitionTime: metav1.NewTime(time.Now().Add(-5 * time.Minute)), + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "ProgressDeadlineExceeded", + LastTransitionTime: metav1.NewTime(time.Now().Add(-5 * time.Minute)), + }, + }, + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, 
operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areCondidtionsEqual(expectedConditions, actualStatus.Conditions)