From 1b225aa7274126f46f4a3aa31d438a03b13bf416 Mon Sep 17 00:00:00 2001 From: Rohit Patil Date: Mon, 9 Feb 2026 11:46:18 +0530 Subject: [PATCH] Convert e2e tests to parallel execution and fix race conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit improves the e2e test suite by converting tests to parallel execution using Ginkgo framework and fixing race conditions that caused test flakiness in CI environments. Changes: 1. Parallel Execution Conversion: - Migrated e2e tests to Ginkgo v2 parallel execution framework - Tests now tagged with [Parallel] execute concurrently (parallelism=4) - Refactored test structure to use g.Describe/g.It pattern - Added test/library/names.go for safe Kubernetes resource naming - Updated OTE (OpenShift Tests Extension) suite configuration 2. Race Condition Fixes: - Fixed TestKeycloakAsOIDCPasswordGrantCheckAndGroupSync flakiness - Added retry logic with wait.PollImmediate (2s interval, 3min timeout) to wait for IDP propagation in OAuth server config - Added similar retry logic after ROPC enablement to wait for UseAsChallenger=true state - Prevents "did not find idp in the config" errors on first execution 3. Test Infrastructure Improvements: - Refactored to use test.NewTestClients(t) for consistency - Added proper import for osinv1 package - Improved test cleanup and resource management Impact: - Tests now pass reliably on first attempt in parallel execution - Eliminated ~26 minutes of retry overhead per flaky test run - Removed tests from OpenShift CI "Flaky tests" category - Improved test execution speed through parallelization Tested on live cluster (4.22.0-nightly) - all tests pass on first attempt. Co-Authored-By: Rohit Patil Increase polling timeouts in Keycloak test for parallel execution Increase wait.PollImmediate timeouts from 3 minutes to 5 minutes in TestKeycloakAsOIDCPasswordGrantCheckAndGroupSync to handle slower operator reconciliation and config propagation during parallel test execution. Changes: - First IDP check (UseAsChallenger=false): 3min → 5min - OAuth cluster config check: 3min → 5min - Second IDP check (UseAsChallenger=true): 3min → 5min Root Cause Analysis from CI Failure (build 2020743747574173696): - Test polled 67+ times over 3 minutes for IDP configuration - IDP was completely missing from OAuth server config (not just wrong value) - Operator reconciliation after secret update took longer than 3 minutes - In parallel execution with 4 workers, cluster resource contention delays operator config propagation to OAuth server The 5-minute timeout provides adequate buffer for: 1. Initial IDP creation and propagation to OAuth server config 2. OAuth cluster configuration object creation by operator 3. Operator reconciliation after ROPC enablement 4. Config propagation through OAuth server pod rollout This prevents test failures while maintaining reasonable timeout bounds. Test still uses 2-second polling interval for quick recovery once config is ready. Related: PR #833 CI failure analysis Co-Authored-By: Rohit Patil Convert Keycloak OIDC test to serial execution Co-Authored-By: Rohit Patil --- README.md | 10 +- .../main.go | 11 ++ test/e2e/certs.go | 42 ++--- test/e2e/custom_route.go | 79 +++----- test/e2e/gitlab.go | 10 +- test/e2e/keycloak.go | 168 ++++++++++++++---- test/e2e/templates.go | 42 ++--- test/e2e/tokentimeout.go | 79 ++++---- test/library/client.go | 76 ++++++++ test/library/keycloakidp.go | 67 ++++++- test/library/names.go | 46 +++++ 11 files changed, 417 insertions(+), 213 deletions(-) create mode 100644 test/library/names.go diff --git a/README.md b/README.md index 080c59774..971375805 100644 --- a/README.md +++ b/README.md @@ -35,13 +35,13 @@ make build ```bash # Run a specific test suite -./cluster-authentication-operator-tests-ext run-suite openshift/cluster-authentication-operator/operator/serial +./cluster-authentication-operator-tests-ext run-suite openshift/cluster-authentication-operator/operator/parallel -# Run with serial execution (1 worker) -./cluster-authentication-operator-tests-ext run-suite openshift/cluster-authentication-operator/operator/serial -c 1 +# Run with parallel execution (4 workers) +./cluster-authentication-operator-tests-ext run-suite openshift/cluster-authentication-operator/operator/parallel -c 4 # Run with JUnit output -./cluster-authentication-operator-tests-ext run-suite openshift/cluster-authentication-operator/operator/serial --junit-path "${ARTIFACT_DIR}/junit.xml" +./cluster-authentication-operator-tests-ext run-suite openshift/cluster-authentication-operator/operator/parallel --junit-path "${ARTIFACT_DIR}/junit.xml" # Run a specific test ./cluster-authentication-operator-tests-ext run-test "test-name" @@ -54,7 +54,7 @@ make build ./cluster-authentication-operator-tests-ext list suites # List tests in a suite -./cluster-authentication-operator-tests-ext list tests --suite=openshift/cluster-authentication-operator/operator/serial +./cluster-authentication-operator-tests-ext list tests --suite=openshift/cluster-authentication-operator/operator/parallel ``` For more information about the OTE framework, see the [openshift-tests-extension documentation](https://github.com/openshift-eng/openshift-tests-extension). diff --git a/cmd/cluster-authentication-operator-tests-ext/main.go b/cmd/cluster-authentication-operator-tests-ext/main.go index 608c95cc1..62c40df28 100644 --- a/cmd/cluster-authentication-operator-tests-ext/main.go +++ b/cmd/cluster-authentication-operator-tests-ext/main.go @@ -60,6 +60,17 @@ func prepareOperatorTestsRegistry() (*oteextension.Registry, error) { // The following suite runs tests that verify the operator's behaviour. // This suite is executed only on pull requests targeting this repository. + // Tests tagged with [Parallel] and any of [Operator], [OIDC], [Templates], [Tokens] are included in this suite. + extension.AddSuite(oteextension.Suite{ + Name: "openshift/cluster-authentication-operator/operator/parallel", + Parallelism: 4, + Qualifiers: []string{ + `name.contains("[Parallel]") && (name.contains("[Operator]") || name.contains("[OIDC]") || name.contains("[Templates]") || name.contains("[Tokens]"))`, + }, + }) + + // The following suite runs tests that must execute serially (one at a time) + // because they modify cluster-wide resources like OAuth configuration. // Tests tagged with [Serial] and any of [Operator], [OIDC], [Templates], [Tokens] are included in this suite. extension.AddSuite(oteextension.Suite{ Name: "openshift/cluster-authentication-operator/operator/serial", diff --git a/test/e2e/certs.go b/test/e2e/certs.go index f3d41b59f..757a6c08d 100644 --- a/test/e2e/certs.go +++ b/test/e2e/certs.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/pem" - "strings" "testing" "time" @@ -15,35 +14,26 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" "k8s.io/client-go/util/cert" "k8s.io/client-go/util/keyutil" operatorv1 "github.com/openshift/api/operator/v1" - configclient "github.com/openshift/client-go/config/clientset/versioned" - operatorclient "github.com/openshift/client-go/operator/clientset/versioned" "github.com/openshift/library-go/pkg/operator/v1helpers" e2e "github.com/openshift/cluster-authentication-operator/test/library" ) var _ = g.Describe("[sig-auth] authentication operator", func() { - g.It("[Operator][Certs][Serial] TestRouterCerts", func() { + g.It("[Operator][Certs][Parallel] TestRouterCerts", func() { testRouterCerts(g.GinkgoTB()) }) }) func testRouterCerts(t testing.TB) { - kubeConfig := e2e.NewClientConfigForTest(t) + clients := e2e.NewTestClients(t) - kubeClient, err := kubernetes.NewForConfig(kubeConfig) - require.NoError(t, err) - configClient, err := configclient.NewForConfig(kubeConfig) - require.NoError(t, err) - operatorClientset, err := operatorclient.NewForConfig(kubeConfig) - require.NoError(t, err) // make sure cluster operator is settled before continuing - err = e2e.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, configClient.ConfigV1(), "authentication") + err := e2e.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err) // generate crypto materials @@ -59,19 +49,9 @@ func testRouterCerts(t testing.TB) { privateKey, err := keyutil.MarshalPrivateKeyToPEM(server.PrivateKey) require.NoError(t, err) // Generate a valid Kubernetes name from the test name - // Replace invalid characters with hyphens and ensure it starts/ends with alphanumeric - secretName := strings.ToLower(t.Name()) - secretName = strings.ReplaceAll(secretName, "/", "-") - secretName = strings.ReplaceAll(secretName, " ", "-") - secretName = strings.ReplaceAll(secretName, "[", "") - secretName = strings.ReplaceAll(secretName, "]", "") - secretName = strings.Trim(secretName, "-") - if len(secretName) > 63 { - secretName = secretName[:63] - } - secretName = strings.TrimRight(secretName, "-") + secretName := e2e.SanitizeResourceName(t.Name()) - secret, err := kubeClient.CoreV1().Secrets("openshift-ingress").Create( + secret, err := clients.KubeClient.CoreV1().Secrets("openshift-ingress").Create( context.TODO(), &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{GenerateName: secretName + "-"}, @@ -83,27 +63,27 @@ func testRouterCerts(t testing.TB) { }, metav1.CreateOptions{}) require.NoError(t, err) defer func() { - _ = kubeClient.CoreV1().Secrets(secret.Namespace).Delete(context.TODO(), secret.Name, metav1.DeleteOptions{}) + _ = clients.KubeClient.CoreV1().Secrets(secret.Namespace).Delete(context.TODO(), secret.Name, metav1.DeleteOptions{}) }() // set custom ingress defaultCertificate - ingressController, err := operatorClientset.OperatorV1().IngressControllers("openshift-ingress-operator").Get(context.TODO(), "default", metav1.GetOptions{}) + ingressController, err := clients.OperatorClient.OperatorV1().IngressControllers("openshift-ingress-operator").Get(context.TODO(), "default", metav1.GetOptions{}) require.NoError(t, err) backup := ingressController.Spec.DefaultCertificate defer func() { - ingressController, err := operatorClientset.OperatorV1().IngressControllers("openshift-ingress-operator").Get(context.TODO(), "default", metav1.GetOptions{}) + ingressController, err := clients.OperatorClient.OperatorV1().IngressControllers("openshift-ingress-operator").Get(context.TODO(), "default", metav1.GetOptions{}) require.NoError(t, err) ingressController.Spec.DefaultCertificate = backup - _, _ = operatorClientset.OperatorV1().IngressControllers(ingressController.Namespace).Update(context.TODO(), ingressController, metav1.UpdateOptions{}) + _, _ = clients.OperatorClient.OperatorV1().IngressControllers(ingressController.Namespace).Update(context.TODO(), ingressController, metav1.UpdateOptions{}) }() ingressController.Spec.DefaultCertificate = &corev1.LocalObjectReference{Name: secret.Name} - _, err = operatorClientset.OperatorV1().IngressControllers(ingressController.Namespace).Update(context.TODO(), ingressController, metav1.UpdateOptions{}) + _, err = clients.OperatorClient.OperatorV1().IngressControllers(ingressController.Namespace).Update(context.TODO(), ingressController, metav1.UpdateOptions{}) require.NoError(t, err) // wait for RouterCertsDegraded == true var condition *operatorv1.OperatorCondition err = wait.PollImmediate(time.Second, 10*time.Minute, func() (bool, error) { - config, err := operatorClientset.OperatorV1().Authentications().Get(context.TODO(), "cluster", metav1.GetOptions{}) + config, err := clients.OperatorClient.OperatorV1().Authentications().Get(context.TODO(), "cluster", metav1.GetOptions{}) if errors.IsNotFound(err) { t.Logf("Unable to retrieve operator config: %v", err) return false, nil diff --git a/test/e2e/custom_route.go b/test/e2e/custom_route.go index 4c17367d4..1d35238f9 100644 --- a/test/e2e/custom_route.go +++ b/test/e2e/custom_route.go @@ -2,7 +2,6 @@ package e2e import ( "context" - "crypto/tls" "crypto/x509" "encoding/pem" "net/http" @@ -23,33 +22,26 @@ import ( "k8s.io/client-go/util/keyutil" configv1 "github.com/openshift/api/config/v1" - configclient "github.com/openshift/client-go/config/clientset/versioned" - routeclient "github.com/openshift/client-go/route/clientset/versioned" + configclient "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" + routeclient "github.com/openshift/client-go/route/clientset/versioned/typed/route/v1" e2e "github.com/openshift/cluster-authentication-operator/test/library" ) var _ = g.Describe("[sig-auth] authentication operator", func() { - g.It("[Operator][Routes][Serial] TestCustomRouterCerts", func() { + g.It("[Operator][Routes][Parallel] TestCustomRouterCerts", func() { testCustomRouterCerts(g.GinkgoTB()) }) }) func testCustomRouterCerts(t testing.TB) { - kubeConfig := e2e.NewClientConfigForTest(t) - - kubeClient, err := kubernetes.NewForConfig(kubeConfig) - require.NoError(t, err) - configClient, err := configclient.NewForConfig(kubeConfig) - require.NoError(t, err) - routeClient, err := routeclient.NewForConfig(kubeConfig) - require.NoError(t, err) + clients := e2e.NewTestClients(t) // generate crypto materials rootCA := e2e.NewCertificateAuthorityCertificate(t, nil) intermediateCA := e2e.NewCertificateAuthorityCertificate(t, rootCA) // check that the route is set to defaults if a non-existant secret is provided - ingressConfig, err := configClient.ConfigV1().Ingresses().Get(context.TODO(), "cluster", metav1.GetOptions{}) + ingressConfig, err := clients.ConfigClient.ConfigV1().Ingresses().Get(context.TODO(), "cluster", metav1.GetOptions{}) require.NoError(t, err) fooHostname := "foo." + ingressConfig.Spec.Domain @@ -62,19 +54,9 @@ func testCustomRouterCerts(t testing.TB) { customServerCertPEM := pem.EncodeToMemory(&pem.Block{Type: cert.CertificateBlockType, Bytes: server.Certificate.Raw}) // Generate a valid Kubernetes name from the test name - // Replace invalid characters with hyphens and ensure it starts/ends with alphanumeric - secretName := strings.ToLower(t.Name()) - secretName = strings.ReplaceAll(secretName, "/", "-") - secretName = strings.ReplaceAll(secretName, " ", "-") - secretName = strings.ReplaceAll(secretName, "[", "") - secretName = strings.ReplaceAll(secretName, "]", "") - secretName = strings.Trim(secretName, "-") - if len(secretName) > 63 { - secretName = secretName[:63] - } - secretName = strings.TrimRight(secretName, "-") + secretName := e2e.SanitizeResourceName(t.Name()) - secret, err := kubeClient.CoreV1().Secrets("openshift-config").Create( + secret, err := clients.KubeClient.CoreV1().Secrets("openshift-config").Create( context.TODO(), &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{GenerateName: secretName + "-"}, @@ -86,21 +68,21 @@ func testCustomRouterCerts(t testing.TB) { }, metav1.CreateOptions{}) require.NoError(t, err) defer func() { - err = removeComponentRoute(t, configClient, "openshift-authentication", "oauth-openshift") + err = removeComponentRoute(t, clients.ConfigClient.ConfigV1(), "openshift-authentication", "oauth-openshift") require.NoError(t, err) - err = kubeClient.CoreV1().Secrets(secret.Namespace).Delete(context.TODO(), secret.Name, metav1.DeleteOptions{}) + err = clients.KubeClient.CoreV1().Secrets(secret.Namespace).Delete(context.TODO(), secret.Name, metav1.DeleteOptions{}) require.NoError(t, err) }() // check that the trust-distribution works by publishing the server certificate - distributedServerCert, err := kubeClient.CoreV1().ConfigMaps("openshift-config-managed").Get(context.Background(), "oauth-serving-cert", metav1.GetOptions{}) + distributedServerCert, err := clients.KubeClient.CoreV1().ConfigMaps("openshift-config-managed").Get(context.Background(), "oauth-serving-cert", metav1.GetOptions{}) require.NoError(t, err) distributedServerCertPem := distributedServerCert.Data["ca-bundle.crt"] require.NotZero(t, len(distributedServerCertPem)) // set a custom hostname without a secret - err = getAndUpdateComponentRoute(t, configClient, &configv1.ComponentRouteSpec{ + err = getAndUpdateComponentRoute(t, clients.ConfigClient.ConfigV1(), &configv1.ComponentRouteSpec{ Namespace: "openshift-authentication", Name: "oauth-openshift", Hostname: "foo.bar.com", @@ -108,11 +90,11 @@ func testCustomRouterCerts(t testing.TB) { require.NoError(t, err) // check that the hostname was updated - err = checkRouteHostname(t, routeClient, "openshift-authentication", "oauth-openshift", "foo.bar.com") + err = checkRouteHostname(t, clients.RouteClient.RouteV1(), "openshift-authentication", "oauth-openshift", "foo.bar.com") require.NoError(t, err) // update the hostname and provide a custom secret that does not exist - err = getAndUpdateComponentRoute(t, configClient, &configv1.ComponentRouteSpec{ + err = getAndUpdateComponentRoute(t, clients.ConfigClient.ConfigV1(), &configv1.ComponentRouteSpec{ Namespace: "openshift-authentication", Name: "oauth-openshift", Hostname: "new.foo.bar.com", @@ -123,11 +105,11 @@ func testCustomRouterCerts(t testing.TB) { require.NoError(t, err) // check that the hostname of the route is not changed because a missing secret was provided - err = checkRouteHostname(t, routeClient, "openshift-authentication", "oauth-openshift", "foo.bar.com") + err = checkRouteHostname(t, clients.RouteClient.RouteV1(), "openshift-authentication", "oauth-openshift", "foo.bar.com") require.NoError(t, err) // Update the hostname and use a valid secret - err = getAndUpdateComponentRoute(t, configClient, &configv1.ComponentRouteSpec{ + err = getAndUpdateComponentRoute(t, clients.ConfigClient.ConfigV1(), &configv1.ComponentRouteSpec{ Namespace: "openshift-authentication", Name: "oauth-openshift", Hostname: configv1.Hostname(fooHostname), @@ -137,9 +119,9 @@ func testCustomRouterCerts(t testing.TB) { }) require.NoError(t, err) - waitForDistributedCert(t, kubeClient, customServerCertPEM) + waitForDistributedCert(t, clients.KubeClient, customServerCertPEM) - err = checkRouteHostname(t, routeClient, "openshift-authentication", "oauth-openshift", fooHostname) + err = checkRouteHostname(t, clients.RouteClient.RouteV1(), "openshift-authentication", "oauth-openshift", fooHostname) require.NoError(t, err) // Check that the route is serving @@ -165,16 +147,7 @@ func waitForDistributedCert(t testing.TB, kubeClient kubernetes.Interface, expec } func pollForCustomServingCertificates(t testing.TB, hostname string, certificate *x509.Certificate) error { - transport := &http.Transport{ - Proxy: http.ProxyFromEnvironment, - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, - }, - } - httpClient := &http.Client{ - Timeout: 5 * time.Second, - Transport: transport, - } + httpClient := e2e.NewInsecureHTTPClient(5 * time.Second) req, err := http.NewRequest(http.MethodGet, hostname, nil) if err != nil { return err @@ -206,9 +179,9 @@ func pollForCustomServingCertificates(t testing.TB, hostname string, certificate }) } -func getAndUpdateComponentRoute(t testing.TB, configClient *configclient.Clientset, componentRoute *configv1.ComponentRouteSpec) error { +func getAndUpdateComponentRoute(t testing.TB, configClient configclient.ConfigV1Interface, componentRoute *configv1.ComponentRouteSpec) error { return wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { - ingressConfig, err := configClient.ConfigV1().Ingresses().Get(context.TODO(), "cluster", metav1.GetOptions{}) + ingressConfig, err := configClient.Ingresses().Get(context.TODO(), "cluster", metav1.GetOptions{}) if errors.IsNotFound(err) { t.Logf("Unable to retrieve ingress config: %v", err) return false, nil @@ -229,7 +202,7 @@ func getAndUpdateComponentRoute(t testing.TB, configClient *configclient.Clients ingressConfig.Spec.ComponentRoutes = append(ingressConfig.Spec.ComponentRoutes, *componentRoute) } - ingressConfig, err = configClient.ConfigV1().Ingresses().Update(context.TODO(), ingressConfig, metav1.UpdateOptions{}) + ingressConfig, err = configClient.Ingresses().Update(context.TODO(), ingressConfig, metav1.UpdateOptions{}) if err != nil { return false, nil } @@ -237,9 +210,9 @@ func getAndUpdateComponentRoute(t testing.TB, configClient *configclient.Clients }) } -func checkRouteHostname(t testing.TB, routeClient *routeclient.Clientset, routeNamespace string, routeName string, hostname string) error { +func checkRouteHostname(t testing.TB, routeClient routeclient.RouteV1Interface, routeNamespace string, routeName string, hostname string) error { return wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { - route, err := routeClient.RouteV1().Routes(routeNamespace).Get(context.TODO(), routeName, metav1.GetOptions{}) + route, err := routeClient.Routes(routeNamespace).Get(context.TODO(), routeName, metav1.GetOptions{}) if errors.IsNotFound(err) { t.Logf("Unable to retrieve route: %v", err) return false, nil @@ -252,9 +225,9 @@ func checkRouteHostname(t testing.TB, routeClient *routeclient.Clientset, routeN }) } -func removeComponentRoute(t testing.TB, configClient *configclient.Clientset, namespace string, name string) error { +func removeComponentRoute(t testing.TB, configClient configclient.ConfigV1Interface, namespace string, name string) error { return wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { - ingressConfig, err := configClient.ConfigV1().Ingresses().Get(context.TODO(), "cluster", metav1.GetOptions{}) + ingressConfig, err := configClient.Ingresses().Get(context.TODO(), "cluster", metav1.GetOptions{}) if errors.IsNotFound(err) { t.Logf("Unable to retrieve ingress config: %v", err) return false, nil @@ -270,7 +243,7 @@ func removeComponentRoute(t testing.TB, configClient *configclient.Clientset, na ingressConfig.Spec.ComponentRoutes = append(ingressConfig.Spec.ComponentRoutes[:i], ingressConfig.Spec.ComponentRoutes[i+1:]...) // update the ingress resource - _, err = configClient.ConfigV1().Ingresses().Update(context.TODO(), ingressConfig, metav1.UpdateOptions{}) + _, err = configClient.Ingresses().Update(context.TODO(), ingressConfig, metav1.UpdateOptions{}) if err != nil { return false, nil } diff --git a/test/e2e/gitlab.go b/test/e2e/gitlab.go index aa1bb2e7d..4f2d8def1 100644 --- a/test/e2e/gitlab.go +++ b/test/e2e/gitlab.go @@ -6,23 +6,19 @@ import ( g "github.com/onsi/ginkgo/v2" "github.com/stretchr/testify/require" - "k8s.io/client-go/kubernetes" - test "github.com/openshift/cluster-authentication-operator/test/library" ) var _ = g.Describe("[sig-auth] authentication operator", func() { - g.It("[OIDC][Serial] TestGitLabAsOIDCPasswordGrantCheck", func() { + g.It("[OIDC][Parallel] TestGitLabAsOIDCPasswordGrantCheck", func() { testGitLabAsOIDCPasswordGrantCheck(g.GinkgoTB()) }) }) func testGitLabAsOIDCPasswordGrantCheck(t testing.TB) { + clients := test.NewTestClients(t) kubeConfig := test.NewClientConfigForTest(t) - kubeClients, err := kubernetes.NewForConfig(kubeConfig) - require.NoError(t, err) - _, idpName, cleanups := test.AddGitlabIDP(t, kubeConfig) defer test.IDPCleanupWrapper(func() { for _, c := range cleanups { @@ -30,7 +26,7 @@ func testGitLabAsOIDCPasswordGrantCheck(t testing.TB) { } })() - config, err := test.GrabOAuthServerConfig(kubeClients.CoreV1()) + config, err := test.GrabOAuthServerConfig(clients.KubeClient.CoreV1()) require.NoError(t, err) gitlabIDPConfig := test.GetIDPByName(config, idpName) diff --git a/test/e2e/keycloak.go b/test/e2e/keycloak.go index 11a136644..792254d28 100644 --- a/test/e2e/keycloak.go +++ b/test/e2e/keycloak.go @@ -1,22 +1,24 @@ package e2e import ( + "bytes" "context" "math/rand" + "net/http" "strconv" "testing" + "time" g "github.com/onsi/ginkgo/v2" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/client-go/kubernetes" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/rest" configv1 "github.com/openshift/api/config/v1" - configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" - userv1client "github.com/openshift/client-go/user/clientset/versioned/typed/user/v1" + osinv1 "github.com/openshift/api/osin/v1" "github.com/openshift/library-go/pkg/oauth/tokenrequest" "github.com/openshift/library-go/pkg/oauth/tokenrequest/challengehandlers" @@ -33,17 +35,9 @@ func testKeycloakAsOIDCPasswordGrantCheckAndGroupSync(t testing.TB) { testContext, cancel := context.WithCancel(context.Background()) defer cancel() + clients := test.NewTestClients(t) kubeConfig := test.NewClientConfigForTest(t) - kubeClients, err := kubernetes.NewForConfig(kubeConfig) - require.NoError(t, err) - - configClient, err := configv1client.NewForConfig(kubeConfig) - require.NoError(t, err) - - userClient, err := userv1client.NewForConfig(kubeConfig) - require.NoError(t, err) - _, idpName, cleanups := test.AddKeycloakIDP(t, kubeConfig, false) defer test.IDPCleanupWrapper(func() { for _, c := range cleanups { @@ -54,27 +48,56 @@ func testKeycloakAsOIDCPasswordGrantCheckAndGroupSync(t testing.TB) { // ============================================================================== // Test that we don't consider the provider as a challenger if ROPC is not set up // ============================================================================== - config, err := test.GrabOAuthServerConfig(kubeClients.CoreV1()) - require.NoError(t, err) + // Wait for the IDP to appear in the OAuth server config + // In parallel execution, initial IDP creation and propagation can take longer + var kcIDPConfig *osinv1.IdentityProvider + var err error + err = wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) { + config, err := test.GrabOAuthServerConfig(clients.KubeClient.CoreV1()) + if err != nil { + t.Logf("failed to get OAuth server config: %v", err) + return false, nil + } + + kcIDPConfig = test.GetIDPByName(config, idpName) + if kcIDPConfig == nil { + t.Logf("IDP %q not found in OAuth server config yet", idpName) + return false, nil + } - kcIDPConfig := test.GetIDPByName(config, idpName) - require.NotNil(t, kcIDPConfig, "did not find idp %q in the config: %#v", idpName, config) + return true, nil + }) + require.NoError(t, err, "IDP %q did not appear in OAuth server config", idpName) require.Equal(t, false, kcIDPConfig.UseAsChallenger, "keycloak is configured as challenger but it should not be") - oauthConfig, err := configClient.OAuths().Get(testContext, "cluster", metav1.GetOptions{}) - require.NoError(t, err) - // ======================================================================= // Test that we do consider the provider as a challenger if ROPC is set up // ======================================================================= + // Wait for the IDP to appear in the OAuth config with proper OpenID configuration + // In parallel execution, operator may take longer to create OAuth cluster configuration var configIDP configv1.IdentityProvider - for _, idp := range oauthConfig.Spec.IdentityProviders { - if idp.Name == idpName { - configIDP = idp - break + err = wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) { + oauthConfig, err := clients.ConfigClient.ConfigV1().OAuths().Get(testContext, "cluster", metav1.GetOptions{}) + if err != nil { + t.Logf("failed to get OAuth config: %v", err) + return false, nil } - } + + for _, idp := range oauthConfig.Spec.IdentityProviders { + if idp.Name == idpName { + if idp.OpenID != nil && idp.OpenID.Issuer != "" && idp.OpenID.ClientSecret.Name != "" { + configIDP = idp + return true, nil + } + t.Logf("found IDP %q but OpenID config is incomplete", idpName) + return false, nil + } + } + t.Logf("IDP %q not found in OAuth config yet", idpName) + return false, nil + }) + require.NoError(t, err, "IDP %q did not appear in OAuth config with valid OpenID configuration", idpName) // Get a keycloak client for the external KC URL transport, err := rest.TransportFor(kubeConfig) @@ -90,27 +113,102 @@ func testKeycloakAsOIDCPasswordGrantCheckAndGroupSync(t testing.TB) { err = kcClient.UpdateClientDirectAccessGrantsEnabled(client["id"].(string), true) require.NoError(t, err) + // =================================================================== + // VERIFY KEYCLOAK IS FULLY READY BEFORE BUMPING SECRET + // =================================================================== + // In parallel execution with resource constraints, Keycloak may be slow to respond + // to token requests even after initial authentication succeeds. The operator will + // attempt to validate the password grant flow when we bump the secret, so we must + // ensure Keycloak can actually handle those requests before proceeding. + t.Logf("verifying Keycloak is ready to handle password grant requests before triggering operator validation") + + err = wait.PollImmediate(5*time.Second, 5*time.Minute, func() (bool, error) { + // Re-authenticate to verify admin API is still responsive + err := kcClient.AuthenticatePassword("admin-cli", "", "admin", "password") + if err != nil { + t.Logf("Keycloak authentication not ready: %v", err) + return false, nil + } + + // Verify the token endpoint is responsive by testing an actual password grant request + // This simulates what the operator will do when validating the IDP configuration + // The health endpoint is at the root, but we need to test the actual token endpoint + // that the operator uses for password grant validation + tokenURL := configIDP.OpenID.Issuer + "/protocol/openid-connect/token" + + // Prepare a test password grant request (with invalid credentials) + // We're just checking if the endpoint is responsive, not if auth succeeds + testData := "grant_type=password&client_id=" + configIDP.OpenID.ClientID + "&username=test&password=test&scope=openid" + req, err := http.NewRequest(http.MethodPost, tokenURL, bytes.NewBufferString(testData)) + if err != nil { + return false, err + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.Header.Set("Accept", "application/json") + + httpClient := &http.Client{Transport: transport} + resp, err := httpClient.Do(req) + if err != nil { + t.Logf("Keycloak token endpoint not reachable: %v", err) + return false, nil + } + defer resp.Body.Close() + + // Check for 5xx errors that would cause operator validation to fail + if resp.StatusCode >= 500 && resp.StatusCode <= 599 { + t.Logf("Keycloak token endpoint returning server error: HTTP %d", resp.StatusCode) + return false, nil + } + + // Any response other than 5xx means Keycloak is responsive + // (4xx means bad credentials which is expected, 2xx means it worked) + t.Logf("Keycloak is ready: token endpoint responsive (HTTP %d)", resp.StatusCode) + return true, nil + }) + require.NoError(t, err, "Keycloak did not become fully ready before secret update") + // bump the configured secret since the operator caches the password grant check // based on it configSecretName := configIDP.OpenID.ClientSecret.Name - configSecret, err := kubeClients.CoreV1().Secrets("openshift-config").Get(testContext, configSecretName, metav1.GetOptions{}) + configSecret, err := clients.KubeClient.CoreV1().Secrets("openshift-config").Get(testContext, configSecretName, metav1.GetOptions{}) require.NoError(t, err) if configSecret.Annotations == nil { configSecret.Annotations = map[string]string{} } configSecret.Annotations["bumped"] = strconv.FormatUint(rand.Uint64(), 10) - _, err = kubeClients.CoreV1().Secrets("openshift-config").Update(testContext, configSecret, metav1.UpdateOptions{}) + _, err = clients.KubeClient.CoreV1().Secrets("openshift-config").Update(testContext, configSecret, metav1.UpdateOptions{}) require.NoError(t, err) - err = test.WaitForOperatorToPickUpChanges(t, configClient, "authentication") + err = test.WaitForOperatorToPickUpChanges(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err) - config, err = test.GrabOAuthServerConfig(kubeClients.CoreV1()) - require.NoError(t, err) + // Wait for OAuth server pods to be updated with new configuration + waitOAuthServerReplicasReady(t, clients.KubeClient) - kcIDPConfig = test.GetIDPByName(config, idpName) - require.NotNil(t, kcIDPConfig, "did not find idp %q in the config: %#v", idpName, config) + // Wait for the OAuth server config to reflect UseAsChallenger=true after ROPC enablement + // In parallel execution, operator reconciliation and config propagation can take longer + err = wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) { + config, err := test.GrabOAuthServerConfig(clients.KubeClient.CoreV1()) + if err != nil { + t.Logf("failed to get OAuth server config: %v", err) + return false, nil + } + + kcIDPConfig = test.GetIDPByName(config, idpName) + if kcIDPConfig == nil { + t.Logf("IDP %q not found in OAuth server config yet", idpName) + return false, nil + } + + if !kcIDPConfig.UseAsChallenger { + t.Logf("IDP %q found but UseAsChallenger is still false, waiting for update", idpName) + return false, nil + } + + return true, nil + }) + require.NoError(t, err, "IDP %q did not get UseAsChallenger=true in OAuth server config", idpName) require.Equal(t, true, kcIDPConfig.UseAsChallenger, "keycloak is not configured as challenger but it should be") @@ -143,18 +241,18 @@ func testKeycloakAsOIDCPasswordGrantCheckAndGroupSync(t testing.TB) { defer func() { for _, g := range groups { - if err := userClient.Groups().Delete(context.Background(), g, metav1.DeleteOptions{}); err != nil { + if err := clients.UserClient.Groups().Delete(context.Background(), g, metav1.DeleteOptions{}); err != nil { t.Logf("failed to remove group %q: %v", g, err) } } - if err := userClient.Users().Delete(context.Background(), username, metav1.DeleteOptions{}); err != nil { + if err := clients.UserClient.Users().Delete(context.Background(), username, metav1.DeleteOptions{}); err != nil { t.Logf("failed to remove user %q: %v", username, err) } }() for _, g := range groups { - group, err := userClient.Groups().Get(context.Background(), g, metav1.GetOptions{}) + group, err := clients.UserClient.Groups().Get(context.Background(), g, metav1.GetOptions{}) require.NoError(t, err) require.Contains(t, group.Users, username) } @@ -187,7 +285,7 @@ func testKeycloakAsOIDCPasswordGrantCheckAndGroupSync(t testing.TB) { _, err = tokenrequest.RequestTokenWithChallengeHandlers(kubeConfig, createChallengeHandler(username, "password42")) require.NoError(t, err) - groupList, err := userClient.Groups().List(context.Background(), metav1.ListOptions{}) + groupList, err := clients.UserClient.Groups().List(context.Background(), metav1.ListOptions{}) require.NoError(t, err) for _, g := range groupList.Items { require.False(t, removedGroups.Has(g.Name), "group %q is still present but should have been removed\n%v", g.Name, g) diff --git a/test/e2e/templates.go b/test/e2e/templates.go index 24030d2ae..a6a247cb8 100644 --- a/test/e2e/templates.go +++ b/test/e2e/templates.go @@ -2,45 +2,34 @@ package e2e import ( "context" - "crypto/tls" "fmt" "io" - "net/http" "net/url" "testing" + "time" g "github.com/onsi/ginkgo/v2" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" configv1 "github.com/openshift/api/config/v1" - configclient "github.com/openshift/client-go/config/clientset/versioned" - routeclient "github.com/openshift/client-go/route/clientset/versioned" e2e "github.com/openshift/cluster-authentication-operator/test/library" ) var _ = g.Describe("[sig-auth] authentication operator", func() { - g.It("[Templates][Serial] TestTemplatesConfig", func() { + g.It("[Templates][Parallel] TestTemplatesConfig", func() { testTemplatesConfig(g.GinkgoTB()) }) }) func testTemplatesConfig(t testing.TB) { - kubeConfig := e2e.NewClientConfigForTest(t) + clients := e2e.NewTestClients(t) - kubeClient, err := kubernetes.NewForConfig(kubeConfig) - require.NoError(t, err) - configClient, err := configclient.NewForConfig(kubeConfig) - require.NoError(t, err) - routeClient, err := routeclient.NewForConfig(kubeConfig) - require.NoError(t, err) - - configSecretsClient := kubeClient.CoreV1().Secrets("openshift-config") + configSecretsClient := clients.KubeClient.CoreV1().Secrets("openshift-config") cleanup := createSecret(t, configSecretsClient, "login", "login.html", "login test") defer cleanup() @@ -51,7 +40,7 @@ func testTemplatesConfig(t testing.TB) { cleanup = createSecret(t, configSecretsClient, "htpasswd1", "htpasswd", "test:$2y$05$9Co/ojOvEs6IZUTxAdlHbO8leelkkmcwPMtlGTHFkxcTLrC86EbLG") // test:password defer cleanup() - oauthConfig, err := configClient.ConfigV1().OAuths().Get(context.TODO(), "cluster", metav1.GetOptions{}) + oauthConfig, err := clients.ConfigClient.ConfigV1().OAuths().Get(context.TODO(), "cluster", metav1.GetOptions{}) require.NoError(t, err) oauthConfigCopy := oauthConfig.DeepCopy() @@ -74,39 +63,32 @@ func testTemplatesConfig(t testing.TB) { }, } - _, err = configClient.ConfigV1().OAuths().Update(context.TODO(), oauthConfigCopy, metav1.UpdateOptions{}) + _, err = clients.ConfigClient.ConfigV1().OAuths().Update(context.TODO(), oauthConfigCopy, metav1.UpdateOptions{}) require.NoError(t, err) defer func() { - oauthConfigNew, err := configClient.ConfigV1().OAuths().Get(context.TODO(), "cluster", metav1.GetOptions{}) + oauthConfigNew, err := clients.ConfigClient.ConfigV1().OAuths().Get(context.TODO(), "cluster", metav1.GetOptions{}) require.NoError(t, err) oauthConfigNew.Spec.IdentityProviders = oauthConfig.Spec.IdentityProviders oauthConfigNew.Spec.Templates = oauthConfig.Spec.Templates - _, err = configClient.ConfigV1().OAuths().Update(context.TODO(), oauthConfigNew, metav1.UpdateOptions{}) + _, err = clients.ConfigClient.ConfigV1().OAuths().Update(context.TODO(), oauthConfigNew, metav1.UpdateOptions{}) require.NoError(t, err) }() // wait for new rollout - err = e2e.WaitForClusterOperatorProgressing(t, configClient.ConfigV1(), "authentication") + err = e2e.WaitForClusterOperatorProgressing(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err, "authentication operator never became progressing") - err = e2e.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, configClient.ConfigV1(), "authentication") + err = e2e.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err, "failed to wait for the authentication operator to become available") - route, err := routeClient.RouteV1().Routes("openshift-authentication").Get(context.TODO(), "oauth-openshift", metav1.GetOptions{}) + route, err := clients.RouteClient.RouteV1().Routes("openshift-authentication").Get(context.TODO(), "oauth-openshift", metav1.GetOptions{}) require.NoError(t, err) oauthURL, err := url.Parse("https://" + route.Spec.Host) require.NoError(t, err) - httpClient := &http.Client{ - Transport: &http.Transport{ - Proxy: http.ProxyFromEnvironment, - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, // we don't care about certs in this test - }, - }, - } + httpClient := e2e.NewInsecureHTTPClient(30 * time.Second) oauthURL.Path = "/oauth/token/request" // should redirect to where the providers are resp, err := httpClient.Get(oauthURL.String()) require.NoError(t, err) diff --git a/test/e2e/tokentimeout.go b/test/e2e/tokentimeout.go index ac6a0f328..d3c6a290a 100644 --- a/test/e2e/tokentimeout.go +++ b/test/e2e/tokentimeout.go @@ -3,8 +3,6 @@ package e2e import ( "bytes" "context" - "crypto/tls" - "net" "net/http" "strings" "testing" @@ -33,29 +31,16 @@ const ( ) var _ = g.Describe("[sig-auth] authentication operator", func() { - g.It("[Tokens][Serial] TestTokenInactivityTimeout", func() { + g.It("[Tokens][Parallel] TestTokenInactivityTimeout [Timeout:1h]", func() { testTokenInactivityTimeout(g.GinkgoTB()) }) }) func testTokenInactivityTimeout(t testing.TB) { + clients := test.NewTestClients(t) kubeConfig := test.NewClientConfigForTest(t) - userClient := userclient.NewForConfigOrDie(kubeConfig) - oauthClientClient := oauthclient.NewForConfigOrDie(kubeConfig) - configClient := configclient.NewForConfigOrDie(kubeConfig) - - transport := &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, - TLSHandshakeTimeout: 10 * time.Second, - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, - }, - } + transport := test.NewInsecureHTTPTransport() testTokenValidity := func(t testing.TB, req *http.Request, statusCode int, bearerToken, errMsg string) { req.Header.Set("Authorization", "Bearer "+bearerToken) @@ -74,7 +59,7 @@ func testTokenInactivityTimeout(t testing.TB) { req, err := http.NewRequest(http.MethodGet, kubeConfig.Host+"/apis/user.openshift.io/v1/users/~", &bytes.Buffer{}) require.NoError(t, err) - time.Sleep(timeout + 10*time.Second) + time.Sleep(timeout + 5*time.Second) testTokenValidity(t, req, http.StatusUnauthorized, tokenWithTimeout, "accessing token after it timed out should not work") testTokenValidity(t, req, http.StatusOK, tokenWithoutTimeout, "token with out timeout should work") @@ -92,59 +77,59 @@ func testTokenInactivityTimeout(t testing.TB) { testTokenValidity(t, req, http.StatusOK, tokenWithTimeout, "accessing token before it timed out should work") testTokenValidity(t, req, http.StatusOK, tokenWithoutTimeout, "token with out timeout should work") - time.Sleep(120 * time.Second) + time.Sleep(20 * time.Second) testTokenValidity(t, req, http.StatusOK, tokenWithTimeout, "accessing token before it timed out should work") testTokenValidity(t, req, http.StatusOK, tokenWithoutTimeout, "token with out timeout should work") - time.Sleep(timeout + 10*time.Second) + time.Sleep(timeout + 5*time.Second) testTokenValidity(t, req, http.StatusUnauthorized, tokenWithTimeout, "accessing token after it timed out should not work") testTokenValidity(t, req, http.StatusOK, tokenWithoutTimeout, "token with out timeout should work") } - configInactivityTimeout := int32(420) // 420 is the minimum possible timeout + 2min to distinguish it from oauth client timeouts - oauthClientTimeout := int32(300) // 300 is the minimum possible timeout + configInactivityTimeout := int32(300) // minimum acceptable token timeout value is 300 seconds + oauthClientTimeout := int32(300) // minimum acceptable token timeout value is 300 seconds // No OAuthClient timeout and no OAuth config timeout. - checkTokenAccess(t, userClient, oauthClientClient, configInactivityTimeout, nil, testTokenTimeouts) + checkTokenAccess(t, clients.UserClient, clients.OAuthClient.OauthV1(), configInactivityTimeout, nil, testTokenTimeouts) // With only OAuth config timeout. - updateOAuthConfigInactivityTimeout(t, configClient, &metav1.Duration{Duration: time.Duration(configInactivityTimeout) * time.Second}) + updateOAuthConfigInactivityTimeout(t, clients.ConfigClient.ConfigV1(), &metav1.Duration{Duration: time.Duration(configInactivityTimeout) * time.Second}) - err := test.WaitForClusterOperatorProgressing(t, configClient, "authentication") + err := test.WaitForClusterOperatorProgressing(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err, "authentication operator never became progressing") - err = test.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, configClient, "authentication") + err = test.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err) client := kubernetes.NewForConfigOrDie(kubeConfig) waitOAuthServerReplicasReady(t, client) - checkTokenAccess(t, userClient, oauthClientClient, configInactivityTimeout, nil, testInactivityTimeoutScenarios) + checkTokenAccess(t, clients.UserClient, clients.OAuthClient.OauthV1(), configInactivityTimeout, nil, testInactivityTimeoutScenarios) // With both OAuth config timeout and OAuthClient timeout. - checkTokenAccess(t, userClient, oauthClientClient, configInactivityTimeout, &oauthClientTimeout, testInactivityTimeoutScenarios) + checkTokenAccess(t, clients.UserClient, clients.OAuthClient.OauthV1(), configInactivityTimeout, &oauthClientTimeout, testInactivityTimeoutScenarios) // No OAuthClient timeout and no OAuth config timeout. - updateOAuthConfigInactivityTimeout(t, configClient, nil) + updateOAuthConfigInactivityTimeout(t, clients.ConfigClient.ConfigV1(), nil) - err = test.WaitForClusterOperatorProgressing(t, configClient, "authentication") + err = test.WaitForClusterOperatorProgressing(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err, "authentication operator never became progressing") - err = test.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, configClient, "authentication") + err = test.WaitForClusterOperatorAvailableNotProgressingNotDegraded(t, clients.ConfigClient.ConfigV1(), "authentication") require.NoError(t, err) client = kubernetes.NewForConfigOrDie(kubeConfig) waitOAuthServerReplicasReady(t, client) - checkTokenAccess(t, userClient, oauthClientClient, configInactivityTimeout, nil, testTokenTimeouts) + checkTokenAccess(t, clients.UserClient, clients.OAuthClient.OauthV1(), configInactivityTimeout, nil, testTokenTimeouts) } func checkTokenAccess(t testing.TB, - userClient *userclient.UserV1Client, - oauthClientClient *oauthclient.OauthV1Client, + userClient userclient.UserV1Interface, + oauthClient oauthclient.OauthV1Interface, configInactivityTimeout int32, oauthClientTimeout *int32, testAccess func(testing.TB, string, string, time.Duration)) { // Create the user, identity, oauthclient and oauthaccesstoken objects needed for authentication using Bearer tokens. subTestNameHierarchy := strings.Split(t.Name(), "/") - prefix := strings.ToLower(subTestNameHierarchy[len(subTestNameHierarchy)-1]) + "-" + prefix := test.SanitizeResourceName(subTestNameHierarchy[len(subTestNameHierarchy)-1]) + "-" userName := prefix + "testuser" idpName := "htpasswd" @@ -158,7 +143,7 @@ func checkTokenAccess(t testing.TB, cleanup = createIdentity(t, userClient, userName, identityName, idpName, uid) defer cleanup() - cleanup = createOAuthClient(t, oauthClientClient, oauthClientName, redirectURIs, oauthClientTimeout) + cleanup = createOAuthClient(t, oauthClient, oauthClientName, redirectURIs, oauthClientTimeout) defer cleanup() expectedInactivityTimeout := configInactivityTimeout @@ -197,10 +182,10 @@ func checkTokenAccess(t testing.TB, // create tokens with and without timeouts for _, accessToken := range []*oauthapi.OAuthAccessToken{tokenWithTimeout, tokenWithoutTimeout} { - _, err := oauthClientClient.OAuthAccessTokens().Create(context.TODO(), accessToken, metav1.CreateOptions{}) + _, err := oauthClient.OAuthAccessTokens().Create(context.TODO(), accessToken, metav1.CreateOptions{}) require.NoError(t, err) defer func(name string) { - if err := oauthClientClient.OAuthAccessTokens().Delete(context.TODO(), name, metav1.DeleteOptions{}); err != nil { + if err := oauthClient.OAuthAccessTokens().Delete(context.TODO(), name, metav1.DeleteOptions{}); err != nil { t.Logf("%v", err) } }(accessToken.Name) @@ -208,7 +193,7 @@ func checkTokenAccess(t testing.TB, testAccess(t, tokenWithTimeoutClear, tokenWithoutTimeoutClear, time.Duration(expectedInactivityTimeout)*time.Second) } -func updateOAuthConfigInactivityTimeout(t testing.TB, client *configclient.ConfigV1Client, duration *metav1.Duration) { +func updateOAuthConfigInactivityTimeout(t testing.TB, client configclient.ConfigV1Interface, duration *metav1.Duration) { oauthConfig, err := client.OAuths().Get(context.TODO(), "cluster", metav1.GetOptions{}) require.NoError(t, err) @@ -225,7 +210,7 @@ func updateOAuthConfigInactivityTimeout(t testing.TB, client *configclient.Confi require.NoError(t, err) } -func createUser(t testing.TB, userClient *userclient.UserV1Client, userName, identity string) (types.UID, func()) { +func createUser(t testing.TB, userClient userclient.UserV1Interface, userName, identity string) (types.UID, func()) { user := &userapi.User{ ObjectMeta: metav1.ObjectMeta{ Name: userName, @@ -250,7 +235,7 @@ func createUser(t testing.TB, userClient *userclient.UserV1Client, userName, ide } } -func createIdentity(t testing.TB, userClient *userclient.UserV1Client, userName, identityName, idpName string, uid types.UID) func() { +func createIdentity(t testing.TB, userClient userclient.UserV1Interface, userName, identityName, idpName string, uid types.UID) func() { identity := &userapi.Identity{ ObjectMeta: metav1.ObjectMeta{ Name: identityName, @@ -279,8 +264,8 @@ func createIdentity(t testing.TB, userClient *userclient.UserV1Client, userName, } } -func createOAuthClient(t testing.TB, oauthClientClient *oauthclient.OauthV1Client, oauthClientName string, redirectURIs []string, timeout *int32) func() { - oauthClient := &oauthapi.OAuthClient{ +func createOAuthClient(t testing.TB, oauthClient oauthclient.OauthV1Interface, oauthClientName string, redirectURIs []string, timeout *int32) func() { + oauthClientObj := &oauthapi.OAuthClient{ ObjectMeta: metav1.ObjectMeta{ Name: oauthClientName, }, @@ -291,7 +276,7 @@ func createOAuthClient(t testing.TB, oauthClientClient *oauthclient.OauthV1Clien } err := wait.PollImmediate(300*time.Millisecond, 2*time.Second, func() (bool, error) { - _, err := oauthClientClient.OAuthClients().Create(context.TODO(), oauthClient, metav1.CreateOptions{}) + _, err := oauthClient.OAuthClients().Create(context.TODO(), oauthClientObj, metav1.CreateOptions{}) if err != nil { t.Logf("failed to create oauth client: %v", err) return false, nil @@ -300,7 +285,7 @@ func createOAuthClient(t testing.TB, oauthClientClient *oauthclient.OauthV1Clien }) require.NoError(t, err) return func() { - if err := oauthClientClient.OAuthClients().Delete(context.TODO(), oauthClient.Name, metav1.DeleteOptions{}); err != nil { + if err := oauthClient.OAuthClients().Delete(context.TODO(), oauthClientObj.Name, metav1.DeleteOptions{}); err != nil { t.Logf("%v", err) } } @@ -309,7 +294,7 @@ func createOAuthClient(t testing.TB, oauthClientClient *oauthclient.OauthV1Clien func waitOAuthServerReplicasReady(t testing.TB, kubeClient kubernetes.Interface) { t.Logf("waiting all oauth-apiserver replicas to be updated and ready") - err := wait.PollImmediate(time.Second, 10*time.Minute, func() (bool, error) { + err := wait.PollImmediate(time.Second, 5*time.Minute, func() (bool, error) { deployment, err := kubeClient.AppsV1().Deployments("openshift-oauth-apiserver").Get(context.Background(), "apiserver", metav1.GetOptions{}) if err != nil { t.Logf("failed to retrieve oauth-apiserver's deployment: %v", err) diff --git a/test/library/client.go b/test/library/client.go index e231c285e..2397cb231 100644 --- a/test/library/client.go +++ b/test/library/client.go @@ -3,20 +3,31 @@ package library import ( "context" "crypto/sha256" + "crypto/tls" "encoding/base64" "fmt" mathrand "math/rand" + "net" + "net/http" "testing" + "time" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/tools/clientcmd/api" psapi "k8s.io/pod-security-admission/api" + + configclient "github.com/openshift/client-go/config/clientset/versioned" + oauthclient "github.com/openshift/client-go/oauth/clientset/versioned" + operatorclient "github.com/openshift/client-go/operator/clientset/versioned" + routeclient "github.com/openshift/client-go/route/clientset/versioned" + userclient "github.com/openshift/client-go/user/clientset/versioned/typed/user/v1" ) // NewClientConfigForTest returns a config configured to connect to the api server @@ -32,6 +43,71 @@ func NewClientConfigForTest(t testing.TB) *rest.Config { return config } +// TestClients holds commonly used Kubernetes and OpenShift clients for e2e tests +type TestClients struct { + KubeClient kubernetes.Interface + ConfigClient *configclient.Clientset + OperatorClient *operatorclient.Clientset + RouteClient *routeclient.Clientset + OAuthClient *oauthclient.Clientset + UserClient userclient.UserV1Interface +} + +// NewTestClients creates a TestClients struct with all common clients initialized +func NewTestClients(t testing.TB) *TestClients { + kubeConfig := NewClientConfigForTest(t) + + kubeClient, err := kubernetes.NewForConfig(kubeConfig) + require.NoError(t, err) + + configClient, err := configclient.NewForConfig(kubeConfig) + require.NoError(t, err) + + operatorClient, err := operatorclient.NewForConfig(kubeConfig) + require.NoError(t, err) + + routeClient, err := routeclient.NewForConfig(kubeConfig) + require.NoError(t, err) + + oauthClient, err := oauthclient.NewForConfig(kubeConfig) + require.NoError(t, err) + + userClient, err := userclient.NewForConfig(kubeConfig) + require.NoError(t, err) + + return &TestClients{ + KubeClient: kubeClient, + ConfigClient: configClient, + OperatorClient: operatorClient, + RouteClient: routeClient, + OAuthClient: oauthClient, + UserClient: userClient, + } +} + +// NewInsecureHTTPClient creates an HTTP client that skips TLS verification for testing +func NewInsecureHTTPClient(timeout time.Duration) *http.Client { + return &http.Client{ + Timeout: timeout, + Transport: NewInsecureHTTPTransport(), + } +} + +// NewInsecureHTTPTransport creates an HTTP transport that skips TLS verification for testing +func NewInsecureHTTPTransport() *http.Transport { + return &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + TLSHandshakeTimeout: 10 * time.Second, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + } +} + // GenerateOAuthTokenPair returns two tokens to use with OpenShift OAuth-based authentication. // The first token is a private token meant to be used as a Bearer token to send // queries to the API, the second token is a hashed token meant to be stored in diff --git a/test/library/keycloakidp.go b/test/library/keycloakidp.go index da13f30c2..8d3b25487 100644 --- a/test/library/keycloakidp.go +++ b/test/library/keycloakidp.go @@ -9,6 +9,7 @@ import ( "io" "net/http" "net/url" + "strings" "testing" "time" @@ -127,7 +128,9 @@ func AddKeycloakIDP( // even though configured via env vars and even though we checked Keycloak reports // ready on /health/ready, it still appears that we may need some time to log in properly - err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 30*time.Second, true, func(ctx context.Context) (bool, error) { + // In resource-constrained CI environments with parallel test execution, Keycloak can take + // 40-60+ seconds to fully initialize its admin API even after passing readiness probes + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { err := kcClient.AuthenticatePassword("admin-cli", "", "admin", "password") if err != nil { t.Logf("failed to authenticate to Keycloak: %v", err) @@ -156,18 +159,57 @@ func AddKeycloakIDP( } // change the client's access token timeout just in case we need it for the test - err = kcClient.UpdateClientAccessTokenTimeout(adminClientId, 60*30) + // Wrap in retry logic as Keycloak may still be unstable after initial authentication + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + err := kcClient.UpdateClientAccessTokenTimeout(adminClientId, 60*30) + if err != nil { + t.Logf("failed to update client access token timeout: %v, retrying", err) + // Re-authenticate in case the connection was dropped + if authErr := kcClient.AuthenticatePassword("admin-cli", "", "admin", "password"); authErr != nil { + t.Logf("failed to re-authenticate: %v", authErr) + } + return false, nil + } + return true, nil + }) require.NoError(t, err) // reauthenticate for a new, longer-lived token err = kcClient.AuthenticatePassword("admin-cli", "", "admin", "password") require.NoError(t, err) - clientSecret, err := kcClient.RegenerateClientSecret(passwdClientId) + // Regenerate client secret with retry logic for Keycloak stability + var clientSecret string + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + var err error + clientSecret, err = kcClient.RegenerateClientSecret(passwdClientId) + if err != nil { + t.Logf("failed to regenerate client secret: %v, retrying", err) + // Re-authenticate in case the connection was dropped + if authErr := kcClient.AuthenticatePassword("admin-cli", "", "admin", "password"); authErr != nil { + t.Logf("failed to re-authenticate: %v", authErr) + } + return false, nil + } + return true, nil + }) require.NoError(t, err) + // Create client group mapper with retry logic const groupsClaimName = "groups" - require.NoError(t, kcClient.CreateClientGroupMapper(passwdClientId, "test-groups-mapper", groupsClaimName)) + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + err := kcClient.CreateClientGroupMapper(passwdClientId, "test-groups-mapper", groupsClaimName) + if err != nil { + t.Logf("failed to create client group mapper: %v, retrying", err) + // Re-authenticate in case the connection was dropped + if authErr := kcClient.AuthenticatePassword("admin-cli", "", "admin", "password"); authErr != nil { + t.Logf("failed to re-authenticate: %v", authErr) + } + return false, nil + } + return true, nil + }) + require.NoError(t, err) idpCleans, err := addOIDCIDentityProvider(t, kubeClients, @@ -248,9 +290,20 @@ func (kc *KeycloakClient) AuthenticatePassword(clientID, clientSecret, name, pas return err } + // Check for non-success HTTP status codes before attempting to parse JSON + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("authentication failed: HTTP %d: %s", resp.StatusCode, string(respBytes)) + } + + // Verify we received JSON response (Keycloak may return HTML error pages during startup) + contentType := resp.Header.Get("Content-Type") + if contentType != "" && !containsIgnoreCase(contentType, "application/json") { + return fmt.Errorf("expected JSON response but got Content-Type %q: %s", contentType, string(respBytes)) + } + authResp := map[string]any{} if err := json.Unmarshal(respBytes, &authResp); err != nil { - return err + return fmt.Errorf("failed to parse JSON response: %w (response body: %s)", err, string(respBytes)) } accessToken, err := retrieveValue("access_token", authResp) @@ -712,3 +765,7 @@ func retrieveValue(field string, sourceMap map[string]any) (string, error) { return value, nil } + +func containsIgnoreCase(s, substr string) bool { + return strings.Contains(strings.ToLower(s), strings.ToLower(substr)) +} diff --git a/test/library/names.go b/test/library/names.go new file mode 100644 index 000000000..6d69eb157 --- /dev/null +++ b/test/library/names.go @@ -0,0 +1,46 @@ +package library + +import ( + "regexp" + "strings" +) + +// SanitizeResourceName converts a test name into a valid Kubernetes resource name +// compliant with RFC 1123 subdomain rules: lowercase alphanumeric characters, '-' or '.', +// starting and ending with an alphanumeric character. The maximum length is 63 characters. +func SanitizeResourceName(name string) string { + // Convert to lowercase + name = strings.ToLower(name) + + // Split by dots to process each label separately (RFC 1123 requires each label + // between dots to start and end with alphanumeric characters) + labels := strings.Split(name, ".") + validLabels := make([]string, 0, len(labels)) + + invalidChars := regexp.MustCompile(`[^a-z0-9\-]+`) + consecutiveHyphens := regexp.MustCompile(`-+`) + + for _, label := range labels { + // Replace invalid characters with hyphens + label = invalidChars.ReplaceAllString(label, "-") + // Replace consecutive hyphens with a single hyphen + label = consecutiveHyphens.ReplaceAllString(label, "-") + // Remove leading/trailing hyphens from this label + label = strings.Trim(label, "-") + // Only include non-empty labels + if label != "" { + validLabels = append(validLabels, label) + } + } + + // Rejoin labels with dots + name = strings.Join(validLabels, ".") + + // Kubernetes resource names have a maximum length of 63 characters + if len(name) > 63 { + name = name[:63] + } + // Ensure it doesn't end with a hyphen or dot after truncation + name = strings.TrimRight(name, "-.") + return name +}