diff --git a/testsuite/chaos_mesh/__init__.py b/testsuite/chaos_mesh/__init__.py new file mode 100644 index 00000000..0ec85805 --- /dev/null +++ b/testsuite/chaos_mesh/__init__.py @@ -0,0 +1,107 @@ +"""PodChaos object for simulating Pod faults in Kubernetes.""" + +from typing import Dict, List, Optional, Literal + +from testsuite.kubernetes import KubernetesObject, modify +from testsuite.kubernetes.client import KubernetesClient + + +class PodChaos(KubernetesObject): + """Represents PodChaos CR from Chaos Mesh.""" + + ACTIONS = Literal["pod-failure", "pod-kill", "container-kill"] + MODES = Literal["one", "all", "fixed", "fixed-percent", "random-max-percent"] + + @classmethod + def create_instance( + cls, + cluster: KubernetesClient, + name: str, + namespace: str = "kuadrant-system", + labels: Optional[Dict[str, str]] = None, + ): + """Creates base instance.""" + model = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "PodChaos", + "metadata": {"name": name, "namespace": namespace, "labels": labels or {}}, + "spec": {"selector": {"labelSelectors": {}}}, + } + return cls(model, context=cluster.context) + + @modify + def set_action(self, action: ACTIONS): + """Set the chaos action.""" + self.model.spec.action = action + + @modify + def set_mode(self, mode: MODES, value: Optional[str] = None): + """Set the experiment mode.""" + self.model.spec.mode = mode + if value is not None: + self.model.spec.value = value + + @modify + def set_selector(self, labels: Dict[str, str], namespaces: Optional[List[str]] = None): + """Set pod selector.""" + self.model.spec.selector.labelSelectors = labels + if namespaces: + self.model.spec.selector.namespaces = namespaces + + @modify + def set_container_names(self, containers: List[str]): + """Set target container names.""" + self.model.spec.containerNames = containers + + @modify + def set_grace_period(self, period: int): + """Set grace period for pod-kill action.""" + self.model.spec.gracePeriod = period + + @modify + def 
set_duration(self, duration: str): + """Set experiment duration.""" + self.model.spec.duration = duration + + def pod_failure( + self, + labels: Dict[str, str], + duration: str, + mode: MODES = "one", + value: Optional[str] = None, + namespaces: Optional[List[str]] = None, + ): + """Configure for pod-failure chaos experiment.""" + self.set_action("pod-failure") + self.set_mode(mode, value) + self.set_selector(labels, namespaces) + self.set_duration(duration) + + def pod_kill( + self, + labels: Dict[str, str], + mode: MODES = "one", + value: Optional[str] = None, + namespaces: Optional[List[str]] = None, + grace_period: int = 0, + ): + """Configure for pod-kill chaos experiment.""" + self.set_action("pod-kill") + self.set_mode(mode, value) + self.set_selector(labels, namespaces) + if grace_period > 0: + self.set_grace_period(grace_period) + + def container_kill( + self, + labels: Dict[str, str], + containers: List[str], + mode: MODES = "one", + value: Optional[str] = None, + namespaces: Optional[List[str]] = None, + ): + """Configure for container-kill chaos experiment.""" + self.set_action("container-kill") + self.set_mode(mode, value) + self.set_selector(labels, namespaces) + self.set_container_names(containers) diff --git a/testsuite/custom_metrics_apiserver/__init__.py b/testsuite/custom_metrics_apiserver/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/testsuite/custom_metrics_apiserver/client.py b/testsuite/custom_metrics_apiserver/client.py index 1334cb47..7655d5f8 100644 --- a/testsuite/custom_metrics_apiserver/client.py +++ b/testsuite/custom_metrics_apiserver/client.py @@ -1,23 +1,21 @@ -from urllib.parse import urljoin +"""Client for interacting with the Custom Metrics API Server. + +This module provides a client for writing metrics to the Custom Metrics API Server, +which can be used to set custom metrics for Kubernetes resources. 
+""" + import httpx class CustomMetricsApiServerClient(httpx.Client): - """Client for the Custom Metrics API Server""" - - def __init__(self, url: str): - return super().__init__(base_url=url, verify=False, headers={"Content-Type": "application/json"}) + """Client for the Custom Metrics API Server.""" - def write_metric(self, namespace: str, resource_type: str, name: str, metric: str, value: int): - """Write a metric value to the Custom Metrics API Server. + def __init__(self, url: str) -> None: + """Initialize the client with base URL and default headers""" + super().__init__(base_url=url, verify=False, headers={"Content-Type": "application/json"}) - Args: - namespace: The namespace of the resource - resource_type: The type of resource (e.g. 'pods', 'services') - name: The name of the resource - metric: The name of the metric - value: The value to set - """ + def write_metric(self, namespace: str, resource_type: str, name: str, metric: str, value: int) -> int: + """Write a metric value to the Custom Metrics API Server""" endpoint = f"/write-metrics/namespaces/{namespace}/{resource_type}/{name}/{metric}" response = self.post(endpoint, content=f"{value}") diff --git a/testsuite/tests/singlecluster/chaos/__init__.py b/testsuite/tests/singlecluster/chaos/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/testsuite/tests/singlecluster/chaos/conftest.py b/testsuite/tests/singlecluster/chaos/conftest.py new file mode 100644 index 00000000..4ddb77f0 --- /dev/null +++ b/testsuite/tests/singlecluster/chaos/conftest.py @@ -0,0 +1,54 @@ +"""Conftest for chaos testing.""" + +import pytest +import openshift_client as oc + +from testsuite.chaos_mesh import PodChaos + + +@pytest.fixture(scope="module") +def create_pod_chaos(request, cluster, blame): + """Creates and returns a PodChaos experiment.""" + + def _create_pod_chaos(name, namespace="kuadrant-system"): + chaos = PodChaos.create_instance(cluster, blame(name), namespace=namespace) + 
request.addfinalizer(chaos.delete) + return chaos + + return _create_pod_chaos + + +@pytest.fixture(scope="module") +def kuadrant_operator_pod_chaos(create_pod_chaos): + """Creates a PodChaos experiment targeting the Kuadrant operator.""" + chaos = create_pod_chaos("operator-kill") + chaos.container_kill( + labels={"app": "kuadrant"}, + containers=["manager"], + ) + return chaos + + +@pytest.fixture(autouse=True) +def restart_operator(cluster): + """Restart the Kuadrant operator deployment after each test.""" + yield # Run the test first + + # After test, delete the pod to force a restart + kuadrant_system = cluster.change_project("kuadrant-system") + with kuadrant_system.context: + # Find and delete the operator pod + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + pod.delete() + + +@pytest.fixture(autouse=True) +def commit(): + """ + Override commit fixture to do nothing. + + In chaos testing, we don't want the standard commit behavior that + ensures all components are ready and stable before tests run. + Chaos tests need to control component lifecycle themselves. 
+ """ + pass # pylint: disable=unnecessary-pass diff --git a/testsuite/tests/singlecluster/chaos/container_kill/container_kill.py b/testsuite/tests/singlecluster/chaos/container_kill/container_kill.py new file mode 100644 index 00000000..8d8d09b5 --- /dev/null +++ b/testsuite/tests/singlecluster/chaos/container_kill/container_kill.py @@ -0,0 +1,44 @@ +"""Test Kuadrant operator resilience when its container is killed.""" + +import json +import pytest +import openshift_client as oc + +pytestmark = [pytest.mark.disruptive, pytest.mark.kuadrant_only] + + +def test_operator_container_kill(cluster, kuadrant_operator_pod_chaos): + """Test operator resilience when its container is killed.""" + # Check actual operator labels first + kuadrant_system = cluster.change_project("kuadrant-system") + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" + + # Apply chaos + kuadrant_operator_pod_chaos.commit() + + # Get logs after recovery + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + log_content = next(iter(pod.logs().values())) + + # Check each log line for errors + errors = [] + for line in log_content.splitlines(): + try: + log_entry = json.loads(line) + if log_entry.get("level") == "error": + error_details = { + "msg": log_entry.get("msg", "Unknown error"), + "error": log_entry.get("error"), + "stacktrace": log_entry.get("stacktrace"), + "timestamp": log_entry.get("ts"), + } + # Remove None values for cleaner output + error_details = {k: v for k, v in error_details.items() if v is not None} + errors.append(error_details) + except json.JSONDecodeError: + continue # Skip non-JSON lines + + assert not errors, f"Found errors in operator logs: {errors}" diff --git a/testsuite/tests/singlecluster/chaos/control_plane/conftest.py b/testsuite/tests/singlecluster/chaos/control_plane/conftest.py new file mode 100644 index 00000000..317d96cb --- 
/dev/null
+++ b/testsuite/tests/singlecluster/chaos/control_plane/conftest.py
@@ -0,0 +1,70 @@
+"""Conftest for control plane chaos testing."""
+
+import pytest
+
+@pytest.fixture(scope="module")
+def kuadrant_operator_selector():
+    """Selector for Kuadrant operator pods."""
+    return {"app": "kuadrant"}
+
+
+@pytest.fixture(scope="module")
+def control_plane_namespace():
+    """Namespace where control plane components run."""
+    return "kuadrant-system"
+
+
+@pytest.fixture(scope="module")
+def operator_chaos_factory(create_pod_chaos, kuadrant_operator_selector):
+    """Factory fixture for creating operator chaos experiments."""
+    def _create_operator_chaos(name, action, **kwargs):
+        chaos = create_pod_chaos(f"operator-{name}")
+
+        if action == "container-kill":
+            chaos.container_kill(
+                labels=kuadrant_operator_selector,
+                containers=kwargs.get("containers", ["manager"]),
+            )
+        elif action == "pod-kill":
+            chaos.pod_kill(
+                labels=kuadrant_operator_selector,
+                grace_period=kwargs.get("grace_period", 0),
+            )
+        elif action == "pod-failure":
+            chaos.pod_failure(
+                labels=kuadrant_operator_selector,
+                # PodChaos.pod_failure requires a duration; honour the caller's
+                # kwargs (e.g. test_operator_pod_failure_custom_duration) with a default.
+                duration=kwargs.get("duration", "30s"),
+            )
+        else:
+            raise ValueError(f"Unsupported action: {action}")
+
+        return chaos
+    return _create_operator_chaos
+
+
+@pytest.fixture(scope="module")
+def operator_network_chaos(create_network_chaos, kuadrant_operator_selector):
+    """Creates NetworkChaos targeting the Kuadrant operator."""
+    def _create_network_chaos(name, action="delay", **kwargs):
+        chaos = create_network_chaos(f"operator-network-{name}")
+        chaos.configure_network_chaos(
+            labels=kuadrant_operator_selector,
+            action=action,
+            **kwargs
+        )
+        return chaos
+    return _create_network_chaos
+
+
+@pytest.fixture(scope="module")
+def operator_stress_chaos(create_stress_chaos, kuadrant_operator_selector):
+    """Creates StressChaos targeting the Kuadrant operator."""
+    def _create_stress_chaos(name, stress_type="memory", **kwargs):
+        chaos = create_stress_chaos(f"operator-stress-{name}")
+        chaos.configure_stress(
+
labels=kuadrant_operator_selector, + stress_type=stress_type, + **kwargs + ) + return chaos + return _create_stress_chaos diff --git a/testsuite/tests/singlecluster/chaos/control_plane/pod_chaos/test_operator_container_kill.py b/testsuite/tests/singlecluster/chaos/control_plane/pod_chaos/test_operator_container_kill.py new file mode 100644 index 00000000..18adeb4c --- /dev/null +++ b/testsuite/tests/singlecluster/chaos/control_plane/pod_chaos/test_operator_container_kill.py @@ -0,0 +1,44 @@ +"""Test Kuadrant operator resilience when its container is killed.""" + +import json +import pytest +import openshift_client as oc + +pytestmark = [pytest.mark.disruptive, pytest.mark.kuadrant_only] + + +def test_operator_container_kill_basic(cluster, operator_chaos_factory): + """Test basic operator container kill and recovery.""" + kuadrant_system = cluster.change_project("kuadrant-system") + + # Verify operator is running + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" + + # Apply chaos - kill container + chaos = operator_chaos_factory("container-kill-basic", "container-kill") + chaos.commit() + + # Verify recovery and check logs + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + log_content = next(iter(pod.logs().values())) + + # Check for error-level logs + errors = [] + for line in log_content.splitlines(): + try: + log_entry = json.loads(line) + if log_entry.get("level") == "error": + error_details = { + "msg": log_entry.get("msg", "Unknown error"), + "error": log_entry.get("error"), + "timestamp": log_entry.get("ts"), + } + error_details = {k: v for k, v in error_details.items() if v is not None} + errors.append(error_details) + except json.JSONDecodeError: + continue + + assert not errors, f"Found errors in operator logs: {errors}" diff --git a/testsuite/tests/singlecluster/chaos/control_plane/pod_chaos/test_operator_pod_kill.py 
b/testsuite/tests/singlecluster/chaos/control_plane/pod_chaos/test_operator_pod_kill.py new file mode 100644 index 00000000..fe3bde58 --- /dev/null +++ b/testsuite/tests/singlecluster/chaos/control_plane/pod_chaos/test_operator_pod_kill.py @@ -0,0 +1,78 @@ +"""Test Kuadrant operator resilience with pod-kill chaos.""" + +import pytest +import openshift_client as oc + +pytestmark = [pytest.mark.disruptive, pytest.mark.kuadrant_only] + + +def test_operator_pod_kill_basic(cluster, operator_chaos_factory): + """Test basic operator pod kill and recovery.""" + kuadrant_system = cluster.change_project("kuadrant-system") + + # Verify operator is running + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" + original_pod_name = pod.model.metadata.name + + # Apply chaos - kill pod + chaos = operator_chaos_factory("pod-kill-basic", "pod-kill") + chaos.commit() + + # Verify new pod is created and running + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" + # Should be a different pod + assert pod.model.metadata.name != original_pod_name + + +def test_operator_pod_kill_with_grace_period(cluster, operator_chaos_factory): + """Test operator pod kill with custom grace period.""" + # Create chaos with 30s grace period + chaos = operator_chaos_factory("graceful-kill", "pod-kill", grace_period=30) + chaos.commit() + + kuadrant_system = cluster.change_project("kuadrant-system") + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" + + +def test_operator_pod_kill_immediate(cluster, operator_chaos_factory): + """Test operator pod kill with immediate termination.""" + # Create chaos with 0s grace period (immediate kill) + chaos = operator_chaos_factory("immediate-kill", "pod-kill", grace_period=0) + chaos.commit() + + 
kuadrant_system = cluster.change_project("kuadrant-system") + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" + + +def test_operator_pod_failure_recovery(cluster, operator_chaos_factory): + """Test operator recovery from pod failure.""" + kuadrant_system = cluster.change_project("kuadrant-system") + + # Apply chaos - make pod fail + chaos = operator_chaos_factory("pod-failure-recovery", "pod-failure") + chaos.commit() + + # Verify operator eventually recovers + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" + + +def test_operator_pod_failure_custom_duration(cluster, operator_chaos_factory): + """Test operator pod failure with custom duration.""" + # Create chaos with longer failure duration + chaos = operator_chaos_factory("long-failure", "pod-failure", duration="60s") + chaos.commit() + + kuadrant_system = cluster.change_project("kuadrant-system") + with kuadrant_system.context: + pod = oc.selector("pod", labels={"app": "kuadrant"}).object() + assert pod.model.status.phase == "Running" diff --git a/testsuite/tests/singlecluster/chaos/data_plane/authorino/network_chaos/test_oidc_provider_network.py b/testsuite/tests/singlecluster/chaos/data_plane/authorino/network_chaos/test_oidc_provider_network.py new file mode 100644 index 00000000..6ce94a62 --- /dev/null +++ b/testsuite/tests/singlecluster/chaos/data_plane/authorino/network_chaos/test_oidc_provider_network.py @@ -0,0 +1,113 @@ +"""Test Authorino resilience when OIDC provider network is disrupted.""" + +import pytest +import time + +pytestmark = [pytest.mark.disruptive, pytest.mark.authorino] + + +def test_authorino_oidc_network_delay( + cluster, authorino_network_chaos, oidc_provider, auth_policy_with_oidc +): + """Test Authorino behavior with delayed OIDC provider responses.""" + # Apply auth policy that uses OIDC + 
auth_policy_with_oidc.commit() + + # Create network delay to OIDC provider + chaos = authorino_network_chaos( + "oidc-delay", + action="delay", + external_targets=[oidc_provider.issuer_url], + delay="3s", + duration="60s" + ) + chaos.commit() + + # Test authentication with delay + start_time = time.time() + response = auth_policy_with_oidc.test_authentication() + end_time = time.time() + + # Should still work but be slower + assert response.status_code in [200, 401] # Auth decision made + assert end_time - start_time > 3.0 # Delay applied + + # Verify Authorino handles timeout gracefully + assert "timeout" not in response.headers.get("x-ext-auth-reason", "").lower() + + +def test_authorino_oidc_network_partition( + cluster, authorino_network_chaos, oidc_provider, auth_policy_with_oidc +): + """Test Authorino behavior when OIDC provider is unreachable.""" + auth_policy_with_oidc.commit() + + # Create network partition to OIDC provider + chaos = authorino_network_chaos( + "oidc-partition", + action="partition", + external_targets=[oidc_provider.issuer_url], + duration="30s" + ) + chaos.commit() + + # Test authentication during partition + response = auth_policy_with_oidc.test_authentication() + + # Should fail gracefully (not hang indefinitely) + assert response.status_code == 401 + assert "connection" in response.headers.get("x-ext-auth-reason", "").lower() + + +def test_authorino_oidc_intermittent_failures( + cluster, authorino_network_chaos, oidc_provider, auth_policy_with_oidc +): + """Test Authorino with intermittent OIDC provider failures.""" + auth_policy_with_oidc.commit() + + # Create intermittent network issues (50% packet loss) + chaos = authorino_network_chaos( + "oidc-intermittent", + action="loss", + external_targets=[oidc_provider.issuer_url], + loss_percent=50, + duration="45s" + ) + chaos.commit() + + # Test multiple authentication attempts + success_count = 0 + total_attempts = 10 + + for _ in range(total_attempts): + response = 
auth_policy_with_oidc.test_authentication() + if response.status_code == 200: + success_count += 1 + time.sleep(1) + + # Some should succeed, some should fail + assert 0 < success_count < total_attempts + print(f"Success rate: {success_count}/{total_attempts}") + + +def test_authorino_oidc_discovery_chaos( + cluster, authorino_network_chaos, oidc_provider, auth_policy_with_oidc +): + """Test Authorino when OIDC discovery endpoint is disrupted.""" + # Target specifically the .well-known/openid-configuration endpoint + discovery_url = f"{oidc_provider.issuer_url}/.well-known/openid-configuration" + + chaos = authorino_network_chaos( + "oidc-discovery-chaos", + action="delay", + external_targets=[discovery_url], + delay="10s", + duration="30s" + ) + chaos.commit() + + # Apply policy (this should trigger discovery) + auth_policy_with_oidc.commit() + + # Verify policy eventually becomes ready despite discovery delays + assert auth_policy_with_oidc.wait_for_ready(timeout=60) diff --git a/testsuite/tests/singlecluster/chaos/data_plane/conftest.py b/testsuite/tests/singlecluster/chaos/data_plane/conftest.py new file mode 100644 index 00000000..d492781d --- /dev/null +++ b/testsuite/tests/singlecluster/chaos/data_plane/conftest.py @@ -0,0 +1,107 @@ +"""Conftest for data plane chaos testing.""" + +import pytest + + +@pytest.fixture(scope="module") +def authorino_selector(): + """Selector for Authorino pods.""" + return {"app": "authorino"} + + +@pytest.fixture(scope="module") +def limitador_selector(): + """Selector for Limitador pods.""" + return {"app": "limitador"} + + +@pytest.fixture(scope="module") +def data_plane_namespace(): + """Namespace where data plane components run.""" + return "kuadrant-system" + + +@pytest.fixture(scope="module") +def authorino_chaos_factory(create_pod_chaos, authorino_selector): + """Factory fixture for creating Authorino chaos experiments.""" + def _create_authorino_chaos(name, action, **kwargs): + chaos = 
create_pod_chaos(f"authorino-{name}")
+
+        if action == "container-kill":
+            chaos.container_kill(
+                labels=authorino_selector,
+                containers=kwargs.get("containers", ["authorino"]),
+                # NOTE: PodChaos.container_kill takes no duration; the kill is immediate.
+            )
+        elif action == "pod-kill":
+            chaos.pod_kill(
+                labels=authorino_selector,
+                grace_period=kwargs.get("grace_period", 0),
+            )
+        elif action == "pod-failure":
+            chaos.pod_failure(
+                labels=authorino_selector,
+                duration=kwargs.get("duration", "30s"),
+            )
+        else:
+            raise ValueError(f"Unsupported action: {action}")
+
+        return chaos
+    return _create_authorino_chaos
+
+
+@pytest.fixture(scope="module")
+def limitador_chaos_factory(create_pod_chaos, limitador_selector):
+    """Factory fixture for creating Limitador chaos experiments."""
+    def _create_limitador_chaos(name, action, **kwargs):
+        chaos = create_pod_chaos(f"limitador-{name}")
+
+        if action == "container-kill":
+            chaos.container_kill(
+                labels=limitador_selector,
+                containers=kwargs.get("containers", ["limitador"]),
+                # NOTE: PodChaos.container_kill takes no duration; the kill is immediate.
+            )
+        elif action == "pod-kill":
+            chaos.pod_kill(
+                labels=limitador_selector,
+                grace_period=kwargs.get("grace_period", 0),
+            )
+        elif action == "pod-failure":
+            chaos.pod_failure(
+                labels=limitador_selector,
+                duration=kwargs.get("duration", "30s"),
+            )
+        else:
+            raise ValueError(f"Unsupported action: {action}")
+
+        return chaos
+    return _create_limitador_chaos
+
+
+@pytest.fixture(scope="module")
+def authorino_network_chaos(create_network_chaos, authorino_selector):
+    """Creates NetworkChaos targeting Authorino."""
+    def _create_network_chaos(name, action="delay", **kwargs):
+        chaos = create_network_chaos(f"authorino-network-{name}")
+        chaos.configure_network_chaos(
+            labels=authorino_selector,
+            action=action,
+            **kwargs
+        )
+        return chaos
+    return _create_network_chaos
+
+
+@pytest.fixture(scope="module")
+def limitador_network_chaos(create_network_chaos, limitador_selector):
+    """Creates NetworkChaos targeting Limitador."""
+
def _create_network_chaos(name, action="delay", **kwargs): + chaos = create_network_chaos(f"limitador-network-{name}") + chaos.configure_network_chaos( + labels=limitador_selector, + action=action, + **kwargs + ) + return chaos + return _create_network_chaos diff --git a/testsuite/tests/singlecluster/chaos/data_plane/limitador/network_chaos/test_redis_connection_chaos.py b/testsuite/tests/singlecluster/chaos/data_plane/limitador/network_chaos/test_redis_connection_chaos.py new file mode 100644 index 00000000..3354e360 --- /dev/null +++ b/testsuite/tests/singlecluster/chaos/data_plane/limitador/network_chaos/test_redis_connection_chaos.py @@ -0,0 +1,151 @@ +"""Test Limitador resilience when Redis connection is disrupted.""" + +import pytest +import time + +pytestmark = [pytest.mark.disruptive, pytest.mark.limitador] + + +def test_limitador_redis_network_delay( + cluster, limitador_network_chaos, redis_backend, rate_limit_policy +): + """Test Limitador behavior with delayed Redis responses.""" + rate_limit_policy.commit() + + # Create network delay to Redis + chaos = limitador_network_chaos( + "redis-delay", + action="delay", + external_targets=["redis.kuadrant-system.svc.cluster.local"], + delay="500ms", + duration="60s" + ) + chaos.commit() + + # Test rate limiting with delay + start_time = time.time() + response = rate_limit_policy.test_rate_limit() + end_time = time.time() + + # Should still work but be slower + assert response.status_code in [200, 429] # Rate limit decision made + assert end_time - start_time > 0.5 # Delay applied + + +def test_limitador_redis_network_partition( + cluster, limitador_network_chaos, redis_backend, rate_limit_policy +): + """Test Limitador behavior when Redis is unreachable.""" + rate_limit_policy.commit() + + # Create network partition to Redis + chaos = limitador_network_chaos( + "redis-partition", + action="partition", + external_targets=["redis.kuadrant-system.svc.cluster.local"], + duration="30s" + ) + chaos.commit() + + # 
Test rate limiting during partition + response = rate_limit_policy.test_rate_limit() + + # Should fail-open or fail-closed based on configuration + # This depends on Limitador's configuration + assert response.status_code in [200, 500, 503] + + +def test_limitador_redis_intermittent_connection( + cluster, limitador_network_chaos, redis_backend, rate_limit_policy +): + """Test Limitador with intermittent Redis connection issues.""" + rate_limit_policy.commit() + + # Create intermittent network issues (30% packet loss) + chaos = limitador_network_chaos( + "redis-intermittent", + action="loss", + external_targets=["redis.kuadrant-system.svc.cluster.local"], + loss_percent=30, + duration="45s" + ) + chaos.commit() + + # Test multiple rate limit attempts + responses = [] + for _ in range(20): + response = rate_limit_policy.test_rate_limit() + responses.append(response.status_code) + time.sleep(0.5) + + # Should have mixed results due to intermittent failures + unique_responses = set(responses) + assert len(unique_responses) > 1 # Should have different response codes + print(f"Response distribution: {dict(zip(*zip(*[(r, responses.count(r)) for r in unique_responses])))}") + + +def test_limitador_redis_high_latency( + cluster, limitador_network_chaos, redis_backend, rate_limit_policy +): + """Test Limitador with high Redis latency.""" + rate_limit_policy.commit() + + # Create high latency to Redis + chaos = limitador_network_chaos( + "redis-high-latency", + action="delay", + external_targets=["redis.kuadrant-system.svc.cluster.local"], + delay="2s", + jitter="500ms", + duration="60s" + ) + chaos.commit() + + # Test rate limiting under high latency + slow_responses = 0 + total_requests = 10 + + for _ in range(total_requests): + start_time = time.time() + response = rate_limit_policy.test_rate_limit() + end_time = time.time() + + if end_time - start_time > 1.5: # Accounting for jitter + slow_responses += 1 + + # Verify response is still valid + assert response.status_code 
in [200, 429, 500, 503] + + # Most responses should be slow due to Redis latency + assert slow_responses >= total_requests * 0.7 # At least 70% slow + + +def test_limitador_redis_connection_reset( + cluster, limitador_network_chaos, redis_backend, rate_limit_policy +): + """Test Limitador when Redis connections are reset.""" + rate_limit_policy.commit() + + # Create connection resets to Redis + chaos = limitador_network_chaos( + "redis-reset", + action="abort", + external_targets=["redis.kuadrant-system.svc.cluster.local"], + abort_percent=50, + duration="30s" + ) + chaos.commit() + + # Test rate limiting with connection resets + error_responses = 0 + total_requests = 15 + + for _ in range(total_requests): + response = rate_limit_policy.test_rate_limit() + if response.status_code >= 500: + error_responses += 1 + time.sleep(1) + + # Should have some errors due to connection resets + assert error_responses > 0 + print(f"Error rate: {error_responses}/{total_requests}")