Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions testsuite/chaos_mesh/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""PodChaos object for simulating Pod faults in Kubernetes."""

from typing import Dict, List, Optional, Literal

from testsuite.kubernetes import KubernetesObject, modify
from testsuite.kubernetes.client import KubernetesClient


class PodChaos(KubernetesObject):
    """Wrapper around the Chaos Mesh ``PodChaos`` custom resource."""

    ACTIONS = Literal["pod-failure", "pod-kill", "container-kill"]
    MODES = Literal["one", "all", "fixed", "fixed-percent", "random-max-percent"]

    @classmethod
    def create_instance(
        cls,
        cluster: KubernetesClient,
        name: str,
        namespace: str = "kuadrant-system",
        labels: Optional[Dict[str, str]] = None,
    ):
        """Build a PodChaos object from a minimal model with an empty label selector."""
        metadata = {"name": name, "namespace": namespace, "labels": labels or {}}
        spec = {"selector": {"labelSelectors": {}}}
        return cls(
            {
                "apiVersion": "chaos-mesh.org/v1alpha1",
                "kind": "PodChaos",
                "metadata": metadata,
                "spec": spec,
            },
            context=cluster.context,
        )

    @modify
    def set_action(self, action: ACTIONS):
        """Store the chaos action in ``spec.action``."""
        self.model.spec.action = action

    @modify
    def set_mode(self, mode: MODES, value: Optional[str] = None):
        """Store the experiment mode and, when one is given, its accompanying value."""
        spec = self.model.spec
        spec.mode = mode
        if value is None:
            # Leave ``spec.value`` untouched when no value was supplied.
            return
        spec.value = value

    @modify
    def set_selector(self, labels: Dict[str, str], namespaces: Optional[List[str]] = None):
        """Select target pods by labels, optionally restricted to the given namespaces."""
        selector = self.model.spec.selector
        selector.labelSelectors = labels
        if not namespaces:
            # None or an empty list: do not write a namespaces restriction.
            return
        selector.namespaces = namespaces

    @modify
    def set_container_names(self, containers: List[str]):
        """Store the names of the containers to target."""
        self.model.spec.containerNames = containers

    @modify
    def set_grace_period(self, period: int):
        """Store the grace period used by the pod-kill action."""
        self.model.spec.gracePeriod = period

    @modify
    def set_duration(self, duration: str):
        """Store how long the experiment should last."""
        self.model.spec.duration = duration

    def _configure_target(self, action, mode, value, labels, namespaces):
        """Apply the action, mode and selector settings shared by every experiment preset."""
        self.set_action(action)
        self.set_mode(mode, value)
        self.set_selector(labels, namespaces)

    def pod_failure(
        self,
        labels: Dict[str, str],
        duration: str,
        mode: MODES = "one",
        value: Optional[str] = None,
        namespaces: Optional[List[str]] = None,
    ):
        """Set this object up as a pod-failure experiment lasting ``duration``."""
        self._configure_target("pod-failure", mode, value, labels, namespaces)
        self.set_duration(duration)

    def pod_kill(
        self,
        labels: Dict[str, str],
        mode: MODES = "one",
        value: Optional[str] = None,
        namespaces: Optional[List[str]] = None,
        grace_period: int = 0,
    ):
        """Set this object up as a pod-kill experiment.

        The grace period is written only when it is greater than zero.
        """
        self._configure_target("pod-kill", mode, value, labels, namespaces)
        if grace_period > 0:
            self.set_grace_period(grace_period)

    def container_kill(
        self,
        labels: Dict[str, str],
        containers: List[str],
        mode: MODES = "one",
        value: Optional[str] = None,
        namespaces: Optional[List[str]] = None,
    ):
        """Set this object up as a container-kill experiment against ``containers``."""
        self._configure_target("container-kill", mode, value, labels, namespaces)
        self.set_container_names(containers)
Empty file.
26 changes: 12 additions & 14 deletions testsuite/custom_metrics_apiserver/client.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
from urllib.parse import urljoin
"""Client for interacting with the Custom Metrics API Server.

This module provides a client for writing metrics to the Custom Metrics API Server,
which can be used to set custom metrics for Kubernetes resources.
"""

import httpx


class CustomMetricsApiServerClient(httpx.Client):
"""Client for the Custom Metrics API Server"""

def __init__(self, url: str):
return super().__init__(base_url=url, verify=False, headers={"Content-Type": "application/json"})
"""Client for the Custom Metrics API Server."""

def write_metric(self, namespace: str, resource_type: str, name: str, metric: str, value: int):
"""Write a metric value to the Custom Metrics API Server.
def __init__(self, url: str) -> None:
"""Initialize the client with base URL and default headers"""
super().__init__(base_url=url, verify=False, headers={"Content-Type": "application/json"})

Args:
namespace: The namespace of the resource
resource_type: The type of resource (e.g. 'pods', 'services')
name: The name of the resource
metric: The name of the metric
value: The value to set
"""
def write_metric(self, namespace: str, resource_type: str, name: str, metric: str, value: int) -> int:
"""Write a metric value to the Custom Metrics API Server"""
endpoint = f"/write-metrics/namespaces/{namespace}/{resource_type}/{name}/{metric}"

response = self.post(endpoint, content=f"{value}")
Expand Down
Empty file.
54 changes: 54 additions & 0 deletions testsuite/tests/singlecluster/chaos/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Conftest for chaos testing."""

import pytest
import openshift_client as oc

from testsuite.chaos_mesh import PodChaos


@pytest.fixture(scope="module")
def create_pod_chaos(request, cluster, blame):
    """Provide a factory that builds PodChaos experiments and registers their deletion."""

    def _factory(name, namespace="kuadrant-system"):
        unique_name = blame(name)
        experiment = PodChaos.create_instance(cluster, unique_name, namespace=namespace)
        # Deletion is tied to this fixture's (module) scope via the request object.
        request.addfinalizer(experiment.delete)
        return experiment

    return _factory


@pytest.fixture(scope="module")
def kuadrant_operator_pod_chaos(create_pod_chaos):
    """Return a PodChaos configured to kill the ``manager`` container of the Kuadrant operator."""
    operator_labels = {"app": "kuadrant"}
    target_containers = ["manager"]

    experiment = create_pod_chaos("operator-kill")
    experiment.container_kill(labels=operator_labels, containers=target_containers)
    return experiment


@pytest.fixture(autouse=True)
def restart_operator(cluster):
    """Delete the Kuadrant operator pod(s) after each test to force a restart.

    The deletion goes through the selector rather than a single object so that
    teardown does not error when the selector matches no pod or more than one
    pod (e.g. an old pod still terminating next to its replacement).
    """
    yield  # Run the test first

    # After the test, delete every matching operator pod in kuadrant-system.
    kuadrant_system = cluster.change_project("kuadrant-system")
    with kuadrant_system.context:
        oc.selector("pod", labels={"app": "kuadrant"}).delete(ignore_not_found=True)


@pytest.fixture(autouse=True)
def commit():
    """Replace the ``commit`` fixture with a no-op for chaos tests.

    The regular commit behavior, which makes sure all components are ready
    and stable before a test runs, is not wanted here: chaos tests manage
    the component lifecycle on their own.
    """
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Test Kuadrant operator resilience when its container is killed."""

import json
import pytest
import openshift_client as oc

pytestmark = [pytest.mark.disruptive, pytest.mark.kuadrant_only]


def _collect_error_entries(log_content):
    """Return a summary dict for every error-level entry in JSON-lines log text.

    Lines that are not valid JSON, or that decode to something other than a
    JSON object, are ignored. Fields that are missing from an entry are left
    out of its summary.
    """
    errors = []
    for line in log_content.splitlines():
        try:
            log_entry = json.loads(line)
        except json.JSONDecodeError:
            continue  # Skip non-JSON lines
        # A line such as `123` or `"text"` is valid JSON but has no fields to inspect.
        if not isinstance(log_entry, dict) or log_entry.get("level") != "error":
            continue
        error_details = {
            "msg": log_entry.get("msg", "Unknown error"),
            "error": log_entry.get("error"),
            "stacktrace": log_entry.get("stacktrace"),
            "timestamp": log_entry.get("ts"),
        }
        # Remove None values for cleaner output
        errors.append({k: v for k, v in error_details.items() if v is not None})
    return errors


def test_operator_container_kill(cluster, kuadrant_operator_pod_chaos):
    """Test operator resilience when its container is killed."""
    # Make sure the operator pod is running before any chaos is applied
    kuadrant_system = cluster.change_project("kuadrant-system")
    with kuadrant_system.context:
        pod = oc.selector("pod", labels={"app": "kuadrant"}).object()
        assert pod.model.status.phase == "Running"

    # Apply chaos
    kuadrant_operator_pod_chaos.commit()

    # Read the operator logs once the chaos experiment has been committed
    # NOTE(review): nothing here waits for the container to restart before the
    # logs are read - confirm whether an explicit wait is needed.
    with kuadrant_system.context:
        pod = oc.selector("pod", labels={"app": "kuadrant"}).object()
        log_content = next(iter(pod.logs().values()))

    errors = _collect_error_entries(log_content)

    assert not errors, f"Found errors in operator logs: {errors}"
69 changes: 69 additions & 0 deletions testsuite/tests/singlecluster/chaos/control_plane/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Conftest for control plane chaos testing."""

import pytest

@pytest.fixture(scope="module")
def kuadrant_operator_selector():
    """Label selector used to match the Kuadrant operator pods."""
    selector = {"app": "kuadrant"}
    return selector


@pytest.fixture(scope="module")
def control_plane_namespace():
    """Name of the namespace the control plane components run in."""
    namespace = "kuadrant-system"
    return namespace


@pytest.fixture(scope="module")
def operator_chaos_factory(create_pod_chaos, kuadrant_operator_selector):
    """Factory fixture for creating PodChaos experiments aimed at the Kuadrant operator.

    The returned callable takes a ``name`` (prefixed with ``operator-``), an
    ``action`` and optional keyword arguments:

    * ``container-kill``: ``containers`` (defaults to ``["manager"]``)
    * ``pod-kill``: ``grace_period`` (defaults to ``0``)
    * ``pod-failure``: ``duration`` (defaults to ``"30s"``)

    Any other action raises ``ValueError``.
    """

    def _create_operator_chaos(name, action, **kwargs):
        chaos = create_pod_chaos(f"operator-{name}")

        if action == "container-kill":
            chaos.container_kill(
                labels=kuadrant_operator_selector,
                containers=kwargs.get("containers", ["manager"]),
            )
        elif action == "pod-kill":
            chaos.pod_kill(
                labels=kuadrant_operator_selector,
                grace_period=kwargs.get("grace_period", 0),
            )
        elif action == "pod-failure":
            chaos.pod_failure(
                labels=kuadrant_operator_selector,
                # pod_failure() has no default for duration; omitting it raises TypeError.
                duration=kwargs.get("duration", "30s"),
            )
        else:
            raise ValueError(f"Unsupported action: {action}")

        return chaos

    return _create_operator_chaos


@pytest.fixture(scope="module")
def operator_network_chaos(create_network_chaos, kuadrant_operator_selector):
    """Provide a factory for network chaos experiments selecting the Kuadrant operator pods."""
    # NOTE(review): the create_network_chaos fixture is not defined in the conftest
    # code visible here - confirm it is provided elsewhere.

    def _build(name, action="delay", **kwargs):
        experiment = create_network_chaos(f"operator-network-{name}")
        experiment.configure_network_chaos(labels=kuadrant_operator_selector, action=action, **kwargs)
        return experiment

    return _build


@pytest.fixture(scope="module")
def operator_stress_chaos(create_stress_chaos, kuadrant_operator_selector):
    """Provide a factory for stress chaos experiments selecting the Kuadrant operator pods."""
    # NOTE(review): the create_stress_chaos fixture is not defined in the conftest
    # code visible here - confirm it is provided elsewhere.

    def _build(name, stress_type="memory", **kwargs):
        experiment = create_stress_chaos(f"operator-stress-{name}")
        experiment.configure_stress(labels=kuadrant_operator_selector, stress_type=stress_type, **kwargs)
        return experiment

    return _build
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Test Kuadrant operator resilience when its container is killed."""

import json
import pytest
import openshift_client as oc

pytestmark = [pytest.mark.disruptive, pytest.mark.kuadrant_only]


def _error_entries_from_log(log_content):
    """Return a summary dict for every error-level entry in JSON-lines log text.

    Lines that are not valid JSON, or that decode to something other than a
    JSON object, are ignored. Fields that are missing from an entry are left
    out of its summary.
    """
    errors = []
    for line in log_content.splitlines():
        try:
            log_entry = json.loads(line)
        except json.JSONDecodeError:
            continue  # Skip non-JSON lines
        # A line such as `123` or `"text"` is valid JSON but has no fields to inspect.
        if not isinstance(log_entry, dict) or log_entry.get("level") != "error":
            continue
        error_details = {
            "msg": log_entry.get("msg", "Unknown error"),
            "error": log_entry.get("error"),
            "timestamp": log_entry.get("ts"),
        }
        errors.append({k: v for k, v in error_details.items() if v is not None})
    return errors


def test_operator_container_kill_basic(cluster, operator_chaos_factory):
    """Test basic operator container kill and recovery."""
    kuadrant_system = cluster.change_project("kuadrant-system")

    # Verify operator is running
    with kuadrant_system.context:
        pod = oc.selector("pod", labels={"app": "kuadrant"}).object()
        assert pod.model.status.phase == "Running"

    # Apply chaos - kill container
    chaos = operator_chaos_factory("container-kill-basic", "container-kill")
    chaos.commit()

    # Read the operator logs once the chaos experiment has been committed
    # NOTE(review): nothing here waits for the container to restart before the
    # logs are read - confirm whether an explicit wait is needed.
    with kuadrant_system.context:
        pod = oc.selector("pod", labels={"app": "kuadrant"}).object()
        log_content = next(iter(pod.logs().values()))

    # Check for error-level logs
    errors = _error_entries_from_log(log_content)

    assert not errors, f"Found errors in operator logs: {errors}"
Loading
Loading