diff --git a/testsuite/tests/singlecluster/observability/conftest.py b/testsuite/tests/singlecluster/observability/conftest.py
index e315464b..aa952609 100644
--- a/testsuite/tests/singlecluster/observability/conftest.py
+++ b/testsuite/tests/singlecluster/observability/conftest.py
@@ -4,6 +4,7 @@
 import pytest
 from openshift_client import selector
 
+from testsuite.config import settings
 from testsuite.kubernetes.monitoring.pod_monitor import PodMonitor
 from testsuite.kubernetes.monitoring.service_monitor import ServiceMonitor
 
@@ -66,6 +67,14 @@ def wait_for_monitor():
     return monitor[0]
 
 
+@pytest.fixture(scope="module")
+def operator_metrics(prometheus):
+    """Return all metrics from the Kuadrant operator metrics endpoint"""
+    return prometheus.get_metrics(
+        labels={"service": "kuadrant-operator-metrics", "namespace": settings["service_protection"]["system_project"]}
+    )
+
+
 @pytest.fixture(scope="module")
 def service_monitor_metrics_by_service(service_monitors, prometheus):
     """Return a dictionary, for each expected service, showing which metrics were collected"""
diff --git a/testsuite/tests/singlecluster/observability/test_kuadrant_operator_health_metrics.py b/testsuite/tests/singlecluster/observability/test_kuadrant_operator_health_metrics.py
new file mode 100644
index 00000000..353df6ae
--- /dev/null
+++ b/testsuite/tests/singlecluster/observability/test_kuadrant_operator_health_metrics.py
@@ -0,0 +1,92 @@
+"""Tests for Kuadrant operator health metrics (existence, readiness, component and dependency status)."""
+
+import pytest
+
+from testsuite.prometheus import has_label
+
+pytestmark = [pytest.mark.observability]
+
+COMPONENTS = ["authorino", "limitador"]
+DEPENDENCIES = ["authorino-operator", "limitador-operator", "cert-manager", "dns-operator", "istio", "envoygateway"]
+CONTROLLERS = [
+    "auth_policies",
+    "rate_limit_policies",
+    "dns_policies",
+    "tls_policies",
+    "istio_integration",
+    "envoygateway_integration",
+]
+
+
+def test_metric_kuadrant_exists(operator_metrics):
+    """Tests that the kuadrant_exists metric is present and has value 1"""
+    metrics = operator_metrics.filter(has_label("__name__", "kuadrant_exists"))
+    assert metrics.values, "No values returned for 'kuadrant_exists'"
+    assert metrics.values[0] == 1, f"Expected 'kuadrant_exists' to have value 1, but got values: {metrics.values}"
+
+
+def test_metric_kuadrant_ready(operator_metrics, kuadrant):
+    """Tests that the kuadrant_ready metric is present and has value 1"""
+    metrics = operator_metrics.filter(has_label("__name__", "kuadrant_ready")).filter(
+        has_label("name", kuadrant.name())
+    )
+    assert metrics.values, "No values returned for 'kuadrant_ready'"
+    assert metrics.values[0] == 1, f"Expected 'kuadrant_ready' to have value 1, but got values: {metrics.values}"
+
+
+@pytest.mark.parametrize("component", COMPONENTS)
+def test_metric_kuadrant_component_ready(operator_metrics, component):
+    """Tests that the kuadrant_component_ready metric is present and has value 1 for each component"""
+    metrics = operator_metrics.filter(has_label("__name__", "kuadrant_component_ready")).filter(
+        has_label("component", component)
+    )
+    assert metrics.values, f"No values returned for 'kuadrant_component_ready' for component '{component}'"
+    assert metrics.values[0] == 1, (
+        f"Expected 'kuadrant_component_ready' for component '{component}' to have value 1, "
+        f"but got values: {metrics.values}"
+    )
+
+
+@pytest.mark.parametrize("dependency", DEPENDENCIES)
+def test_metric_kuadrant_dependency_detected(operator_metrics, dependency):
+    """Tests that the kuadrant_dependency_detected metric has the expected value for each dependency"""
+    metrics = operator_metrics.filter(has_label("__name__", "kuadrant_dependency_detected")).filter(
+        has_label("dependency", dependency)
+    )
+    assert metrics.values, f"No values returned for 'kuadrant_dependency_detected' for dependency '{dependency}'"
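+    # Istio and Envoy Gateway are alternative gateway providers and a cluster
+    # typically runs only one of them, so either detection value is acceptable.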
"""Tests that kuadrant_dependency_detected metric has expected value for each dependency""" + metrics = operator_metrics.filter(has_label("__name__", "kuadrant_dependency_detected")).filter( + has_label("dependency", dependency) + ) + assert metrics.values, f"No values returned for 'kuadrant_dependency_detected' for dependency '{dependency}'" + if dependency in ("istio", "envoygateway"): + assert metrics.values[0] in ( + 1, + 0, + ), ( + f"Expected 'kuadrant_dependency_detected' for '{dependency}' to have value 1 or 0, " + f"but got: {metrics.values}" + ) + else: + assert ( + metrics.values[0] == 1 + ), f"Expected 'kuadrant_dependency_detected' for '{dependency}' to have value 1, but got: {metrics.values}" + + +@pytest.mark.parametrize("controller", CONTROLLERS) +def test_metric_kuadrant_controller_registered(operator_metrics, controller): + """Tests that kuadrant_controller_registered metric has expected value for each controller""" + metrics = operator_metrics.filter(has_label("__name__", "kuadrant_controller_registered")).filter( + has_label("controller", controller) + ) + assert metrics.values, f"No values returned for 'kuadrant_controller_registered' for controller '{controller}'" + if controller in ("istio_integration", "envoygateway_integration"): + assert metrics.values[0] in ( + 1, + 0, + ), ( + f"Expected 'kuadrant_controller_registered' for '{controller}' to have value 1 or 0, " + f"but got: {metrics.values}" + ) + else: + assert metrics.values[0] == 1, ( + f"Expected 'kuadrant_controller_registered' for '{controller}' to have value 1, " + f"but got: {metrics.values}" + ) diff --git a/testsuite/tests/singlecluster/observability/test_kuadrant_policy_metrics.py b/testsuite/tests/singlecluster/observability/test_kuadrant_policy_metrics.py new file mode 100644 index 00000000..ab8edc85 --- /dev/null +++ b/testsuite/tests/singlecluster/observability/test_kuadrant_policy_metrics.py @@ -0,0 +1,123 @@ +"""Tests for Kuadrant policy metrics (policies total and policies enforced per policy kind).""" + +import pytest + +from testsuite.gateway import Exposer, TLSGatewayListener +from testsuite.prometheus import has_label +from testsuite.gateway.gateway_api.gateway import KuadrantGateway +from testsuite.gateway.gateway_api.hostname import DNSPolicyExposer +from testsuite.kuadrant.policy.authorization.auth_policy import AuthPolicy +from testsuite.kuadrant.policy.rate_limit import RateLimitPolicy, Limit +from testsuite.kuadrant.policy.token_rate_limit import TokenRateLimitPolicy +from testsuite.kuadrant.policy.dns import DNSPolicy +from testsuite.kuadrant.policy.tls import TLSPolicy + +pytestmark = [pytest.mark.observability] + +POLICY_KINDS = ["AuthPolicy", "RateLimitPolicy", "DNSPolicy", "TLSPolicy", "TokenRateLimitPolicy"] + + +@pytest.fixture(scope="module") +def exposer(request, cluster) -> Exposer: + """DNSPolicyExposer setup with expected TLS certificate""" + exposer = DNSPolicyExposer(cluster) + request.addfinalizer(exposer.delete) + exposer.commit() + return exposer + + +@pytest.fixture(scope="module") +def base_domain(exposer): + """Returns preconfigured base domain from DNS provider""" + return exposer.base_domain + + +@pytest.fixture(scope="module") +def wildcard_domain(base_domain): + """Wildcard domain for the exposer""" + return f"*.{base_domain}" + + +@pytest.fixture(scope="module") +def gateway(request, cluster, blame, wildcard_domain, module_label): + """Returns gateway with TLS listener for DNS/TLS policy support""" + gateway_name = blame("gw") + gw = 
+    for component in [dns_policy, tls_policy, authorization, rate_limit, token_rate_limit]:
+        request.addfinalizer(component.delete)
+        component.commit()
+        component.wait_for_ready()
+
+
+@pytest.mark.parametrize("kind", POLICY_KINDS)
+def test_metric_kuadrant_policies_total(operator_metrics, kind):
+    """Tests that the kuadrant_policies_total metric has a value >= 1 for each policy kind"""
+    metrics = operator_metrics.filter(has_label("__name__", "kuadrant_policies_total")).filter(has_label("kind", kind))
+    assert metrics.values, f"No values returned for 'kuadrant_policies_total' for kind '{kind}'"
+    assert (
+        metrics.values[0] >= 1
+    ), f"Expected 'kuadrant_policies_total' for kind '{kind}' to have value >= 1, but got: {metrics.values}"
+
+
+@pytest.mark.parametrize("kind", POLICY_KINDS)
+def test_metric_kuadrant_policies_enforced(operator_metrics, kind):
+    """Tests that the kuadrant_policies_enforced metric has a value >= 1 for each enforced policy kind"""
+    metrics = (
+        operator_metrics.filter(has_label("__name__", "kuadrant_policies_enforced"))
+        .filter(has_label("kind", kind))
+        .filter(has_label("status", "true"))
+    )
+    assert metrics.values, f"No values returned for 'kuadrant_policies_enforced' for kind '{kind}'"
+    assert (
+        metrics.values[0] >= 1
+    ), f"Expected 'kuadrant_policies_enforced' for kind '{kind}' to have value >= 1, but got: {metrics.values}"
diff --git a/testsuite/tests/singlecluster/observability/test_kuadrant_policy_metrics_lifecycle.py b/testsuite/tests/singlecluster/observability/test_kuadrant_policy_metrics_lifecycle.py
new file mode 100644
index 00000000..9c8e7957
--- /dev/null
+++ b/testsuite/tests/singlecluster/observability/test_kuadrant_policy_metrics_lifecycle.py
@@ -0,0 +1,62 @@
+"""Tests for Kuadrant policy metrics lifecycle (increment/decrement on policy create/delete)."""
+
+import backoff
+import pytest
+
+from testsuite.config import settings
+from testsuite.kuadrant.policy.rate_limit import RateLimitPolicy, Limit
+
+pytestmark = [pytest.mark.observability, pytest.mark.disruptive]
+
+POLICY_METRICS = ["kuadrant_policies_total", "kuadrant_policies_enforced"]
+
+
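+# The module-scoped operator_metrics fixture caches a single scrape, so this
+# helper re-queries Prometheus on each call to observe the metric changing.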
+def _get_metric_value(prometheus, metric, kind):
+    """Helper to get the current metric value for a given policy kind"""
+    labels = {
+        "service": "kuadrant-operator-metrics",
+        "kind": kind,
+        "namespace": settings["service_protection"]["system_project"],
+    }
+    if metric == "kuadrant_policies_enforced":
+        labels["status"] = "true"
+
+    metrics = prometheus.get_metrics(key=metric, labels=labels)
+    return metrics.values[0] if metrics.values else 0
+
+
+def test_metric_policy_lifecycle(prometheus, cluster, blame, route, module_label):
+    """Tests that policy metrics increment on create and decrement on delete"""
+    initial_counts = {m: _get_metric_value(prometheus, m, "RateLimitPolicy") for m in POLICY_METRICS}
+
+    policy = RateLimitPolicy.create_instance(cluster, blame("rlp-lc"), route, labels={"testRun": module_label})
+    policy.add_limit("basic", [Limit(5, "10s")])
+    policy.commit()
+    policy.wait_for_ready()
+
+    for metric in POLICY_METRICS:
+
+        @backoff.on_predicate(backoff.constant, interval=10, jitter=None, max_tries=12)
+        def _wait_for_increment(m=metric):
+            return _get_metric_value(prometheus, m, "RateLimitPolicy") > initial_counts[m]
+
+        assert _wait_for_increment(), (
+            f"Expected '{metric}' for RateLimitPolicy to increment from {initial_counts[metric]}, "
+            f"but got: {_get_metric_value(prometheus, metric, 'RateLimitPolicy')}"
+        )
+
+    counts_before_delete = {m: _get_metric_value(prometheus, m, "RateLimitPolicy") for m in POLICY_METRICS}
+    policy.delete()
+
+    for metric in POLICY_METRICS:
+
+        @backoff.on_predicate(backoff.constant, interval=10, jitter=None, max_tries=12)
+        def _wait_for_decrement(m=metric):
+            return _get_metric_value(prometheus, m, "RateLimitPolicy") < counts_before_delete[m]
+
+        assert _wait_for_decrement(), (
+            f"Expected '{metric}' for RateLimitPolicy to decrement from {counts_before_delete[metric]}, "
+            f"but got: {_get_metric_value(prometheus, metric, 'RateLimitPolicy')}"
+        )
diff --git a/testsuite/tests/singlecluster/observability/test_observability.py b/testsuite/tests/singlecluster/observability/test_observability.py
index e242a24a..f1f5dd41 100644
--- a/testsuite/tests/singlecluster/observability/test_observability.py
+++ b/testsuite/tests/singlecluster/observability/test_observability.py
@@ -37,7 +37,7 @@
 def test_service_monitor_metrics_per_service(metric, service_monitor_metrics_by_service):
     """Tests that all expected metrics appear in the data collected from each ServiceMonitor"""
     for service_name, metrics in service_monitor_metrics_by_service.items():
-        assert metric in metrics, f"Expected metric '{metric}' in '{service_name}'. Metrics found: {metrics}"
+        assert metric in metrics, f"Expected metric '{metric}' not found in '{service_name}'. Metrics found: {metrics}"
 
 
 @pytest.mark.parametrize("metric", POD_MONITOR_METRICS)