class LogCollector:
    """Collect container logs from Kubernetes pods into a local directory.

    Pods are located with the ``openshift_client`` selector inside the context of
    ``cluster``; the log text itself is fetched with the ``oc logs`` command and
    restricted to entries newer than ``since_time``.

    NOTE(review): ``oc logs`` is started as a plain subprocess, so it presumably
    talks to whatever cluster the ``oc`` binary is currently logged into - confirm
    this is always the same cluster as ``cluster``.
    """

    # Maximum time in seconds a single ``oc logs`` invocation may take.
    FETCH_TIMEOUT = 30

    def __init__(self, cluster, log_dir: Path, since_time: str):
        """
        Initialize the log collector.

        cluster: Kubernetes cluster client
        log_dir: Directory to save logs to
        since_time: ISO 8601 timestamp string (e.g., "2025-10-23T10:30:00Z")
        """
        self.cluster = cluster
        self.log_dir = log_dir
        self.since_time = since_time

    def collect_logs(
        self, namespace: str, label_selector: dict, component_name: str, container_name: Optional[str] = None
    ):
        """
        Collect logs from pods matching the given criteria.

        Failures are logged (and, where possible, recorded in an ``*-error.txt``
        file next to the logs) instead of being raised.

        namespace: Kubernetes namespace to search in
        label_selector: Dictionary of labels to match pods (e.g., {"app": "myapp"})
        component_name: Name to use in log filenames
        container_name: Optional specific container name. If None, collects from all containers.
        """
        try:
            target_cluster = self.cluster.change_project(namespace)
            with target_cluster.context:
                pods = selector("pod", labels=label_selector)

                if pods.count_existing() == 0:
                    logger.warning(
                        "No %s pods found with labels %s in namespace %s", component_name, label_selector, namespace
                    )
                    return

                for pod in pods.objects():
                    pod_name = pod.name()
                    try:
                        containers = [container_name] if container_name else [c.name for c in pod.model.spec.containers]
                        for container in containers:
                            self._collect_container_logs(pod_name, container, namespace, component_name)
                    except (AttributeError, KeyError) as e:
                        # A pod with an unexpected model must not stop collection from the remaining pods.
                        self._write_error_file(
                            self.log_dir / f"{component_name}-{pod_name}-error.txt", f"Failed to process pod: {e}"
                        )
                        logger.error("Failed to process pod %s: %s", pod_name, e)

        except OpenShiftPythonException as e:
            logger.error("Failed to access %s pods in namespace %s: %s", component_name, namespace, e)

    def _collect_container_logs(self, pod_name: str, container: str, namespace: str, component_name: str):
        """Collect logs from a single container and save them under ``log_dir``."""
        error_file = self.log_dir / f"{component_name}-{pod_name}-{container}-error.txt"
        try:
            logs = self._fetch_pod_logs(pod_name, container, namespace)
            log_file = self.log_dir / f"{component_name}-{pod_name}-{container}.log"
            self._save_log_file(log_file, component_name, pod_name, container, namespace, logs)
            logger.info("Collected %s logs: %s/%s", component_name, pod_name, container)
        except subprocess.TimeoutExpired:
            self._write_error_file(error_file, "Timeout while collecting logs")
            logger.error("Timeout collecting logs from %s/%s", pod_name, container)
        except OSError as e:  # IOError is an alias of OSError, so one clause covers both
            self._write_error_file(error_file, f"Failed to collect logs: {e}")
            logger.error("Failed to get logs from %s/%s: %s", pod_name, container, e)

    def _fetch_pod_logs(self, pod_name: str, container: str, namespace: str) -> str:
        """
        Fetch logs from a specific pod container using oc logs command.

        Returns the command's stdout on success. On a non-zero exit code the
        command's stderr is returned instead, so the reason ends up in the log file.
        """
        result = subprocess.run(
            [
                "oc",
                "logs",
                f"pod/{pod_name}",
                "-c",
                container,
                "-n",
                namespace,
                f"--since-time={self.since_time}",
                "--timestamps",
            ],
            capture_output=True,
            text=True,
            timeout=self.FETCH_TIMEOUT,
            check=False,
        )
        if result.returncode != 0:
            # Make the failure visible; otherwise the stderr text is stored as if it were regular logs.
            logger.warning(
                "oc logs for %s/%s in namespace %s exited with code %s", pod_name, container, namespace, result.returncode
            )
            return result.stderr
        return result.stdout

    def _save_log_file(
        self, log_file: Path, component_name: str, pod_name: str, container: str, namespace: str, logs: str
    ):
        """Save collected logs to a file with metadata header."""
        header = (
            f"# Component: {component_name}\n"
            f"# Pod: {pod_name}\n"
            f"# Container: {container}\n"
            f"# Namespace: {namespace}\n"
            f"# Logs since: {self.since_time}\n"
            f"# {'=' * 70}\n\n"
        )
        # Create the target directory on demand so the collector also works with a fresh log_dir.
        log_file.parent.mkdir(parents=True, exist_ok=True)
        with open(log_file, "w", encoding="utf-8") as f:
            f.write(header)
            f.write(logs)

    def _write_error_file(self, error_file: Path, message: str) -> None:
        """
        Write an error report, never raising if the report itself cannot be written.

        This runs inside exception handlers; a second failure here (e.g. an
        unwritable log directory) is only logged so it cannot escape the collector.
        """
        try:
            error_file.parent.mkdir(parents=True, exist_ok=True)
            error_file.write_text(message, encoding="utf-8")
        except OSError as e:
            logger.error("Could not write error report %s: %s", error_file, e)
# How to find the pods of each supported component inside the system namespace:
# component name -> (pod label selector, container name or None for all containers).
_COMPONENT_PODS = {
    "authorino": ({"authorino-resource": "authorino"}, None),
    "limitador": ({"app": "limitador"}, None),
    "dns-operator": ({"control-plane": "dns-operator-controller-manager"}, "manager"),
    "authorino-operator": ({"control-plane": "authorino-operator"}, "manager"),
    "limitador-operator": ({"app": "limitador-operator"}, "manager"),
    "kuadrant-operator": ({"control-plane": "kuadrant-operator"}, "manager"),
}


def _as_component_set(value):
    """Normalize a ``log_components`` value to a set; a bare string counts as one component."""
    if isinstance(value, str):
        return {value}
    return set(value)


def _safe_dir_name(text: str) -> str:
    """Turn a test id into a single directory name without path separators or special characters."""
    cleaned = "".join(char if char.isalnum() or char in "._-" else "_" for char in text).strip("._")
    # Keep well below common per-component filename limits.
    return cleaned[:200] or "unnamed-test"


def get_log_components(item):
    """
    Determine which components to collect logs from based on module configuration.

    Lookup order:
    1. ``log_components`` defined in the test module itself.
    2. ``log_components`` defined in a registered ``conftest.py`` located in the
       test file's directory or one of its parent directories; the conftest
       closest to the test file wins.

    Conftest files from unrelated directories are ignored, so a sibling test
    suite's configuration never leaks into this test.

    Returns a set of component names to collect logs from.
    If not configured, returns empty set (no logging).
    """
    # Check the test module itself first
    module = getattr(item, "module", None)
    if hasattr(module, "log_components"):
        return _as_component_set(module.log_components)

    raw_path = getattr(item, "path", None) or getattr(item, "fspath", None)
    plugin_manager = getattr(getattr(item, "config", None), "pluginmanager", None)
    if raw_path is None or plugin_manager is None:
        return set()

    # Directories whose conftest.py applies to this test.
    test_dirs = set(Path(str(raw_path)).resolve().parents)

    nearest_depth = -1
    nearest_components = set()
    for plugin in plugin_manager.get_plugins():
        plugin_file = getattr(plugin, "__file__", None)
        if not plugin_file or not hasattr(plugin, "log_components"):
            continue
        conftest_path = Path(str(plugin_file)).resolve()
        if conftest_path.name != "conftest.py" or conftest_path.parent not in test_dirs:
            continue
        # More path parts means deeper in the tree, i.e. closer to the test file.
        depth = len(conftest_path.parent.parts)
        if depth > nearest_depth:
            nearest_depth = depth
            nearest_components = _as_component_set(plugin.log_components)

    # Default (nothing found): no logging
    return nearest_components


def _collect_component_logs(collector: "LogCollector", component: str, system_namespace: str, gateway=None):
    """
    Collect logs for a specific component using the generic LogCollector.

    This function contains the knowledge of how to find each component's pods.
    Problems are logged as warnings and never raised.

    collector: LogCollector used to fetch and store the logs
    component: Component name, "gateway" or one of the keys of _COMPONENT_PODS
    system_namespace: Namespace in which the non-gateway components are looked up
    gateway: Optional gateway object; its name() and namespace() locate the gateway pods
    """
    if component == "gateway":
        if not gateway:
            logger.warning("Gateway logs requested but no gateway is available for this test - skipping")
            return
        try:
            logger.info("Collecting Gateway logs...")
            gateway_name = gateway.name()
            gateway_namespace = gateway.namespace()
            collector.collect_logs(
                gateway_namespace, {"gateway.networking.k8s.io/gateway-name": gateway_name}, "gateway"
            )
        except (AttributeError, KeyError, OSError, OpenShiftPythonException) as e:
            logger.warning("Could not collect Gateway logs: %s", e)
        return

    pod_spec = _COMPONENT_PODS.get(component)
    if pod_spec is None:
        # Most likely a typo in a log_components list; say so instead of silently doing nothing.
        logger.warning(
            "Unknown log component %r - skipping (known: gateway, %s)", component, ", ".join(sorted(_COMPONENT_PODS))
        )
        return

    labels, container = pod_spec
    try:
        logger.info("Collecting %s logs...", component)
        # Pass a copy so the shared lookup table can never be modified by the collector.
        collector.collect_logs(system_namespace, dict(labels), component, container)
    except (OpenShiftPythonException, OSError, KeyError) as e:
        logger.warning("Could not collect %s logs: %s", component, e)


def collect_failure_artifacts(item, cluster, start_time: datetime):
    """
    Collect logs from configured components when a test fails.

    Logs are written to test-failures/<worker id>/<sanitized test id>/ and only
    contain entries newer than start_time.

    item: pytest test item of the failed test
    cluster: Cluster client handed to the LogCollector
    start_time: Start of the test; a naive value is taken to be UTC already
    """
    test_name = item.name
    worker_id = getattr(item.config, "workerinput", {}).get("workerid", "master")

    # Determine which components to collect logs from
    enabled_components = get_log_components(item)
    if not enabled_components:
        logger.info("No log components configured for %s - skipping log collection", test_name)
        return

    # The node id is unique per test and parametrization; the bare name is neither unique
    # across modules nor guaranteed to be a valid directory name (ids may contain "/").
    log_dir = Path("test-failures") / worker_id / _safe_dir_name(getattr(item, "nodeid", "") or test_name)
    try:
        log_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.error("Could not create log directory %s: %s", log_dir, e)
        return

    logger.info("Collecting logs for failed test: %s", test_name)
    logger.info("Worker: %s", worker_id)
    logger.info("Log directory: %s", log_dir)
    logger.info("Configured components: %s", ", ".join(sorted(enabled_components)))

    # The "Z" suffix claims UTC, so shift an aware timestamp to UTC wall-clock time first.
    offset = start_time.utcoffset()
    utc_start = start_time if offset is None else start_time.replace(tzinfo=None) - offset
    since_time = utc_start.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Create generic collector
    collector = LogCollector(cluster, log_dir, since_time)
    system_namespace = settings["service_protection"]["system_project"]

    # Collect logs for each enabled component (sorted for a stable, reproducible order)
    gateway = getattr(item, "funcargs", {}).get("gateway")
    for component in sorted(enabled_components):
        _collect_component_logs(collector, component, system_namespace, gateway)

    logger.info("Log collection complete. Logs saved to: %s", log_dir)
+ """ + start_time = datetime.now(timezone.utc) + request.node.test_start_time = start_time + yield + + @pytest.hookimpl(hookwrapper=True) def pytest_runtest_makereport(item, call): # pylint: disable=unused-argument - """Add jira link to html report""" + """Add jira link to html report and collect logs on test failure""" pytest_html = item.config.pluginmanager.getplugin("html") outcome = yield report = outcome.get_result() @@ -80,6 +95,17 @@ def pytest_runtest_makereport(item, call): # pylint: disable=unused-argument extra.append(pytest_html.extras.url(issue, name=label)) report.extra = extra + # Collect logs on test failure + if report.when == "call" and report.failed: + # Get the test start time + start_time = getattr(item, "test_start_time", datetime.now(timezone.utc)) + + # Get cluster fixture if available + if "cluster" in item.fixturenames: + cluster = item.funcargs.get("cluster") + if cluster: + collect_failure_artifacts(item, cluster, start_time) + def pytest_report_header(config): """Adds Kuadrant version string to pytest header output for every cluster.""" diff --git a/testsuite/tests/singlecluster/authorino/conftest.py b/testsuite/tests/singlecluster/authorino/conftest.py index 38c3726a..06c38eb6 100644 --- a/testsuite/tests/singlecluster/authorino/conftest.py +++ b/testsuite/tests/singlecluster/authorino/conftest.py @@ -6,6 +6,11 @@ from testsuite.kuadrant.authorino import AuthorinoCR, PreexistingAuthorino from testsuite.kuadrant.policy.authorization.auth_config import AuthConfig +# Configure which components to collect logs from when tests fail +# Available components: authorino, limitador, gateway, dns-operator, authorino-operator, limitador-operator, kuadrant-operator +# If not specified, no logs are collected (log collection is opt-in) +log_components = ["authorino", "authorino-operator", "gateway"] + @pytest.fixture(scope="session") def authorino(kuadrant, cluster, blame, request, testconfig, label): diff --git a/testsuite/tests/singlecluster/limitador/conftest.py 
b/testsuite/tests/singlecluster/limitador/conftest.py index 8bbb329e..7bf8891a 100644 --- a/testsuite/tests/singlecluster/limitador/conftest.py +++ b/testsuite/tests/singlecluster/limitador/conftest.py @@ -2,6 +2,11 @@ import pytest +# Configure which components to collect logs from when tests fail +# Available components: authorino, limitador, gateway, dns-operator, authorino-operator, limitador-operator, kuadrant-operator +# If not specified, no logs are collected (log collection is opt-in) +log_components = ["limitador", "limitador-operator", "gateway"] + @pytest.fixture(scope="session") def limitador(kuadrant):