From 9bfb9039a495b784648c11b2018acd46eb0489ff Mon Sep 17 00:00:00 2001
From: Alexander Cristurean
Date: Wed, 22 Oct 2025 17:35:15 +0200
Subject: [PATCH 1/2] feat: add log collection on test failures

Signed-off-by: Alexander Cristurean
---
 .gitignore                                    |   3 +
 testsuite/log_collection.py                   | 415 ++++++++++++++++++
 testsuite/tests/conftest.py                   |  28 +-
 .../tests/singlecluster/authorino/conftest.py |   5 +
 .../tests/singlecluster/limitador/conftest.py |   4 +
 5 files changed, 454 insertions(+), 1 deletion(-)
 create mode 100644 testsuite/log_collection.py

diff --git a/.gitignore b/.gitignore
index 61444760..86b9e735 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,5 +134,8 @@ dmypy.json
 logs/*
 !logs/.gitkeep
 
+# Test failure artifacts (automatic log collection)
+test-failures/
+
 # macOS system files
 .DS_Store
diff --git a/testsuite/log_collection.py b/testsuite/log_collection.py
new file mode 100644
index 00000000..f7e08324
--- /dev/null
+++ b/testsuite/log_collection.py
@@ -0,0 +1,415 @@
+"""
+Generic log collection utilities for test failures
+
+This module provides automatic log collection for failed tests across all test suites,
+including support for parallel test execution with pytest-xdist.
+
+Configuration:
+--------------
+Configure which components to collect logs from by adding a module-level
+variable to your conftest.py:
+
+    log_components = ["authorino", "gateway", "limitador"]
+
+If not configured, no logs will be collected (opt-in by default).
+
+Available components:
+- authorino: Authorino service logs
+- limitador: Limitador service logs
+- gateway: Gateway/Istio proxy logs
+- dns-operator: DNS Operator controller logs
+- authorino-operator: Authorino Operator controller logs
+- limitador-operator: Limitador Operator controller logs
+- kuadrant-operator: Kuadrant Operator controller logs
+"""
+
+import subprocess
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+import logging
+from testsuite.config import settings
+
+from openshift_client import selector, OpenShiftPythonException
+
+logger = logging.getLogger(__name__)
+
+
+def collect_pod_logs(
+    cluster,
+    namespace: str,
+    label_selector: dict,
+    log_dir: Path,
+    start_time: datetime,
+    component_name: str,
+    container_name: Optional[str] = None,
+):
+    """
+    Collect logs from pods matching the label selector with time filtering.
+
+    If container_name is specified, only collect logs from that container.
+    Otherwise, collect logs from all containers in the pod.
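+
+    A minimal usage sketch (the selector and directory are illustrative,
+    not required values):
+
+        collect_pod_logs(cluster, "kuadrant-system", {"app": "limitador"},
+                         Path("test-failures"), datetime.now(timezone.utc), "limitador")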
+ """ + since_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") + + try: + target_cluster = cluster.change_project(namespace) + with target_cluster.context: + pods = selector("pod", labels=label_selector) + + if pods.count_existing() == 0: + logger.warning(f"No {component_name} pods found with labels {label_selector} in namespace {namespace}") + return + + for pod in pods.objects(): + pod_name = pod.name() + + try: + # Get container list + containers = [container_name] if container_name else [c.name for c in pod.model.spec.containers] + + for container in containers: + try: + # Use oc logs command for better parameter support + result = subprocess.run( + [ + "oc", + "logs", + f"pod/{pod_name}", + "-c", + container, + "-n", + namespace, + f"--since-time={since_time}", + "--timestamps", + ], + capture_output=True, + text=True, + timeout=30, + ) + logs = result.stdout if result.returncode == 0 else result.stderr + + # Save logs + log_file = log_dir / f"{component_name}-{pod_name}-{container}.log" + with open(log_file, "w") as f: + f.write(f"# Component: {component_name}\n") + f.write(f"# Pod: {pod_name}\n") + f.write(f"# Container: {container}\n") + f.write(f"# Namespace: {namespace}\n") + f.write(f"# Logs since: {since_time}\n") + f.write(f"# {'=' * 70}\n\n") + f.write(logs) + + logger.info(f"Collected {component_name} logs: {pod_name}/{container}") + + except subprocess.TimeoutExpired: + error_file = log_dir / f"{component_name}-{pod_name}-{container}-error.txt" + error_file.write_text("Timeout while collecting logs") + logger.error(f"Timeout collecting logs from {pod_name}/{container}") + except Exception as e: + error_file = log_dir / f"{component_name}-{pod_name}-{container}-error.txt" + error_file.write_text(f"Failed to collect logs: {e}") + logger.error(f"Failed to get logs from {pod_name}/{container}: {e}") + + except Exception as e: + error_file = log_dir / f"{component_name}-{pod_name}-error.txt" + error_file.write_text(f"Failed to process pod: {e}") + logger.error(f"Failed to process pod {pod_name}: {e}") + + except OpenShiftPythonException as e: + logger.error(f"Failed to access {component_name} pods in namespace {namespace}: {e}") + except Exception as e: + logger.error(f"Unexpected error collecting {component_name} logs: {e}") + + +def collect_authorino_logs(cluster, log_dir: Path, start_time: datetime, authorino): + """Collect logs from Authorino pods""" + try: + authorino_name = authorino.name() + except Exception: + authorino_name = "authorino" + + # Try primary label selector + label_selector = {"authorino-resource": authorino_name} + + # Check if pods exist with primary selector + try: + authorino_cluster = cluster.change_project("kuadrant-system") + with authorino_cluster.context: + pods = selector("pod", labels=label_selector) + if pods.count_existing() == 0: + # Fallback to alternative label + label_selector = {"app": authorino_name} + except Exception: + pass + + collect_pod_logs( + cluster=cluster, + namespace="kuadrant-system", + label_selector=label_selector, + log_dir=log_dir, + start_time=start_time, + component_name="authorino", + ) + + +def collect_limitador_logs(cluster, log_dir: Path, start_time: datetime, limitador): + """Collect logs from Limitador pods""" + try: + limitador_name = limitador.name() + except Exception: + limitador_name = "limitador" + + # Try primary label selector + label_selector = {"app": "limitador"} + + # Check if we should use a more specific selector + try: + limitador_cluster = cluster.change_project("kuadrant-system") + with 
limitador_cluster.context: + pods = selector("pod", labels=label_selector) + if pods.count_existing() == 0: + # Fallback to resource-specific label + label_selector = {"limitador-resource": limitador_name} + except Exception: + pass + + collect_pod_logs( + cluster=cluster, + namespace="kuadrant-system", + label_selector=label_selector, + log_dir=log_dir, + start_time=start_time, + component_name="limitador", + ) + + +def collect_dns_operator_logs(cluster, log_dir: Path, start_time: datetime): + """Collect logs from DNS Operator""" + collect_pod_logs( + cluster=cluster, + namespace="kuadrant-system", + label_selector={"control-plane": "dns-operator-controller-manager"}, + log_dir=log_dir, + start_time=start_time, + component_name="dns-operator", + container_name="manager", + ) + + +def collect_authorino_operator_logs(cluster, log_dir: Path, start_time: datetime): + """Collect logs from Authorino Operator""" + collect_pod_logs( + cluster=cluster, + namespace="kuadrant-system", + label_selector={"control-plane": "authorino-operator"}, + log_dir=log_dir, + start_time=start_time, + component_name="authorino-operator", + container_name="manager", + ) + + +def collect_kuadrant_operator_logs(cluster, log_dir: Path, start_time: datetime): + """Collect logs from Kuadrant Operator""" + collect_pod_logs( + cluster=cluster, + namespace="kuadrant-system", + label_selector={"control-plane": "kuadrant-operator"}, + log_dir=log_dir, + start_time=start_time, + component_name="kuadrant-operator", + container_name="manager", + ) + + +def collect_gateway_logs(cluster, log_dir: Path, start_time: datetime, gateway): + """Collect logs from Gateway pods""" + try: + gateway_name = gateway.name() + gateway_namespace = gateway.namespace() + except Exception as e: + print(f" [FAIL] Could not get gateway name/namespace: {e}") + return + + # Try primary label selector for Gateway API + label_selector = {"gateway.networking.k8s.io/gateway-name": gateway_name} + + # Check if pods exist with primary selector + try: + with cluster.context: + pods = selector("pod", labels=label_selector) + if pods.count_existing() == 0: + # Fallback to Istio label + label_selector = {"istio.io/gateway-name": gateway_name} + except Exception: + pass + + collect_pod_logs( + cluster=cluster, + namespace=gateway_namespace, + label_selector=label_selector, + log_dir=log_dir, + start_time=start_time, + component_name="gateway", + ) + + +def get_log_components(item): + """ + Determine which components to collect logs from based on module configuration. + + Walks up the test hierarchy looking for log_components configuration. + Checks: test module -> parent conftest modules + + Returns a set of component names to collect logs from. + If not configured, returns empty set (no logging). + """ + # Check the test module itself first + if hasattr(item.module, "log_components"): + return set(item.module.log_components) + + # Get conftest modules from pytest's pluginmanager + # Walk up the directory tree to find conftest modules with log_components + try: + if hasattr(item.config, "pluginmanager"): + for plugin in item.config.pluginmanager.get_plugins(): + if hasattr(plugin, "__name__") and "conftest" in getattr(plugin, "__name__", ""): + if hasattr(plugin, "log_components"): + return set(plugin.log_components) + except Exception: + pass + + # Default: no logging + return set() + + +def collect_failure_artifacts(item, cluster, start_time: datetime): + """ + Collect logs from configured components when a test fails. 
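+
+    Logs land under test-failures/<worker-id>/<test-name>/, one file per
+    container, e.g. test-failures/gw0/test_example/limitador-limitador-abc12-limitador.log
+    (illustrative names; the worker id is "master" for non-xdist runs).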
+ + This function inspects the test's fixtures and module configuration + to determine which components are available and should be logged. + """ + test_name = item.name + worker_id = getattr(item.config, "workerinput", {}).get("workerid", "master") + + # Determine which components to collect logs from + enabled_components = get_log_components(item) + if not enabled_components: + logger.info(f"No log components configured for {test_name} - skipping log collection") + return + + # Create log directory + log_dir = Path("test-failures") / worker_id / test_name + log_dir.mkdir(parents=True, exist_ok=True) + + logger.info(f"Collecting logs for failed test: {test_name}") + logger.info(f"Worker: {worker_id}") + logger.info(f"Log directory: {log_dir}") + logger.info(f"Configured components: {', '.join(sorted(enabled_components))}") + + # Collect logs from available components + # Check which fixtures are available in the test + + def should_collect(component_name): + """Check if we should collect logs for this component""" + return component_name in enabled_components + + # Authorino + if should_collect("authorino"): + try: + logger.info("Collecting Authorino logs...") + collect_pod_logs( + cluster=cluster, + namespace=settings["service_protection"]["system_project"], + label_selector={"authorino-resource": "authorino"}, + log_dir=log_dir, + start_time=start_time, + component_name="authorino", + ) + except Exception as e: + logger.warning(f"Could not collect Authorino logs: {e}") + + # Limitador + if should_collect("limitador"): + try: + logger.info("Collecting Limitador logs...") + collect_pod_logs( + cluster=cluster, + namespace=settings["service_protection"]["system_project"], + label_selector={"app": "limitador"}, + log_dir=log_dir, + start_time=start_time, + component_name="limitador", + ) + except Exception as e: + logger.warning(f"Could not collect Limitador logs: {e}") + + # DNS Operator + if should_collect("dns-operator"): + try: + logger.info("Collecting DNS Operator logs...") + collect_pod_logs( + cluster=cluster, + namespace=settings["service_protection"]["system_project"], + label_selector={"control-plane": "dns-operator-controller-manager"}, + log_dir=log_dir, + start_time=start_time, + component_name="dns-operator", + ) + except Exception as e: + logger.warning(f"Could not collect DNS Operator logs: {e}") + + # Authorino Operator + if should_collect("authorino-operator"): + try: + logger.info("Collecting Authorino Operator logs...") + collect_pod_logs( + cluster=cluster, + namespace=settings["service_protection"]["system_project"], + label_selector={"control-plane": "authorino-operator"}, + log_dir=log_dir, + start_time=start_time, + component_name="authorino-operator", + ) + except Exception as e: + logger.warning(f"Could not collect Authorino Operator logs: {e}") + + + # Limitador Operator + if should_collect("limitador-operator"): + try: + logger.info("Collecting Limitador Operator logs...") + collect_pod_logs( + cluster=cluster, + namespace="kuadrant-system", + label_selector={"app": "limitador-operator"}, + log_dir=log_dir, + start_time=start_time, + component_name="limitador-operator", + ) + except Exception as e: + logger.warning(f"Could not collect Limitador Operator logs: {e}") + + # Kuadrant Operator + if should_collect("kuadrant-operator"): + try: + logger.info("Collecting Kuadrant Operator logs...") + collect_kuadrant_operator_logs(cluster, log_dir, start_time) + except Exception as e: + logger.warning(f"Could not collect Kuadrant Operator logs: {e}") + + # Gateway + if 
should_collect("gateway"): + try: + logger.info("Collecting Gateway logs...") + gateway = item.funcargs.get("gateway") + if gateway: + gateway_name = gateway.name() + gateway_namespace = gateway.namespace() + + collect_pod_logs(cluster, gateway_namespace, {"gateway.networking.k8s.io/gateway-name": gateway_name}, log_dir, start_time, "gateway") + except Exception as e: + logger.warning(f"Could not collect Gateway logs: {e}") + + logger.info(f"Log collection complete. Logs saved to: {log_dir}") diff --git a/testsuite/tests/conftest.py b/testsuite/tests/conftest.py index b5aa000f..e834a9ab 100644 --- a/testsuite/tests/conftest.py +++ b/testsuite/tests/conftest.py @@ -1,6 +1,7 @@ """Root conftest""" import signal +from datetime import datetime, timezone from urllib.parse import urlparse import yaml @@ -15,6 +16,7 @@ from testsuite.config import settings from testsuite.gateway import Exposer, CustomReference from testsuite.httpx import KuadrantClient +from testsuite.log_collection import collect_failure_artifacts from testsuite.mockserver import Mockserver from testsuite.oidc import OIDCProvider from testsuite.oidc.auth0 import Auth0Provider @@ -62,9 +64,22 @@ def pytest_runtest_setup(item): skip_or_fail(f"Unable to locate Kuadrant installation: {error}") +@pytest.fixture(scope="function", autouse=True) +def test_tracker(request): + """ + Track test execution for log collection. + + This fixture records the start time of each test, which is used + to filter logs when collecting artifacts from failed tests. + """ + start_time = datetime.now(timezone.utc) + request.node.test_start_time = start_time + yield + + @pytest.hookimpl(hookwrapper=True) def pytest_runtest_makereport(item, call): # pylint: disable=unused-argument - """Add jira link to html report""" + """Add jira link to html report and collect logs on test failure""" pytest_html = item.config.pluginmanager.getplugin("html") outcome = yield report = outcome.get_result() @@ -80,6 +95,17 @@ def pytest_runtest_makereport(item, call): # pylint: disable=unused-argument extra.append(pytest_html.extras.url(issue, name=label)) report.extra = extra + # Collect logs on test failure + if report.when == "call" and report.failed: + # Get the test start time + start_time = getattr(item, "test_start_time", datetime.now(timezone.utc)) + + # Get cluster fixture if available + if "cluster" in item.fixturenames: + cluster = item.funcargs.get("cluster") + if cluster: + collect_failure_artifacts(item, cluster, start_time) + def pytest_report_header(config): """Adds Kuadrant version string to pytest header output for every cluster.""" diff --git a/testsuite/tests/singlecluster/authorino/conftest.py b/testsuite/tests/singlecluster/authorino/conftest.py index 38c3726a..06c38eb6 100644 --- a/testsuite/tests/singlecluster/authorino/conftest.py +++ b/testsuite/tests/singlecluster/authorino/conftest.py @@ -6,6 +6,11 @@ from testsuite.kuadrant.authorino import AuthorinoCR, PreexistingAuthorino from testsuite.kuadrant.policy.authorization.auth_config import AuthConfig +# Configure which components to collect logs from when tests fail +# Available components: authorino, limitador, gateway, dns-operator, authorino-operator, kuadrant-operator +# If not specified, all available components will be logged +log_components = ["authorino", "authorino-operator", "gateway"] + @pytest.fixture(scope="session") def authorino(kuadrant, cluster, blame, request, testconfig, label): diff --git a/testsuite/tests/singlecluster/limitador/conftest.py 
index 8bbb329e..82983380 100644
--- a/testsuite/tests/singlecluster/limitador/conftest.py
+++ b/testsuite/tests/singlecluster/limitador/conftest.py
@@ -2,6 +2,10 @@
 
 import pytest
 
+# Configure which components to collect logs from when tests fail
+# Available components: authorino, limitador, limitador-operator, gateway, dns-operator, authorino-operator, kuadrant-operator
+# If not specified, no logs will be collected
+log_components = ["limitador", "limitador-operator", "gateway"]
 @pytest.fixture(scope="session")
 def limitador(kuadrant):

From 26ee56ddbeafc64b04da9e48f6632b9219e48feb Mon Sep 17 00:00:00 2001
From: Alexander Cristurean
Date: Thu, 23 Oct 2025 13:23:48 +0200
Subject: [PATCH 2/2] fix: add LogCollector class and minor improvements

Signed-off-by: Alexander Cristurean
---
 testsuite/log_collection.py                   | 508 ++++++------------
 .../tests/singlecluster/limitador/conftest.py |   1 +
 2 files changed, 179 insertions(+), 330 deletions(-)

diff --git a/testsuite/log_collection.py b/testsuite/log_collection.py
index f7e08324..ecaba3d2 100644
--- a/testsuite/log_collection.py
+++ b/testsuite/log_collection.py
@@ -22,236 +22,119 @@
 - kuadrant-operator: Kuadrant Operator controller logs
 """
 
+import logging
 import subprocess
-from datetime import datetime, timezone
+from datetime import datetime
 from pathlib import Path
 from typing import Optional
-import logging
-from testsuite.config import settings
 
 from openshift_client import selector, OpenShiftPythonException
 
-logger = logging.getLogger(__name__)
-
-
-def collect_pod_logs(
-    cluster,
-    namespace: str,
-    label_selector: dict,
-    log_dir: Path,
-    start_time: datetime,
-    component_name: str,
-    container_name: Optional[str] = None,
-):
-    """
-    Collect logs from pods matching the label selector with time filtering.
-
-    If container_name is specified, only collect logs from that container.
-    Otherwise, collect logs from all containers in the pod.
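-
-    A minimal usage sketch (the selector and directory are illustrative,
-    not required values):
-
-        collect_pod_logs(cluster, "kuadrant-system", {"app": "limitador"},
-                         Path("test-failures"), datetime.now(timezone.utc), "limitador")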
- """ - since_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") - - try: - target_cluster = cluster.change_project(namespace) - with target_cluster.context: - pods = selector("pod", labels=label_selector) - - if pods.count_existing() == 0: - logger.warning(f"No {component_name} pods found with labels {label_selector} in namespace {namespace}") - return - - for pod in pods.objects(): - pod_name = pod.name() - - try: - # Get container list - containers = [container_name] if container_name else [c.name for c in pod.model.spec.containers] - - for container in containers: - try: - # Use oc logs command for better parameter support - result = subprocess.run( - [ - "oc", - "logs", - f"pod/{pod_name}", - "-c", - container, - "-n", - namespace, - f"--since-time={since_time}", - "--timestamps", - ], - capture_output=True, - text=True, - timeout=30, - ) - logs = result.stdout if result.returncode == 0 else result.stderr - - # Save logs - log_file = log_dir / f"{component_name}-{pod_name}-{container}.log" - with open(log_file, "w") as f: - f.write(f"# Component: {component_name}\n") - f.write(f"# Pod: {pod_name}\n") - f.write(f"# Container: {container}\n") - f.write(f"# Namespace: {namespace}\n") - f.write(f"# Logs since: {since_time}\n") - f.write(f"# {'=' * 70}\n\n") - f.write(logs) - - logger.info(f"Collected {component_name} logs: {pod_name}/{container}") - - except subprocess.TimeoutExpired: - error_file = log_dir / f"{component_name}-{pod_name}-{container}-error.txt" - error_file.write_text("Timeout while collecting logs") - logger.error(f"Timeout collecting logs from {pod_name}/{container}") - except Exception as e: - error_file = log_dir / f"{component_name}-{pod_name}-{container}-error.txt" - error_file.write_text(f"Failed to collect logs: {e}") - logger.error(f"Failed to get logs from {pod_name}/{container}: {e}") - - except Exception as e: - error_file = log_dir / f"{component_name}-{pod_name}-error.txt" - error_file.write_text(f"Failed to process pod: {e}") - logger.error(f"Failed to process pod {pod_name}: {e}") - - except OpenShiftPythonException as e: - logger.error(f"Failed to access {component_name} pods in namespace {namespace}: {e}") - except Exception as e: - logger.error(f"Unexpected error collecting {component_name} logs: {e}") - - -def collect_authorino_logs(cluster, log_dir: Path, start_time: datetime, authorino): - """Collect logs from Authorino pods""" - try: - authorino_name = authorino.name() - except Exception: - authorino_name = "authorino" - - # Try primary label selector - label_selector = {"authorino-resource": authorino_name} - - # Check if pods exist with primary selector - try: - authorino_cluster = cluster.change_project("kuadrant-system") - with authorino_cluster.context: - pods = selector("pod", labels=label_selector) - if pods.count_existing() == 0: - # Fallback to alternative label - label_selector = {"app": authorino_name} - except Exception: - pass - - collect_pod_logs( - cluster=cluster, - namespace="kuadrant-system", - label_selector=label_selector, - log_dir=log_dir, - start_time=start_time, - component_name="authorino", - ) - - -def collect_limitador_logs(cluster, log_dir: Path, start_time: datetime, limitador): - """Collect logs from Limitador pods""" - try: - limitador_name = limitador.name() - except Exception: - limitador_name = "limitador" - - # Try primary label selector - label_selector = {"app": "limitador"} - - # Check if we should use a more specific selector - try: - limitador_cluster = cluster.change_project("kuadrant-system") - with 
limitador_cluster.context: - pods = selector("pod", labels=label_selector) - if pods.count_existing() == 0: - # Fallback to resource-specific label - label_selector = {"limitador-resource": limitador_name} - except Exception: - pass - - collect_pod_logs( - cluster=cluster, - namespace="kuadrant-system", - label_selector=label_selector, - log_dir=log_dir, - start_time=start_time, - component_name="limitador", - ) - - -def collect_dns_operator_logs(cluster, log_dir: Path, start_time: datetime): - """Collect logs from DNS Operator""" - collect_pod_logs( - cluster=cluster, - namespace="kuadrant-system", - label_selector={"control-plane": "dns-operator-controller-manager"}, - log_dir=log_dir, - start_time=start_time, - component_name="dns-operator", - container_name="manager", - ) - - -def collect_authorino_operator_logs(cluster, log_dir: Path, start_time: datetime): - """Collect logs from Authorino Operator""" - collect_pod_logs( - cluster=cluster, - namespace="kuadrant-system", - label_selector={"control-plane": "authorino-operator"}, - log_dir=log_dir, - start_time=start_time, - component_name="authorino-operator", - container_name="manager", - ) - - -def collect_kuadrant_operator_logs(cluster, log_dir: Path, start_time: datetime): - """Collect logs from Kuadrant Operator""" - collect_pod_logs( - cluster=cluster, - namespace="kuadrant-system", - label_selector={"control-plane": "kuadrant-operator"}, - log_dir=log_dir, - start_time=start_time, - component_name="kuadrant-operator", - container_name="manager", - ) - - -def collect_gateway_logs(cluster, log_dir: Path, start_time: datetime, gateway): - """Collect logs from Gateway pods""" - try: - gateway_name = gateway.name() - gateway_namespace = gateway.namespace() - except Exception as e: - print(f" [FAIL] Could not get gateway name/namespace: {e}") - return +from testsuite.config import settings - # Try primary label selector for Gateway API - label_selector = {"gateway.networking.k8s.io/gateway-name": gateway_name} +logger = logging.getLogger(__name__) - # Check if pods exist with primary selector - try: - with cluster.context: - pods = selector("pod", labels=label_selector) - if pods.count_existing() == 0: - # Fallback to Istio label - label_selector = {"istio.io/gateway-name": gateway_name} - except Exception: - pass - collect_pod_logs( - cluster=cluster, - namespace=gateway_namespace, - label_selector=label_selector, - log_dir=log_dir, - start_time=start_time, - component_name="gateway", - ) +class LogCollector: + """Generic log collector for Kubernetes pods.""" + + def __init__(self, cluster, log_dir: Path, since_time: str): + """ + Initialize the log collector. + + cluster: Kubernetes cluster client + log_dir: Directory to save logs to + since_time: ISO 8601 timestamp string (e.g., "2025-10-23T10:30:00Z") + """ + self.cluster = cluster + self.log_dir = log_dir + self.since_time = since_time + + def collect_logs( + self, namespace: str, label_selector: dict, component_name: str, container_name: Optional[str] = None + ): + """ + Collect logs from pods matching the given criteria. + + namespace: Kubernetes namespace to search in + label_selector: Dictionary of labels to match pods (e.g., {"app": "myapp"}) + component_name: Name to use in log filenames + container_name: Optional specific container name. If None, collects from all containers. 
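+
+        A minimal usage sketch (timestamp and selector are illustrative):
+
+            collector = LogCollector(cluster, Path("test-failures"), "2025-10-23T10:30:00Z")
+            collector.collect_logs("kuadrant-system", {"app": "limitador"}, "limitador")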
+ """ + try: + target_cluster = self.cluster.change_project(namespace) + with target_cluster.context: + pods = selector("pod", labels=label_selector) + + if pods.count_existing() == 0: + logger.warning( + "No %s pods found with labels %s in namespace %s", component_name, label_selector, namespace + ) + return + + for pod in pods.objects(): + pod_name = pod.name() + try: + containers = [container_name] if container_name else [c.name for c in pod.model.spec.containers] + for container in containers: + self._collect_container_logs(pod_name, container, namespace, component_name) + except (AttributeError, KeyError) as e: + error_file = self.log_dir / f"{component_name}-{pod_name}-error.txt" + error_file.write_text(f"Failed to process pod: {e}") + logger.error("Failed to process pod %s: %s", pod_name, e) + + except OpenShiftPythonException as e: + logger.error("Failed to access %s pods in namespace %s: %s", component_name, namespace, e) + + def _collect_container_logs(self, pod_name: str, container: str, namespace: str, component_name: str): + """Collect logs from a single container.""" + try: + logs = self._fetch_pod_logs(pod_name, container, namespace) + log_file = self.log_dir / f"{component_name}-{pod_name}-{container}.log" + self._save_log_file(log_file, component_name, pod_name, container, namespace, logs) + logger.info("Collected %s logs: %s/%s", component_name, pod_name, container) + except subprocess.TimeoutExpired: + error_file = self.log_dir / f"{component_name}-{pod_name}-{container}-error.txt" + error_file.write_text("Timeout while collecting logs") + logger.error("Timeout collecting logs from %s/%s", pod_name, container) + except (OSError, IOError) as e: + error_file = self.log_dir / f"{component_name}-{pod_name}-{container}-error.txt" + error_file.write_text(f"Failed to collect logs: {e}") + logger.error("Failed to get logs from %s/%s: %s", pod_name, container, e) + + def _fetch_pod_logs(self, pod_name: str, container: str, namespace: str) -> str: + """Fetch logs from a specific pod container using oc logs command.""" + result = subprocess.run( + [ + "oc", + "logs", + f"pod/{pod_name}", + "-c", + container, + "-n", + namespace, + f"--since-time={self.since_time}", + "--timestamps", + ], + capture_output=True, + text=True, + timeout=30, + check=False, + ) + return result.stdout if result.returncode == 0 else result.stderr + + def _save_log_file( + self, log_file: Path, component_name: str, pod_name: str, container: str, namespace: str, logs: str + ): + """Save collected logs to a file with metadata header.""" + with open(log_file, "w", encoding="utf-8") as f: + f.write(f"# Component: {component_name}\n") + f.write(f"# Pod: {pod_name}\n") + f.write(f"# Container: {container}\n") + f.write(f"# Namespace: {namespace}\n") + f.write(f"# Logs since: {self.since_time}\n") + f.write(f"# {'=' * 70}\n\n") + f.write(logs) def get_log_components(item): @@ -276,13 +159,72 @@ def get_log_components(item): if hasattr(plugin, "__name__") and "conftest" in getattr(plugin, "__name__", ""): if hasattr(plugin, "log_components"): return set(plugin.log_components) - except Exception: + except Exception: # pylint: disable=broad-exception-caught pass # Default: no logging return set() +def _collect_component_logs(collector: LogCollector, component: str, system_namespace: str, gateway=None): + """ + Collect logs for a specific component using the generic LogCollector. + + This function contains the knowledge of how to find each component's pods. 
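+
+    For example, "dns-operator" maps to pods labeled
+    control-plane=dns-operator-controller-manager in the system namespace,
+    restricted to the "manager" container.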
+ """ + if component == "gateway" and gateway: + try: + logger.info("Collecting Gateway logs...") + gateway_name = gateway.name() + gateway_namespace = gateway.namespace() + collector.collect_logs( + gateway_namespace, {"gateway.networking.k8s.io/gateway-name": gateway_name}, "gateway" + ) + except (AttributeError, KeyError, OpenShiftPythonException) as e: + logger.warning("Could not collect Gateway logs: %s", e) + return + + # Component configurations mapping + if component == "authorino": + namespace = system_namespace + labels = {"authorino-resource": "authorino"} + name = "authorino" + container = None + elif component == "limitador": + namespace = system_namespace + labels = {"app": "limitador"} + name = "limitador" + container = None + elif component == "dns-operator": + namespace = system_namespace + labels = {"control-plane": "dns-operator-controller-manager"} + name = "dns-operator" + container = "manager" + elif component == "authorino-operator": + namespace = system_namespace + labels = {"control-plane": "authorino-operator"} + name = "authorino-operator" + container = "manager" + elif component == "limitador-operator": + namespace = system_namespace + labels = {"app": "limitador-operator"} + name = "limitador-operator" + container = "manager" + elif component == "kuadrant-operator": + namespace = system_namespace + labels = {"control-plane": "kuadrant-operator"} + name = "kuadrant-operator" + container = "manager" + else: + return + + try: + logger.info("Collecting %s logs...", component) + collector.collect_logs(namespace, labels, name, container) + except (OpenShiftPythonException, OSError, IOError, KeyError) as e: + logger.warning("Could not collect %s logs: %s", component, e) + + def collect_failure_artifacts(item, cluster, start_time: datetime): """ Collect logs from configured components when a test fails. 
@@ -296,120 +238,26 @@ def collect_failure_artifacts(item, cluster, start_time: datetime): # Determine which components to collect logs from enabled_components = get_log_components(item) if not enabled_components: - logger.info(f"No log components configured for {test_name} - skipping log collection") + logger.info("No log components configured for %s - skipping log collection", test_name) return # Create log directory log_dir = Path("test-failures") / worker_id / test_name log_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Collecting logs for failed test: {test_name}") - logger.info(f"Worker: {worker_id}") - logger.info(f"Log directory: {log_dir}") - logger.info(f"Configured components: {', '.join(sorted(enabled_components))}") - - # Collect logs from available components - # Check which fixtures are available in the test - - def should_collect(component_name): - """Check if we should collect logs for this component""" - return component_name in enabled_components - - # Authorino - if should_collect("authorino"): - try: - logger.info("Collecting Authorino logs...") - collect_pod_logs( - cluster=cluster, - namespace=settings["service_protection"]["system_project"], - label_selector={"authorino-resource": "authorino"}, - log_dir=log_dir, - start_time=start_time, - component_name="authorino", - ) - except Exception as e: - logger.warning(f"Could not collect Authorino logs: {e}") - - # Limitador - if should_collect("limitador"): - try: - logger.info("Collecting Limitador logs...") - collect_pod_logs( - cluster=cluster, - namespace=settings["service_protection"]["system_project"], - label_selector={"app": "limitador"}, - log_dir=log_dir, - start_time=start_time, - component_name="limitador", - ) - except Exception as e: - logger.warning(f"Could not collect Limitador logs: {e}") + logger.info("Collecting logs for failed test: %s", test_name) + logger.info("Worker: %s", worker_id) + logger.info("Log directory: %s", log_dir) + logger.info("Configured components: %s", ", ".join(sorted(enabled_components))) - # DNS Operator - if should_collect("dns-operator"): - try: - logger.info("Collecting DNS Operator logs...") - collect_pod_logs( - cluster=cluster, - namespace=settings["service_protection"]["system_project"], - label_selector={"control-plane": "dns-operator-controller-manager"}, - log_dir=log_dir, - start_time=start_time, - component_name="dns-operator", - ) - except Exception as e: - logger.warning(f"Could not collect DNS Operator logs: {e}") - - # Authorino Operator - if should_collect("authorino-operator"): - try: - logger.info("Collecting Authorino Operator logs...") - collect_pod_logs( - cluster=cluster, - namespace=settings["service_protection"]["system_project"], - label_selector={"control-plane": "authorino-operator"}, - log_dir=log_dir, - start_time=start_time, - component_name="authorino-operator", - ) - except Exception as e: - logger.warning(f"Could not collect Authorino Operator logs: {e}") - - - # Limitador Operator - if should_collect("limitador-operator"): - try: - logger.info("Collecting Limitador Operator logs...") - collect_pod_logs( - cluster=cluster, - namespace="kuadrant-system", - label_selector={"app": "limitador-operator"}, - log_dir=log_dir, - start_time=start_time, - component_name="limitador-operator", - ) - except Exception as e: - logger.warning(f"Could not collect Limitador Operator logs: {e}") + # Create generic collector + since_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") + collector = LogCollector(cluster, log_dir, since_time) + system_namespace = 
settings["service_protection"]["system_project"] - # Kuadrant Operator - if should_collect("kuadrant-operator"): - try: - logger.info("Collecting Kuadrant Operator logs...") - collect_kuadrant_operator_logs(cluster, log_dir, start_time) - except Exception as e: - logger.warning(f"Could not collect Kuadrant Operator logs: {e}") + # Collect logs for each enabled component + gateway = item.funcargs.get("gateway") + for component in enabled_components: + _collect_component_logs(collector, component, system_namespace, gateway) - # Gateway - if should_collect("gateway"): - try: - logger.info("Collecting Gateway logs...") - gateway = item.funcargs.get("gateway") - if gateway: - gateway_name = gateway.name() - gateway_namespace = gateway.namespace() - - collect_pod_logs(cluster, gateway_namespace, {"gateway.networking.k8s.io/gateway-name": gateway_name}, log_dir, start_time, "gateway") - except Exception as e: - logger.warning(f"Could not collect Gateway logs: {e}") - - logger.info(f"Log collection complete. Logs saved to: {log_dir}") + logger.info("Log collection complete. Logs saved to: %s", log_dir) diff --git a/testsuite/tests/singlecluster/limitador/conftest.py b/testsuite/tests/singlecluster/limitador/conftest.py index 82983380..7bf8891a 100644 --- a/testsuite/tests/singlecluster/limitador/conftest.py +++ b/testsuite/tests/singlecluster/limitador/conftest.py @@ -7,6 +7,7 @@ # If not specified, all available components will be logged log_components = ["limitador", "limitador-operator", "gateway"] + @pytest.fixture(scope="session") def limitador(kuadrant): """Returns Limitador CR"""