Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 229 additions & 0 deletions tests/image/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
# Copyright (c) 2025-2026 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
# mypy: disable-error-code="misc,no-any-return"
from __future__ import annotations

import tempfile
from typing import Callable

import numpy as np
import pytest

from trtutils.image.preprocessors import (
CPUPreprocessor,
CUDAPreprocessor,
TRTPreprocessor,
)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Shared preprocessor configuration used by every preprocessor fixture below:
# target (width, height), output value range, and output dtype.
PREPROC_SIZE = (640, 640)
PREPROC_RANGE = (0.0, 1.0)
PREPROC_DTYPE = np.dtype(np.float32)
# Standard ImageNet per-channel normalization statistics
# (presumably RGB order -- confirm against the preprocessors' convention).
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Tolerance for CPU/GPU parity
CUDA_MAG_BOUNDS = 0.01


# ---------------------------------------------------------------------------
# Build support detection
# ---------------------------------------------------------------------------
@pytest.fixture(scope="session")
def _trt_build_supported() -> bool:
    """Check if TRT can build engines on this hardware (session-cached).

    Attempts to build a tiny engine from ``data/simple.onnx`` into a
    throwaway temp file. Returns ``False`` when the model file is missing
    or the build fails for any reason, so dependent tests can be skipped
    instead of erroring.
    """
    try:
        from pathlib import Path

        from trtutils.builder._build import build_engine

        onnx_path = Path(__file__).parent.parent.parent / "data" / "simple.onnx"
        if not onnx_path.exists():
            return False
        with tempfile.NamedTemporaryFile(suffix=".engine", delete=True) as f:
            build_engine(onnx_path, f.name, optimization_level=1)
        return True
    except Exception:
        # Any failure (RuntimeError from TRT, import errors, driver issues)
        # means engines cannot be built here -- treat as unsupported, not
        # fatal.  A separate `except RuntimeError` branch with identical
        # handling was redundant and has been merged into this one.
        return False


# ---------------------------------------------------------------------------
# Parametrized fixtures
# ---------------------------------------------------------------------------
@pytest.fixture(params=["cpu", "cuda", "trt"])
def preprocessor_type(request: pytest.FixtureRequest) -> str:
    """Yield each preprocessor backend identifier in turn."""
    ptype: str = request.param
    return ptype


@pytest.fixture(params=["linear", "letterbox"])
def resize_method(request: pytest.FixtureRequest) -> str:
    """Yield each resize method identifier in turn."""
    method: str = request.param
    return method


# ---------------------------------------------------------------------------
# Preprocessor factory
# ---------------------------------------------------------------------------
@pytest.fixture
def make_preprocessor(
    _trt_build_supported: bool,
) -> Callable[..., CPUPreprocessor | CUDAPreprocessor | TRTPreprocessor]:
    """Return a factory that builds preprocessors by type."""

    def _make(
        ptype: str,
        *,
        mean: tuple[float, float, float] | None = None,
        std: tuple[float, float, float] | None = None,
        batch_size: int = 4,
    ) -> CPUPreprocessor | CUDAPreprocessor | TRTPreprocessor:
        # All backends share the same size/range/dtype configuration.
        common = (PREPROC_SIZE, PREPROC_RANGE, PREPROC_DTYPE)
        if ptype == "trt":
            # TRT additionally needs a working engine builder and a batch size.
            if not _trt_build_supported:
                pytest.skip("TRT cannot build engines for this GPU")
            return TRTPreprocessor(*common, mean=mean, std=std, batch_size=batch_size)
        simple_backends = {"cpu": CPUPreprocessor, "cuda": CUDAPreprocessor}
        if ptype in simple_backends:
            return simple_backends[ptype](*common, mean=mean, std=std)
        err_msg = f"Unknown preprocessor type: {ptype}"
        raise ValueError(err_msg)

    return _make


# ---------------------------------------------------------------------------
# Output mock generators
# ---------------------------------------------------------------------------
@pytest.fixture
def make_yolov10_output() -> Callable[[int, int], list[np.ndarray]]:
    """Return a factory for YOLOv10-like outputs.

    The factory produces a single (batch, 300, 6) float32 array where each
    populated row is [x1, y1, x2, y2, score, class_id]; only the first
    ``num_dets`` rows per batch item are non-zero.
    """

    def _make(batch_size: int, num_dets: int = 10) -> list[np.ndarray]:
        output = np.zeros((batch_size, 300, 6), dtype=np.float32)
        for b in range(batch_size):
            # Per-batch offset is invariant w.r.t. the detection index;
            # hoisted out of the inner loop (matches the sibling factories).
            offset = b * 50
            for i in range(num_dets):
                output[b, i] = [
                    100 + i * 10 + offset,
                    100 + i * 10 + offset,
                    200 + i * 10 + offset,
                    200 + i * 10 + offset,
                    0.9 - i * 0.05,
                    i % 10,
                ]
        return [output]

    return _make


@pytest.fixture
def make_efficient_nms_output() -> Callable[[int, int], list[np.ndarray]]:
    """Return a factory for EfficientNMS-like outputs."""

    def _make(batch_size: int, num_dets: int = 10) -> list[np.ndarray]:
        max_dets = 100
        # Four-tensor EfficientNMS layout: counts, boxes, scores, classes.
        counts = np.full((batch_size,), num_dets, dtype=np.int32)
        boxes = np.zeros((batch_size, max_dets, 4), dtype=np.float32)
        confs = np.zeros((batch_size, max_dets), dtype=np.float32)
        classes = np.zeros((batch_size, max_dets), dtype=np.float32)
        for b in range(batch_size):
            shift = b * 50
            for i in range(num_dets):
                x1 = 100 + i * 10 + shift
                # Boxes are 100px squares marching diagonally per detection.
                boxes[b, i] = [x1, x1, x1 + 100, x1 + 100]
                confs[b, i] = 0.9 - i * 0.05
                classes[b, i] = i % 10
        return [counts, boxes, confs, classes]

    return _make


@pytest.fixture
def make_rfdetr_output() -> Callable[[int, int, int, int], list[np.ndarray]]:
    """Return a factory for RF-DETR-like outputs."""

    def _make(
        batch_size: int, num_queries: int = 300, num_classes: int = 80, num_dets: int = 10
    ) -> list[np.ndarray]:
        boxes = np.zeros((batch_size, num_queries, 4), dtype=np.float32)
        # Background logit everywhere; populated queries get one high class.
        logits = np.full((batch_size, num_queries, num_classes), -10.0, dtype=np.float32)
        side = 100 / 640.0
        for b in range(batch_size):
            for i in range(num_dets):
                # Normalized cx/cy share the same expression; fixed w/h.
                center = (150 + i * 10 + b * 30) / 640.0
                boxes[b, i] = [center, center, side, side]
                logits[b, i, i % num_classes] = 5.0 - i * 0.3
        return [boxes, logits]

    return _make


@pytest.fixture
def make_detr_output() -> Callable[[int, int, int], list[np.ndarray]]:
    """Return a factory for DETR-like outputs."""

    def _make(batch_size: int, num_queries: int = 300, num_dets: int = 10) -> list[np.ndarray]:
        scores = np.zeros((batch_size, num_queries), dtype=np.float32)
        labels = np.zeros((batch_size, num_queries), dtype=np.float32)
        boxes = np.zeros((batch_size, num_queries, 4), dtype=np.float32)
        for b in range(batch_size):
            shift = b * 50
            for i in range(num_dets):
                lo = 100 + i * 10 + shift
                hi = 200 + i * 10 + shift
                scores[b, i] = 0.9 - i * 0.05
                labels[b, i] = i % 10
                boxes[b, i] = [lo, lo, hi, hi]
        return [scores, labels, boxes]

    return _make


@pytest.fixture
def make_classification_output() -> Callable[[int, int], list[np.ndarray]]:
    """Return a factory for classification outputs."""
    rng = np.random.default_rng()

    def _make(batch_size: int, num_classes: int = 1000) -> list[np.ndarray]:
        # Random background logits with two deterministic peaks per sample,
        # so top-1/top-2 predictions are known regardless of the noise.
        logits = rng.standard_normal((batch_size, num_classes)).astype(np.float32)
        for b in range(batch_size):
            top = b % num_classes
            runner_up = (b + 1) % num_classes
            logits[b, top] = 10.0
            logits[b, runner_up] = 8.0
        return [logits]

    return _make


# ---------------------------------------------------------------------------
# Ratios/padding factory
# ---------------------------------------------------------------------------
@pytest.fixture
def make_ratios_padding() -> Callable[
    [int], tuple[list[tuple[float, float]], list[tuple[float, float]]]
]:
    """Return a factory for ratios and padding lists."""

    def _make(batch_size: int) -> tuple[list[tuple[float, float]], list[tuple[float, float]]]:
        # Identity transform: no scaling, no padding.  Tuples are immutable,
        # so list repetition is safe here.
        identity_ratios = [(1.0, 1.0)] * batch_size
        zero_padding = [(0.0, 0.0)] * batch_size
        return identity_ratios, zero_padding

    return _make
16 changes: 16 additions & 0 deletions tests/image/kernels/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (c) 2025-2026 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
from __future__ import annotations

import pytest


@pytest.fixture
def cuda_stream():
    """Yield a fresh CUDA stream and destroy it after the test.

    The teardown is wrapped in ``try``/``finally`` so the stream is released
    even if the fixture generator is closed early (e.g. an error thrown into
    the generator during interrupted teardown), avoiding a leaked stream.
    """
    from trtutils.core import create_stream, destroy_stream

    stream = create_stream()
    try:
        yield stream
    finally:
        destroy_stream(stream)
143 changes: 143 additions & 0 deletions tests/image/kernels/test_letterbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Copyright (c) 2026 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
# mypy: disable-error-code="misc"
"""Tests for the letterbox resize CUDA kernel."""

from __future__ import annotations

import math
from pathlib import Path

import cv2
import numpy as np
import pytest

from trtutils.core import (
Kernel,
create_binding,
create_stream,
destroy_stream,
memcpy_device_to_host_async,
memcpy_host_to_device_async,
stream_synchronize,
)
from trtutils.image import kernels

try:
from cv2ext.image import letterbox as cv2ext_letterbox # type: ignore[import-untyped]

_CV2EXT_AVAILABLE = True
except ImportError:
cv2ext_letterbox = None # type: ignore[assignment]
_CV2EXT_AVAILABLE = False

# Repository-level ``data`` directory (four levels up from this test file)
# holding the shared test fixture image.
_DATA_DIR = Path(__file__).parent.parent.parent.parent / "data"
_HORSE_IMAGE_PATH = _DATA_DIR / "horse.jpg"


def _run_letterbox_kernel(
    img: np.ndarray,
    output_shape: tuple[int, int],
) -> np.ndarray:
    """Run the letterbox kernel on ``img`` and return the uint8 HWC result.

    Args:
        img: Input image, assumed HWC uint8 layout.
        output_shape: Target size as (width, height).

    Returns:
        The letterboxed image with shape (height, width, 3), dtype uint8.
    """
    o_width, o_height = output_shape
    height, width = img.shape[:2]

    stream = create_stream()

    num_threads: tuple[int, int, int] = (32, 32, 1)
    # Grid x covers output width, grid y covers output height.  num_threads
    # indices 0/1 are the block's x/y extents respectively -- the original
    # used them swapped, which was only harmless because both were 32.
    num_blocks: tuple[int, int, int] = (
        math.ceil(o_width / num_threads[0]),
        math.ceil(o_height / num_threads[1]),
        1,
    )

    input_binding = create_binding(img, is_input=True)
    dummy_output = np.zeros((o_height, o_width, 3), dtype=np.uint8)
    output_binding = create_binding(dummy_output, pagelocked_mem=True)

    # CPU-side letterbox math mirrored for the kernel: uniform scale to fit,
    # then center the resized content with symmetric padding.
    scale = min(o_width / width, o_height / height)
    new_width = int(width * scale)
    new_height = int(height * scale)
    pad_x = int((o_width - new_width) / 2)
    pad_y = int((o_height - new_height) / 2)

    kernel = Kernel(kernels.LETTERBOX_RESIZE[0], kernels.LETTERBOX_RESIZE[1])
    try:
        args = kernel.create_args(
            input_binding.allocation,
            output_binding.allocation,
            width,
            height,
            o_width,
            o_height,
            pad_x,
            pad_y,
            new_width,
            new_height,
        )

        memcpy_host_to_device_async(input_binding.allocation, img, stream)
        kernel.call(num_blocks, num_threads, stream, args)
        memcpy_device_to_host_async(
            output_binding.host_allocation, output_binding.allocation, stream
        )
        stream_synchronize(stream)

        result = output_binding.host_allocation.copy()
    finally:
        # Release GPU resources even if the kernel launch or a copy fails,
        # so one failing test does not leak stream/bindings for the rest.
        destroy_stream(stream)
        input_binding.free()
        output_binding.free()
        kernel.free()

    return result


class TestLetterboxKernel:
    """Tests for the letterbox CUDA kernel."""

    def test_compiles(self) -> None:
        """Letterbox kernel compiles without error."""
        stream = create_stream()
        compiled = Kernel(kernels.LETTERBOX_RESIZE[0], kernels.LETTERBOX_RESIZE[1])
        assert compiled is not None
        # Release the compiled module as well, not just the stream.
        compiled.free()
        destroy_stream(stream)

    @pytest.mark.skipif(not _CV2EXT_AVAILABLE, reason="cv2ext not installed")
    def test_correctness_against_cv2ext(self) -> None:
        """GPU letterbox result matches cv2ext.letterbox()."""
        if not _HORSE_IMAGE_PATH.exists():
            pytest.skip("Horse test image not found")
        img = cv2.imread(str(_HORSE_IMAGE_PATH))
        if img is None:
            pytest.skip("Failed to read test image")

        output_shape = (640, 480)
        assert cv2ext_letterbox is not None
        resized_img, _, _ = cv2ext_letterbox(img, output_shape)  # type: ignore[misc]
        cuda_result = _run_letterbox_kernel(img, output_shape)

        assert cuda_result.shape == resized_img.shape
        cpu_mean = np.mean(resized_img)
        assert cpu_mean - 0.5 <= np.mean(cuda_result) <= cpu_mean + 0.5
        diff_mask = np.any(resized_img != cuda_result, axis=-1)
        # Guard the all-equal case: mean of an empty selection is NaN, and
        # `nan < 1.0` is False -- the original failed exactly when the GPU
        # result matched the CPU result perfectly.
        if diff_mask.any():
            # Cast to a signed dtype before subtracting: uint8 arithmetic
            # wraps around (1 - 2 == 255), which inflated the average diff.
            cpu_px = resized_img[diff_mask].astype(np.int16)
            gpu_px = cuda_result[diff_mask].astype(np.int16)
            avg_diff = np.mean(np.abs(cpu_px - gpu_px))
            assert avg_diff < 1.0

    @pytest.mark.parametrize(
        "output_shape",
        [(640, 640), (416, 416), (320, 320)],
        ids=["640x640", "416x416", "320x320"],
    )
    def test_various_target_sizes(self, output_shape: tuple[int, int]) -> None:
        """Letterbox kernel works with various target sizes."""
        if not _HORSE_IMAGE_PATH.exists():
            pytest.skip("Horse test image not found")
        img = cv2.imread(str(_HORSE_IMAGE_PATH))
        if img is None:
            pytest.skip("Failed to read test image")

        o_width, o_height = output_shape
        result = _run_letterbox_kernel(img, output_shape)
        assert result.shape == (o_height, o_width, 3)
Loading
Loading