From bdb262ccbb176aaee022768845efcc854fdbe5c7 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Mon, 16 Feb 2026 21:51:26 +0000
Subject: [PATCH 1/5] add python script to create multi-res tissue masks via
 HSV filtering

---
 README.md                       |  42 ++
 scripts/generate_tissue_mask.py | 981 ++++++++++++++++++++++++++++++++
 2 files changed, 1023 insertions(+)
 create mode 100644 scripts/generate_tissue_mask.py

diff --git a/README.md b/README.md
index f128f52..4993bb9 100644
--- a/README.md
+++ b/README.md
@@ -113,3 +113,45 @@ These visualizations are useful for double-checking that the tiling or sampling
 ### Process summary
 
 - **`process_list.csv`**: a summary file listing each processed slide, indicating whether processing was successful or failed. If a failure occurred, the traceback is provided to help diagnose the issue.
+
+## Standalone tissue segmentator
+
+For quick mask generation outside the full pipeline, use the standalone script:
+
+```shell
+# Single slide
+python scripts/generate_tissue_mask.py \
+    --wsi /path/to/slide.tif \
+    --output /path/to/tissue-mask-pyramid.tif \
+    --spacing 4.0 \
+    --tolerance 0.1
+
+# Multiple slides
+python scripts/generate_tissue_mask.py \
+    --wsi /path/to/slide_dir/*.tif \
+    --output-dir /path/to/output_dir \
+    --spacing 4.0 \
+    --tolerance 0.1
+```
+
+This script:
+- reads the WSI with `wholeslidedata` (OpenSlide backend by default),
+- computes a binary tissue mask using HSV thresholding (`0=background`, `1=tissue`),
+- writes a pyramidal TIFF mask where each level is downsampled from the previous one,
+- prints a final recap of how many slides succeeded, skipped, and failed.
+
+Useful options:
+- `--backend` to switch the wholeslidedata backend (default: `openslide`)
+- `--output` for single-slide mode and `--output-dir` for multi-slide mode
+- `--num-workers` to control parallelism
+- `--no-cache` to disable cache-based skipping and force recomputation
+- `--tolerance` to control how much a natural spacing can deviate from target spacing when selecting the best level for reading the whole slide
+- `--min-component-area-um2` to remove tiny tissue blobs
+- `--min-hole-area-um2` to fill small holes inside tissue
+- `--gaussian-sigma-um` to apply optional pre-threshold Gaussian smoothing
+- `--open-radius-um` / `--close-radius-um` for spacing-aware morphological smoothing
+- `--spacing-at-level-0` to override level-0 spacing when metadata is incorrect
+- `--compression` and `--tile-size` to tune TIFF output
+
+The summary file is saved as `summary.csv` in `--output-dir` (multi-slide mode) or next to `--output` (single-slide mode).
+The cache manifest used for skip inference is saved as `cache_manifest.json` in the same directory.
diff --git a/scripts/generate_tissue_mask.py b/scripts/generate_tissue_mask.py
new file mode 100644
index 0000000..220d8a7
--- /dev/null
+++ b/scripts/generate_tissue_mask.py
@@ -0,0 +1,981 @@
+from __future__ import annotations
+
+import argparse
+import csv
+import glob
+import hashlib
+import json
+import multiprocessing as mp
+import os
+import traceback
+from pathlib import Path
+
+import cv2
+import numpy as np
+import tifffile
+from PIL import Image
+from tqdm import tqdm
+
+import wholeslidedata as wsd
+
+
+HSV_LOWER = np.array([90, 8, 103], dtype=np.uint8)
+HSV_UPPER = np.array([180, 255, 255], dtype=np.uint8)
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description=(
+            "Compute a tissue mask using HSV thresholding from a whole-slide image "
+            "and save it as a pyramidal TIFF."
+        )
+    )
+    parser.add_argument(
+        "--wsi",
+        type=str,
+        nargs="+",
+        required=True,
+        help=(
+            "Input whole-slide image path(s) or glob pattern(s). "
+            "Examples: /path/slide.tif or '/path/slides/*.tif'"
+        ),
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=None,
+        help="Output pyramidal TIFF path for single-slide mode.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=None,
+        help=(
+            "Output directory for multi-slide mode. Each output is written as "
+            "{output-dir}/{slide_stem}.tif"
+        ),
+    )
+    parser.add_argument(
+        "--spacing",
+        type=float,
+        required=True,
+        help="Requested level-0 spacing for the output mask in microns per pixel.",
+    )
+    parser.add_argument(
+        "--tolerance",
+        type=float,
+        required=True,
+        help="Tolerance deciding how much a natural spacing can deviate from target spacing when selecting the best level for reading (expressed as a fraction of the target spacing, e.g. 0.1 for 10%).",
+    )
+    parser.add_argument(
+        "--downsample-per-level",
+        type=float,
+        default=2.0,
+        help="Pyramid downsample factor between successive levels (must be > 1).",
+    )
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default="asap",
+        help="WholeSlideData backend for reading the WSI.",
+    )
+    parser.add_argument(
+        "--spacing-at-level-0",
+        type=float,
+        default=None,
+        help="Optional override for the input WSI level-0 spacing in microns per pixel.",
+    )
+    parser.add_argument(
+        "--min-component-area-um2",
+        type=float,
+        default=2000.0,
+        help=(
+            "Remove tissue connected components smaller than this area in square microns "
+            "(default: 2000)."
+        ),
+    )
+    parser.add_argument(
+        "--min-hole-area-um2",
+        type=float,
+        default=5000.0,
+        help=(
+            "Fill holes inside tissue up to this area in square microns "
+            "(default: 5000)."
+        ),
+    )
+    parser.add_argument(
+        "--open-radius-um",
+        type=float,
+        default=1.0,
+        help="Morphological opening radius in microns (default: 1.0).",
+    )
+    parser.add_argument(
+        "--gaussian-sigma-um",
+        type=float,
+        default=1,
+        help="Optional Gaussian blur sigma in microns before HSV thresholding (default: 0 disables).",
+    )
+    parser.add_argument(
+        "--close-radius-um",
+        type=float,
+        default=10.0,
+        help="Morphological closing radius in microns (default: 10.0).",
+    )
+    parser.add_argument(
+        "--compression",
+        type=str,
+        default="deflate",
+        help="TIFF compression codec for output mask.",
+    )
+    parser.add_argument(
+        "--tile-size",
+        type=int,
+        default=512,
+        help="Tile size for writing TIFF levels when dimensions allow tiling.",
+    )
+    parser.add_argument(
+        "--min-size",
+        type=int,
+        default=None,
+        help="Stop pyramid generation before creating a level with min(height, width) < min-size.",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Print per-level pyramid details and output path.",
+    )
+    parser.add_argument(
+        "--no-cache",
+        action="store_true",
+        help="Disable cache-based skipping and force recomputation for all slides.",
+    )
+    parser.add_argument(
+        "--num-workers",
+        type=int,
+        default=max(1, min(4, os.cpu_count() or 1)),
+        help="Number of worker processes for slide processing (default: min(4, CPU count)).",
+    )
+    return parser.parse_args()
+
+
+def get_downsamples(wsi: wsd.WholeSlideImage) -> list[tuple[float, float]]:
+    """
+    Calculate the downsample factors for each level of the image pyramid.
+
+    This method computes the downsample factors for each level in the image
+    pyramid relative to the base level (level 0). The downsample factor for
+    each level is represented as a tuple of two values, corresponding to the
+    downsampling in the width and height dimensions.
+
+    Returns:
+        list of tuple: A list of tuples where each tuple contains two float
+        values representing the downsample factors (width_factor, height_factor)
+        for each level relative to the base level.
+    """
+    level_downsamples = []
+    dim_0 = wsi.shapes[0]
+    for dim in wsi.shapes:
+        level_downsample = (dim_0[0] / float(dim[0]), dim_0[1] / float(dim[1]))
+        level_downsamples.append(level_downsample)
+    return level_downsamples
+
+
+def get_best_level_for_downsample_custom(downsamples: list[tuple[int, int]], target_downsample: int):
+    """
+    Determines the best level for a given downsample factor based on the available
+    level downsample values.
+
+    Args:
+        downsample (float): Target downsample factor.
+
+    Returns:
+        int: Index of the best matching level for the given downsample factor.
+    """
+    level = int(np.argmin([abs(x - target_downsample) for x, _ in downsamples]))
+    return level
+
+
+def get_best_level_for_spacing(
+    wsi: wsd.WholeSlideImage, target_spacing: float, tolerance: float
+):
+    """
+    Determines the best level in a multi-resolution image pyramid for a given target spacing.
+
+    Ensures that the spacing of the returned level is either within the specified tolerance of the target
+    spacing or smaller than the target spacing to avoid upsampling.
+
+    Args:
+        target_spacing (float): Desired spacing.
+        tolerance (float, optional): Tolerance for matching the spacing, deciding how much
+            spacing can deviate from those specified in the slide metadata.
+
+    Returns:
+        level (int): Index of the best matching level in the image pyramid.
+    """
+    spacing_at_0 = wsi.spacings[0]
+    target_downsample = target_spacing / spacing_at_0
+    level_downsamples = get_downsamples(wsi)
+    level = get_best_level_for_downsample_custom(level_downsamples, target_downsample)
+    level_spacing = wsi.spacings[level]
+
+    # check if the level_spacing is within the tolerance of the target_spacing
+    is_within_tolerance = False
+    if abs(level_spacing - target_spacing) / target_spacing <= tolerance:
+        is_within_tolerance = True
+        return level, is_within_tolerance
+
+    # otherwise, look for a spacing smaller than or equal to the target_spacing
+    else:
+        while level > 0 and level_spacing > target_spacing:
+            level -= 1
+            level_spacing = wsi.spacings[level]
+            if abs(level_spacing - target_spacing) / target_spacing <= tolerance:
+                is_within_tolerance = True
+                break
+
+    assert (
+        level_spacing <= target_spacing
+        or abs(level_spacing - target_spacing) / target_spacing <= tolerance
+    ), f"Unable to find a spacing less than or equal to the target spacing ({target_spacing}) or within {int(tolerance * 100)}% of the target spacing."
+    return level, is_within_tolerance
+
+
+def load_wsi_at_spacing(
+    *,
+    wsi_path: Path,
+    target_spacing: float,
+    tolerance: float,
+    backend: str,
+    spacing_at_level_0: float | None = None,
+) -> tuple[np.ndarray, float]:
+
+    wsi = wsd.WholeSlideImage(wsi_path, backend=backend)
+    base_spacings = list(wsi.spacings)
+
+    if spacing_at_level_0 is None:
+        spacings = base_spacings
+    else:
+        spacings = [spacing_at_level_0 * sp / base_spacings[0] for sp in base_spacings]
+
+    level, is_within_tolerance = get_best_level_for_spacing(wsi, target_spacing, tolerance)
+    wsi_arr = wsi.get_slide(spacing=spacings[level])
+
+    if not is_within_tolerance:
+        # means the selected level's spacing is smaller than the target_spacing
+        # find power of 2 downsample factor to apply to the selected level to get as close as possible to the target_spacing
+        # we want the end spacing to be within the tolerance of the target_spacing
+        power = int(np.ceil(np.log2(target_spacing / spacings[level])))
+        final_spacing = spacings[level] * (2**power)
+        if abs(final_spacing - target_spacing) / target_spacing > tolerance:
+            raise ValueError(
+                f"Unable to achieve target spacing within tolerance after downsampling. Closest achievable spacing is {final_spacing:.2f} mpp, which is {abs(final_spacing - target_spacing) / target_spacing:.2%} away from the target spacing. Consider increasing the tolerance or adjusting the target spacing."
+            )
+        wsi_arr = cv2.resize(
+            wsi_arr,
+            dsize=None,
+            fx=final_spacing / spacings[level],
+            fy=final_spacing / spacings[level],
+            interpolation=cv2.INTER_AREA,
+        )
+        effective_spacing = final_spacing
+    else:
+        effective_spacing = spacings[level]
+
+    return wsi_arr, effective_spacing
+
+
+def segment_tissue_hsv(
+    wsi_arr: np.ndarray,
+    lower: np.ndarray = HSV_LOWER,
+    upper: np.ndarray = HSV_UPPER,
+    gaussian_sigma_px: float = 0.0,
+) -> np.ndarray:
+    if gaussian_sigma_px > 0:
+        wsi_arr = cv2.GaussianBlur(wsi_arr, (0, 0), sigmaX=gaussian_sigma_px, sigmaY=gaussian_sigma_px)
+
+    img_hsv = cv2.cvtColor(wsi_arr, cv2.COLOR_RGB2HSV)
+    mask = (cv2.inRange(img_hsv, lower, upper) > 0).astype(np.uint8)
+
+    return mask
+
+
+def _area_um2_to_px(area_um2: float, spacing_um_per_px: float) -> int:
+    if area_um2 <= 0:
+        return 0
+    px_area = area_um2 / (spacing_um_per_px**2)
+    return max(1, int(round(px_area)))
+
+
+def _radius_um_to_px(radius_um: float, spacing_um_per_px: float) -> int:
+    if radius_um <= 0:
+        return 0
+    px_radius = radius_um / spacing_um_per_px
+    return max(1, int(round(px_radius)))
+
+
+def _sigma_um_to_px(sigma_um: float, spacing_um_per_px: float) -> float:
+    if sigma_um <= 0:
+        return 0.0
+    return float(sigma_um / spacing_um_per_px)
+
+
+def _remove_small_tissue_components(mask: np.ndarray, min_area_px: int) -> np.ndarray:
+    if min_area_px <= 0:
+        return mask
+    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask.astype(np.uint8), connectivity=8)
+    if num_labels <= 1:
+        return mask
+    keep = np.zeros_like(mask, dtype=np.uint8)
+    for label in range(1, num_labels):
+        area = int(stats[label, cv2.CC_STAT_AREA])
+        if area >= min_area_px:
+            keep[labels == label] = 1
+    return keep
+
+
+def _fill_small_holes(mask: np.ndarray, max_hole_area_px: int) -> np.ndarray:
+    if max_hole_area_px <= 0:
+        return mask
+
+    h, w = mask.shape[:2]
+    inv = (1 - mask).astype(np.uint8)
+    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(inv, connectivity=8)
+    if num_labels <= 1:
+        return mask
+
+    filled = mask.copy().astype(np.uint8)
+    for label in range(1, num_labels):
+        x = int(stats[label, cv2.CC_STAT_LEFT])
+        y = int(stats[label, cv2.CC_STAT_TOP])
+        bw = int(stats[label, cv2.CC_STAT_WIDTH])
+        bh = int(stats[label, cv2.CC_STAT_HEIGHT])
+        area = int(stats[label, cv2.CC_STAT_AREA])
+
+        touches_border = (x == 0) or (y == 0) or (x + bw >= w) or (y + bh >= h)
+        if touches_border:
+            continue
+        if area <= max_hole_area_px:
+            filled[labels == label] = 1
+
+    return filled
+
+
+def postprocess_mask(
+    *,
+    mask: np.ndarray,
+    spacing_um_per_px: float,
+    min_component_area_um2: float = 0.0,
+    min_hole_area_um2: float = 0.0,
+    open_radius_um: float = 0.0,
+    close_radius_um: float = 0.0,
+) -> np.ndarray:
+    processed = mask.astype(np.uint8)
+
+    min_component_area_px = _area_um2_to_px(min_component_area_um2, spacing_um_per_px)
+    max_hole_area_px = _area_um2_to_px(min_hole_area_um2, spacing_um_per_px)
+    open_radius_px = _radius_um_to_px(open_radius_um, spacing_um_per_px)
+    close_radius_px = _radius_um_to_px(close_radius_um, spacing_um_per_px)
+
+    processed = _remove_small_tissue_components(processed, min_component_area_px)
+    processed = _fill_small_holes(processed, max_hole_area_px)
+
+    if close_radius_px > 0:
+        kernel = cv2.getStructuringElement(
+            cv2.MORPH_ELLIPSE,
+            (2 * close_radius_px + 1, 2 * close_radius_px + 1),
+        )
+        processed = cv2.morphologyEx(processed, cv2.MORPH_CLOSE, kernel)
+
+    if open_radius_px > 0:
+        kernel = cv2.getStructuringElement(
+            cv2.MORPH_ELLIPSE,
+            (2 * open_radius_px + 1, 2 * open_radius_px + 1),
+        )
+        processed = cv2.morphologyEx(processed, cv2.MORPH_OPEN, kernel)
+
+    return (processed > 0).astype(np.uint8)
+
+
+def build_mask_pyramid(
+    level0_mask: np.ndarray,
+    downsample_per_level: float,
+    min_size: int,
+) -> list[np.ndarray]:
+    if downsample_per_level <= 1:
+        raise ValueError("downsample_per_level must be > 1")
+    if min_size < 1:
+        raise ValueError("min_size must be >= 1")
+
+    levels = [level0_mask.astype(np.uint8)]
+    while True:
+        previous = levels[-1]
+        prev_h, prev_w = previous.shape[:2]
+        next_w = max(1, int(round(prev_w / downsample_per_level)))
+        next_h = max(1, int(round(prev_h / downsample_per_level)))
+        if min(next_h, next_w) < min_size:
+            break
+        next_level = cv2.resize(previous, (next_w, next_h), interpolation=cv2.INTER_NEAREST)
+        levels.append(next_level)
+
+    return levels
+
+
+def _spacing_to_pixels_per_cm(spacing_um_per_px: float) -> float:
+    return 10000.0 / spacing_um_per_px
+
+
+def resolve_wsi_paths(wsi_inputs: list[str]) -> list[Path]:
+    resolved: list[Path] = []
+    seen: set[Path] = set()
+
+    for raw_input in wsi_inputs:
+        matches = [Path(match) for match in glob.glob(raw_input, recursive=True)]
+        if not matches:
+            matches = [Path(raw_input)]
+
+        for candidate in sorted(matches):
+            if not candidate.is_file():
+                continue
+            key = candidate.resolve()
+            if key in seen:
+                continue
+            seen.add(key)
+            resolved.append(candidate)
+
+    if not resolved:
+        raise ValueError("No input slides were found from --wsi paths/patterns")
+
+    return resolved
+
+
+def build_output_mapping(
+    *,
+    input_wsi_paths: list[Path],
+    output_path: Path | None,
+    output_dir: Path | None,
+) -> list[tuple[Path, Path]]:
+    if len(input_wsi_paths) == 1:
+        if output_path is not None and output_dir is not None:
+            raise ValueError("For single-slide mode, provide only one of --output or --output-dir")
+        if output_path is None and output_dir is None:
+            raise ValueError("For single-slide mode, provide --output or --output-dir")
+        slide_path = input_wsi_paths[0]
+        final_output = output_path if output_path is not None else output_dir / f"{slide_path.stem}.tif"
+        return [(slide_path, final_output)]
+
+    if output_path is not None:
+        raise ValueError("For multi-slide mode, use --output-dir (not --output)")
+    if output_dir is None:
+        raise ValueError("For multi-slide mode, --output-dir is required")
+
+    used_names: set[str] = set()
+    mapping: list[tuple[Path, Path]] = []
+    for slide_path in input_wsi_paths:
+        out_name = f"{slide_path.stem}.tif"
+        if out_name in used_names:
+            raise ValueError(
+                "Duplicate slide stem detected while using --output-dir: "
+                f"{out_name}. Rename files or process separately."
+            )
+        used_names.add(out_name)
+        mapping.append((slide_path, output_dir / out_name))
+    return mapping
+
+
+def write_pyramidal_mask_tiff(
+    *,
+    levels: list[np.ndarray],
+    output_path: Path,
+    level0_spacing: float,
+    downsample_per_level: float,
+    compression: str,
+    tile_size: int,
+) -> None:
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    def _write_level(
+        tif: tifffile.TiffWriter,
+        level_arr: np.ndarray,
+        spacing: float,
+        *,
+        subifds: int | None = None,
+        subfiletype: int | None = None,
+    ) -> None:
+        height, width = level_arr.shape[:2]
+        write_kwargs = {
+            "data": level_arr,
+            "photometric": "minisblack",
+            "compression": compression,
+            "resolution": (_spacing_to_pixels_per_cm(spacing), _spacing_to_pixels_per_cm(spacing)),
+            "resolutionunit": "CENTIMETER",
+            "metadata": None,
+        }
+        if height >= tile_size and width >= tile_size:
+            write_kwargs["tile"] = (tile_size, tile_size)
+        if subifds is not None:
+            write_kwargs["subifds"] = subifds
+        if subfiletype is not None:
+            write_kwargs["subfiletype"] = subfiletype
+        tif.write(**write_kwargs)
+
+    with tifffile.TiffWriter(output_path, bigtiff=True) as tif:
+        _write_level(tif, levels[0], level0_spacing)
+
+        for idx, level in enumerate(levels[1:], start=1):
+            spacing = level0_spacing * (downsample_per_level ** idx)
+            _write_level(
+                tif,
+                level,
+                spacing,
+                subfiletype=1,
+            )
+
+
+def build_command_signature(
+    *,
+    spacing: float,
+    tolerance: float,
+    downsample_per_level: float,
+    backend: str,
+    spacing_at_level_0: float | None,
+    min_component_area_um2: float,
+    min_hole_area_um2: float,
+    gaussian_sigma_um: float,
+    open_radius_um: float,
+    close_radius_um: float,
+    compression: str,
+    tile_size: int,
+    min_size: int | None,
+) -> str:
+    script_hash = hashlib.sha256(Path(__file__).read_bytes()).hexdigest()
+    payload = {
+        "spacing": spacing,
+        "tolerance": tolerance,
+        "downsample_per_level": downsample_per_level,
+        "backend": backend,
+        "spacing_at_level_0": spacing_at_level_0,
+        "min_component_area_um2": min_component_area_um2,
+        "min_hole_area_um2": min_hole_area_um2,
+        "gaussian_sigma_um": gaussian_sigma_um,
+        "open_radius_um": open_radius_um,
+        "close_radius_um": close_radius_um,
+        "compression": compression,
+        "tile_size": tile_size,
+        "min_size": min_size,
+        "script_hash": script_hash,
+    }
+    payload_bytes = json.dumps(payload, sort_keys=True).encode("utf-8")
+    return hashlib.sha256(payload_bytes).hexdigest()
+
+
+def get_file_fingerprint(path: Path) -> dict[str, int]:
+    stat = path.stat()
+    return {
+        "size": int(stat.st_size),
+        "mtime_ns": int(stat.st_mtime_ns),
+    }
+
+
+def load_cache_manifest(cache_manifest_path: Path) -> dict[str, object]:
+    if not cache_manifest_path.is_file():
+        return {"command_signature": None, "entries": {}}
+    try:
+        with cache_manifest_path.open("r", encoding="utf-8") as f:
+            payload = json.load(f)
+    except (json.JSONDecodeError, OSError):
+        return {"command_signature": None, "entries": {}}
+
+    if not isinstance(payload, dict):
+        return {"command_signature": None, "entries": {}}
+
+    entries = payload.get("entries")
+    if not isinstance(entries, dict):
+        entries = {}
+    return {
+        "command_signature": payload.get("command_signature"),
+        "entries": entries,
+    }
+
+
+def save_cache_manifest(
+    cache_manifest_path: Path,
+    *,
+    command_signature: str,
+    entries: dict[str, dict[str, object]],
+) -> None:
+    cache_manifest_path.parent.mkdir(parents=True, exist_ok=True)
+    payload = {
+        "command_signature": command_signature,
+        "entries": entries,
+    }
+    with cache_manifest_path.open("w", encoding="utf-8") as f:
+        json.dump(payload, f, indent=2, sort_keys=True)
+
+
+def is_valid_output_tiff(output_path: Path) -> bool:
+    if not output_path.is_file():
+        return False
+    try:
+        with tifffile.TiffFile(output_path) as tf:
+            return len(tf.pages) > 0
+    except Exception:
+        return False
+
+
+def can_skip_slide(
+    *,
+    input_wsi: Path,
+    output_path: Path,
+    cached_entry: dict[str, object] | None,
+) -> bool:
+    if cached_entry is None:
+        return False
+    if not is_valid_output_tiff(output_path):
+        return False
+
+    expected_output_path = str(output_path.resolve())
+    if cached_entry.get("output_path") != expected_output_path:
+        return False
+
+    expected_input_fingerprint = cached_entry.get("input_fingerprint")
+    expected_output_fingerprint = cached_entry.get("output_fingerprint")
+    if not isinstance(expected_input_fingerprint, dict) or not isinstance(expected_output_fingerprint, dict):
+        return False
+
+    current_input_fingerprint = get_file_fingerprint(input_wsi)
+    current_output_fingerprint = get_file_fingerprint(output_path)
+    return (
+        current_input_fingerprint == expected_input_fingerprint
+        and current_output_fingerprint == expected_output_fingerprint
+    )
+
+
+def _process_slide_job(job: dict[str, object]) -> dict[str, object]:
+    input_wsi = Path(str(job["input_wsi"]))
+    output_path = Path(str(job["output_path"]))
+    downsample_per_level = float(job["downsample_per_level"])
+
+    try:
+        wsi_arr, effective_spacing = load_wsi_at_spacing(
+            wsi_path=input_wsi,
+            target_spacing=float(job["spacing"]),
+            tolerance=float(job["tolerance"]),
+            backend=str(job["backend"]),
+            spacing_at_level_0=job["spacing_at_level_0"],
+        )
+        gaussian_sigma_px = _sigma_um_to_px(float(job["gaussian_sigma_um"]), effective_spacing)
+        mask_l0 = segment_tissue_hsv(wsi_arr=wsi_arr, gaussian_sigma_px=gaussian_sigma_px)
+        mask_l0 = postprocess_mask(
+            mask=mask_l0,
+            spacing_um_per_px=effective_spacing,
+            min_component_area_um2=float(job["min_component_area_um2"]),
+            min_hole_area_um2=float(job["min_hole_area_um2"]),
+            open_radius_um=float(job["open_radius_um"]),
+            close_radius_um=float(job["close_radius_um"]),
+        )
+        resolved_min_size = job["min_size"] if job["min_size"] is not None else int(job["tile_size"])
+        levels = build_mask_pyramid(
+            level0_mask=mask_l0,
+            downsample_per_level=downsample_per_level,
+            min_size=int(resolved_min_size),
+        )
+
+        write_pyramidal_mask_tiff(
+            levels=levels,
+            output_path=output_path,
+            level0_spacing=effective_spacing,
+            downsample_per_level=downsample_per_level,
+            compression=str(job["compression"]),
+            tile_size=int(job["tile_size"]),
+        )
+
+        level_info = []
+        for idx, level in enumerate(levels):
+            level_spacing = effective_spacing * (downsample_per_level ** idx)
+            height, width = level.shape[:2]
+            level_info.append(
+                {
+                    "level": idx,
+                    "width": int(width),
+                    "height": int(height),
+                    "spacing": float(level_spacing),
+                }
+            )
+
+        return {
+            "index": int(job["index"]),
+            "slide_path": str(input_wsi),
+            "output_path": str(output_path),
+            "status": "success",
+            "traceback": "",
+            "cache_key": str(input_wsi.resolve()),
+            "cache_entry": {
+                "slide_path": str(input_wsi.resolve()),
+                "output_path": str(output_path.resolve()),
+                "input_fingerprint": get_file_fingerprint(input_wsi),
+                "output_fingerprint": get_file_fingerprint(output_path),
+            },
+            "level_info": level_info,
+        }
+    except Exception:
+        return {
+            "index": int(job["index"]),
+            "slide_path": str(input_wsi),
+            "output_path": str(output_path),
+            "status": "failed",
+            "traceback": traceback.format_exc(),
+            "cache_key": str(input_wsi.resolve()),
+            "cache_entry": None,
+            "level_info": [],
+        }
+
+
+def process_slides(
+    *,
+    output_mapping: list[tuple[Path, Path]],
+    spacing: float,
+    tolerance: float,
+    backend: str,
+    spacing_at_level_0: float | None,
+    downsample_per_level: float,
+    min_size: int | None,
+    compression: str,
+    tile_size: int,
+    verbose: bool,
+    command_signature: str,
+    cache_manifest_path: Path,
+    no_cache: bool = False,
+    num_workers: int = 1,
+    min_component_area_um2: float = 0.0,
+    min_hole_area_um2: float = 0.0,
+    gaussian_sigma_um: float = 0.0,
+    open_radius_um: float = 0.0,
+    close_radius_um: float = 0.0,
+) -> list[dict[str, str]]:
+    results: list[dict[str, str]] = []
+    cache_manifest = load_cache_manifest(cache_manifest_path) if not no_cache else {
+        "command_signature": None,
+        "entries": {},
+    }
+    cached_command_signature = cache_manifest.get("command_signature")
+    cached_entries_raw = cache_manifest.get("entries", {})
+    cached_entries = cached_entries_raw if isinstance(cached_entries_raw, dict) else {}
+
+    if cached_command_signature == command_signature:
+        updated_entries: dict[str, dict[str, object]] = {
+            k: v for k, v in cached_entries.items() if isinstance(v, dict)
+        }
+    else:
+        updated_entries = {}
+
+    jobs: list[dict[str, object]] = []
+    progress_bar = tqdm(total=len(output_mapping), desc="Generating tissue masks", unit="slide")
+
+    for index, (input_wsi, output_path) in enumerate(output_mapping, start=1):
+        traceback_text = ""
+        status = "success"
+        cache_key = str(input_wsi.resolve())
+
+        if (not no_cache) and cached_command_signature == command_signature and can_skip_slide(
+            input_wsi=input_wsi,
+            output_path=output_path,
+            cached_entry=updated_entries.get(cache_key),
+        ):
+            status = "skipped"
+            if verbose:
+                print(f"[{index}/{len(output_mapping)}] Skipped (cache hit): {input_wsi}")
+            results.append(
+                {
+                    "slide_path": str(input_wsi),
+                    "output_path": str(output_path),
+                    "status": status,
+                    "traceback": traceback_text,
+                }
+            )
+            progress_bar.update(1)
+            continue
+
+        jobs.append(
+            {
+                "index": index,
+                "input_wsi": str(input_wsi),
+                "output_path": str(output_path),
+                "spacing": spacing,
+                "tolerance": tolerance,
+                "backend": backend,
+                "spacing_at_level_0": spacing_at_level_0,
+                "downsample_per_level": downsample_per_level,
+                "min_size": min_size,
+                "compression": compression,
+                "tile_size": tile_size,
+                "min_component_area_um2": min_component_area_um2,
+                "min_hole_area_um2": min_hole_area_um2,
+                "gaussian_sigma_um": gaussian_sigma_um,
+                "open_radius_um": open_radius_um,
+                "close_radius_um": close_radius_um,
+            }
+        )
+
+    def _handle_job_result(result: dict[str, object]) -> None:
+        index = int(result["index"])
+        slide_path = str(result["slide_path"])
+        output_path = str(result["output_path"])
+        status = str(result["status"])
+        traceback_text = str(result["traceback"])
+        cache_key = str(result["cache_key"])
+
+        if status == "success":
+            cache_entry = result.get("cache_entry")
+            if isinstance(cache_entry, dict):
+                updated_entries[cache_key] = cache_entry
+            if verbose:
+                print(f"[{index}/{len(output_mapping)}] {slide_path}")
+                level_info = result.get("level_info", [])
+                if isinstance(level_info, list):
+                    print(f"Generated mask pyramid with {len(level_info)} level(s):")
+                    for info in level_info:
+                        print(
+                            f"  L{info['level']}: {info['width']}x{info['height']} px @ {info['spacing']:.6f} um/px"
+                        )
+                print(f"Saved pyramidal tissue mask to {output_path}")
+        else:
+            updated_entries.pop(cache_key, None)
+            print(f"[{index}/{len(output_mapping)}] Failed: {slide_path}")
+
+        results.append(
+            {
+                "slide_path": slide_path,
+                "output_path": output_path,
+                "status": status,
+                "traceback": traceback_text,
+            }
+        )
+
+    try:
+        if jobs:
+            if num_workers > 1:
+                worker_count = min(num_workers, len(jobs))
+                ctx = mp.get_context("spawn")
+                with ctx.Pool(processes=worker_count) as pool:
+                    for result in pool.imap_unordered(_process_slide_job, jobs):
+                        _handle_job_result(result)
+                        progress_bar.update(1)
+            else:
+                for job in jobs:
+                    result = _process_slide_job(job)
+                    _handle_job_result(result)
+                    progress_bar.update(1)
+    finally:
+        progress_bar.close()
+
+    save_cache_manifest(
+        cache_manifest_path,
+        command_signature=command_signature,
+        entries=updated_entries,
+    )
+
+    return results
+
+
+def write_summary_csv(summary_csv_path: Path, results: list[dict[str, str]]) -> None:
+    summary_csv_path.parent.mkdir(parents=True, exist_ok=True)
+    fieldnames = ["slide_path", "output_path", "status", "traceback"]
+    with summary_csv_path.open("w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(results)
+
+
+def get_summary_csv_path(*, output_path: Path | None, output_dir: Path | None) -> Path:
+    if output_dir is not None:
+        return output_dir / "summary.csv"
+    assert output_path is not None
+    return output_path.parent / "summary.csv"
+
+
+def get_cache_manifest_path(*, output_path: Path | None, output_dir: Path | None) -> Path:
+    if output_dir is not None:
+        return output_dir / "cache_manifest.json"
+    assert output_path is not None
+    return output_path.parent / "cache_manifest.json"
+
+
+def main() -> None:
+    args = _parse_args()
+
+    if args.spacing <= 0:
+        raise ValueError("spacing must be > 0")
+    if args.downsample_per_level <= 1:
+        raise ValueError("downsample-per-level must be > 1")
+    if args.num_workers < 1:
+        raise ValueError("num-workers must be >= 1")
+    if args.min_component_area_um2 < 0:
+        raise ValueError("min-component-area-um2 must be >= 0")
+    if args.min_hole_area_um2 < 0:
+        raise ValueError("min-hole-area-um2 must be >= 0")
+    if args.gaussian_sigma_um < 0:
+        raise ValueError("gaussian-sigma-um must be >= 0")
+    if args.open_radius_um < 0:
+        raise ValueError("open-radius-um must be >= 0")
+    if args.close_radius_um < 0:
+        raise ValueError("close-radius-um must be >= 0")
+
+    input_wsi_paths = resolve_wsi_paths(args.wsi)
+    output_mapping = build_output_mapping(
+        input_wsi_paths=input_wsi_paths,
+        output_path=args.output,
+        output_dir=args.output_dir,
+    )
+    command_signature = build_command_signature(
+        spacing=args.spacing,
+        tolerance=args.tolerance,
+        downsample_per_level=args.downsample_per_level,
+        backend=args.backend,
+        spacing_at_level_0=args.spacing_at_level_0,
+        min_component_area_um2=args.min_component_area_um2,
+        min_hole_area_um2=args.min_hole_area_um2,
+        gaussian_sigma_um=args.gaussian_sigma_um,
+        open_radius_um=args.open_radius_um,
+        close_radius_um=args.close_radius_um,
+        compression=args.compression,
+        tile_size=args.tile_size,
+        min_size=args.min_size,
+    )
+    cache_manifest_path = get_cache_manifest_path(output_path=args.output, output_dir=args.output_dir)
+
+    results = process_slides(
+        output_mapping=output_mapping,
+        spacing=args.spacing,
+        tolerance=args.tolerance,
+        backend=args.backend,
+        spacing_at_level_0=args.spacing_at_level_0,
+        downsample_per_level=args.downsample_per_level,
+        min_size=args.min_size,
+        compression=args.compression,
+        tile_size=args.tile_size,
+        verbose=args.verbose,
+        command_signature=command_signature,
+        cache_manifest_path=cache_manifest_path,
+        no_cache=args.no_cache,
+        num_workers=args.num_workers,
+        min_component_area_um2=args.min_component_area_um2,
+        min_hole_area_um2=args.min_hole_area_um2,
+        gaussian_sigma_um=args.gaussian_sigma_um,
+        open_radius_um=args.open_radius_um,
+        close_radius_um=args.close_radius_um,
+    )
+
+    summary_csv_path = get_summary_csv_path(output_path=args.output, output_dir=args.output_dir)
+    write_summary_csv(summary_csv_path, results)
+
+    success_count = sum(row["status"] == "success" for row in results)
+    skipped_count = sum(row["status"] == "skipped" for row in results)
+    failed_count = sum(row["status"] == "failed" for row in results)
+    print(
+        f"Processing summary: {success_count} succeeded, {skipped_count} skipped, {failed_count} failed"
+    )
+    print(f"Summary CSV: {summary_csv_path}")
+
+
+if __name__ == "__main__":
+    main()

From 514452adfd41443489b7761b8d8cf13aed7cde38 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Mon, 16 Feb 2026 21:53:01 +0000
Subject: [PATCH 2/5] add tifffile deps comment

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 4993bb9..4a5de1a 100644
--- a/README.md
+++ b/README.md
@@ -119,6 +119,8 @@ These visualizations are useful for double-checking that the tiling or sampling
 For quick mask generation outside the full pipeline, use the standalone script:
 
 ```shell
+python -m pip install tifffile # need extra tifffile deps
+
 # Single slide
 python scripts/generate_tissue_mask.py \
     --wsi /path/to/slide.tif \

From 8b6e2047c9d80810a720277043ff11fba8ebb9d5 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Mon, 16 Feb 2026 21:58:57 +0000
Subject: [PATCH 3/5] minor changes

---
 README.md                       | 10 +++++-----
 scripts/generate_tissue_mask.py | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 4a5de1a..4820dd2 100644
--- a/README.md
+++ b/README.md
@@ -137,13 +137,13 @@ python scripts/generate_tissue_mask.py \
 ```
 
 This script:
-- reads the WSI with `wholeslidedata` (OpenSlide backend by default),
-- computes a binary tissue mask using HSV thresholding (`0=background`, `1=tissue`),
-- writes a pyramidal TIFF mask where each level is downsampled from the previous one,
-- prints a final recap of how many slides succeeded, skipped, and failed.
+- reads the WSI with `wholeslidedata`
+- computes a binary tissue mask using HSV thresholding (`0=background`, `1=tissue`)
+- writes a pyramidal TIFF mask at a desired `spacing`, where each level is downsampled from the previous one
+- prints a final recap of how many slides succeeded, skipped, and failed
 
 Useful options:
-- `--backend` to switch the wholeslidedata backend (default: `openslide`)
+- `--backend` to switch the wholeslidedata backend (default: `asap`)
 - `--output` for single-slide mode and `--output-dir` for multi-slide mode
 - `--num-workers` to control parallelism
 - `--no-cache` to disable cache-based skipping and force recomputation
diff --git a/scripts/generate_tissue_mask.py b/scripts/generate_tissue_mask.py
index 220d8a7..e242227 100644
--- a/scripts/generate_tissue_mask.py
+++ b/scripts/generate_tissue_mask.py
@@ -91,7 +91,7 @@ def _parse_args() -> argparse.Namespace:
         default=2000.0,
         help=(
             "Remove tissue connected components smaller than this area in square microns "
-            "(default: 2000)."
+            "(default: 2000um2 ; 0 to disable)."
         ),
     )
     parser.add_argument(
@@ -100,26 +100,26 @@ def _parse_args() -> argparse.Namespace:
         default=5000.0,
         help=(
             "Fill holes inside tissue up to this area in square microns "
-            "(default: 5000)."
+            "(default: 5000um2 ; 0 to disable)."
         ),
     )
     parser.add_argument(
         "--open-radius-um",
         type=float,
         default=1.0,
-        help="Morphological opening radius in microns (default: 1.0).",
+        help="Morphological opening radius in microns (default: 1um ; 0 to disable).",
     )
     parser.add_argument(
         "--gaussian-sigma-um",
         type=float,
         default=1,
-        help="Optional Gaussian blur sigma in microns before HSV thresholding (default: 0 disables).",
+        help="Optional Gaussian blur sigma in microns before HSV thresholding (default: 1um ; 0 to disable).",
     )
     parser.add_argument(
         "--close-radius-um",
         type=float,
-        default=10.0,
-        help="Morphological closing radius in microns (default: 10.0).",
+        default=12.0,
+        help="Morphological closing radius in microns (default: 12um ; 0 to disable).",
     )
     parser.add_argument(
         "--compression",

From 484728430141ee12051d9bc23e85f84f17264bf8 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Tue, 17 Feb 2026 13:22:06 +0000
Subject: [PATCH 4/5] minor changes

---
 README.md                       |   3 +
 hs2p/__init__.py                |   3 +
 hs2p/wsi/__init__.py            |   6 +-
 scripts/generate_tissue_mask.py | 324 ++++++++++++++++++++++++++++++--
 4 files changed, 315 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 4820dd2..405ccd3 100644
--- a/README.md
+++ b/README.md
@@ -139,6 +139,7 @@ python scripts/generate_tissue_mask.py \
 This script:
 - reads the WSI with `wholeslidedata`
 - computes a binary tissue mask using HSV thresholding (`0=background`, `1=tissue`)
+- uses a coarse-to-fine ROI shortcut by default to avoid loading the full target-spacing WSI into memory
 - writes a pyramidal TIFF mask at a desired `spacing`, where each level is downsampled from the previous one
 - prints a final recap of how many slides succeeded, skipped, and failed
 
@@ -147,6 +148,8 @@ Useful options:
 - `--output` for single-slide mode and `--output-dir` for multi-slide mode
 - `--num-workers` to control parallelism
 - `--no-cache` to disable cache-based skipping and force recomputation
+- `--disable-coarse-roi-shortcut` to force legacy full-frame loading at target spacing
+- `--coarse-spacing`, `--coarse-roi-margin-um`, and `--processing-tile-size` to tune coarse-to-fine ROI processing
 - `--tolerance` to control how much a natural spacing can deviate from target spacing when selecting the best level for reading the whole slide
 - `--min-component-area-um2` to remove tiny tissue blobs
 - `--min-hole-area-um2` to fill small holes inside tissue
diff --git a/hs2p/__init__.py b/hs2p/__init__.py
index 6849410..1b9559f 100644
--- a/hs2p/__init__.py
+++ b/hs2p/__init__.py
@@ -1 +1,4 @@
+from hs2p.wsi.wsi import WholeSlideImage
+from hs2p.wsi import extract_coordinates, sample_coordinates, overlay_mask_on_slide, visualize_coordinates
+
 __version__ = "1.1.0"
diff --git a/hs2p/wsi/__init__.py b/hs2p/wsi/__init__.py
index 0bb7bb2..5377f73 100644
--- a/hs2p/wsi/__init__.py
+++ b/hs2p/wsi/__init__.py
@@ -255,7 +255,7 @@ def filter_coordinates(
                 if mask_pct >= pct:
                     filtered_coordinates[annotation].append(coord)
                     filtered_contour_indices[annotation].append(contour_idx)
-    return filtered_coordinates, filtered_contour_indices     
+    return filtered_coordinates, filtered_contour_indices
 
 
 def save_coordinates(
@@ -382,7 +382,7 @@ def overlay_mask_on_slide(
         mask_spacing = mask_object.spacings[mask_level]
         mask_width, _ = mask_object.level_dimensions[mask_level]
         scale = mask_width / width
-    
+
     mask_arr = mask_object.get_slide(spacing=mask_spacing)
     mask_arr = mask_arr[:, :, 0]
     mask_height, mask_width = mask_arr.shape
@@ -538,7 +538,7 @@ def draw_grid_from_coordinates(
 
                 # paste the valid part into the white tile
                 tile[:valid_height, :valid_width, :] = overlayed_tile
-            
+
             else:
                 valid_tile = np.array(valid_tile)
                 # paste the valid part into the white tile
diff --git a/scripts/generate_tissue_mask.py b/scripts/generate_tissue_mask.py
index e242227..1da6e02 100644
--- a/scripts/generate_tissue_mask.py
+++ b/scripts/generate_tissue_mask.py
@@ -7,6 +7,7 @@
 import json
 import multiprocessing as mp
 import os
+import tempfile
 import traceback
 from pathlib import Path
 
@@ -65,7 +66,7 @@ def _parse_args() -> argparse.Namespace:
         "--tolerance",
         type=float,
         required=True,
-        help="Tolerance deciding how much a natural spacing can deviate from target spacing when selecting the best level for reading (expressed as a fraction of the target spacing, e.g. 0.1 for 10%).",
+        help="Tolerance deciding how much a natural spacing can deviate from target spacing when selecting the best level for reading (expressed as a fraction of the target spacing, e.g. 0.1 for 10%%).",
     )
     parser.add_argument(
         "--downsample-per-level",
@@ -155,6 +156,32 @@ def _parse_args() -> argparse.Namespace:
         default=max(1, min(4, os.cpu_count() or 1)),
         help="Number of worker processes for slide processing (default: min(4, CPU count)).",
     )
+    parser.add_argument(
+        "--disable-coarse-roi-shortcut",
+        action="store_true",
+        help=(
+            "Disable coarse-to-fine ROI shortcut and force full-frame loading at target spacing. "
+            "By default, the shortcut is enabled."
+        ),
+    )
+    parser.add_argument(
+        "--coarse-spacing",
+        type=float,
+        default=4.0,
+        help="Spacing used for coarse ROI pre-pass in microns per pixel (default: 4.0).",
+    )
+    parser.add_argument(
+        "--coarse-roi-margin-um",
+        type=float,
+        default=64.0,
+        help="ROI expansion margin in microns in target mask coordinates (default: 64).",
+    )
+    parser.add_argument(
+        "--processing-tile-size",
+        type=int,
+        default=2048,
+        help="Tile size (pixels) for high-resolution ROI processing (default: 2048).",
+    )
     return parser.parse_args()
 
 
@@ -284,6 +311,49 @@ def load_wsi_at_spacing(
     return wsi_arr, effective_spacing
 
 
+def _get_spacings(
+    *,
+    wsi: wsd.WholeSlideImage,
+    spacing_at_level_0: float | None,
+) -> list[float]:
+    base_spacings = list(wsi.spacings)
+    if spacing_at_level_0 is None:
+        return base_spacings
+    return [spacing_at_level_0 * sp / base_spacings[0] for sp in base_spacings]
+
+
+def _resolve_spacing_plan(
+    *,
+    wsi: wsd.WholeSlideImage,
+    target_spacing: float,
+    tolerance: float,
+    spacing_at_level_0: float | None,
+) -> tuple[int, float, float, bool]:
+    spacings = _get_spacings(wsi=wsi, spacing_at_level_0=spacing_at_level_0)
+    target_downsample = target_spacing / spacings[0]
+    level_downsamples = get_downsamples(wsi)
+    level = get_best_level_for_downsample_custom(level_downsamples, target_downsample)
+    level_spacing = spacings[level]
+
+    if abs(level_spacing - target_spacing) / target_spacing <= tolerance:
+        return level, level_spacing, level_spacing, False
+
+    while level > 0 and level_spacing > target_spacing:
+        level -= 1
+        level_spacing = spacings[level]
+        if abs(level_spacing - target_spacing) / target_spacing <= tolerance:
+            return level, level_spacing, level_spacing, False
+
+    power = int(np.ceil(np.log2(target_spacing / level_spacing)))
+    effective_spacing = level_spacing * (2**power)
+    if abs(effective_spacing - target_spacing) / target_spacing > tolerance:
+        raise ValueError(
+            f"Unable to achieve target spacing within tolerance after downsampling. Closest achievable spacing is {effective_spacing:.2f} mpp, which is {abs(effective_spacing - target_spacing) / target_spacing:.2%} away from the target spacing. Consider increasing the tolerance or adjusting the target spacing."
+        )
+
+    return level, level_spacing, effective_spacing, True
+
+
 def segment_tissue_hsv(
     wsi_arr: np.ndarray,
     lower: np.ndarray = HSV_LOWER,
@@ -360,6 +430,171 @@ def _fill_small_holes(mask: np.ndarray, max_hole_area_px: int) -> np.ndarray:
     return filled
 
 
+def _extract_roi_boxes(mask: np.ndarray) -> list[tuple[int, int, int, int]]:
+    binary = (mask > 0).astype(np.uint8)
+    if binary.sum() == 0:
+        return []
+
+    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)
+    boxes: list[tuple[int, int, int, int]] = []
+    for label in range(1, num_labels):
+        x = int(stats[label, cv2.CC_STAT_LEFT])
+        y = int(stats[label, cv2.CC_STAT_TOP])
+        w = int(stats[label, cv2.CC_STAT_WIDTH])
+        h = int(stats[label, cv2.CC_STAT_HEIGHT])
+        boxes.append((x, y, x + w, y + h))
+    return boxes
+
+
+def _boxes_intersect(
+    box_a: tuple[int, int, int, int],
+    box_b: tuple[int, int, int, int],
+) -> bool:
+    ax0, ay0, ax1, ay1 = box_a
+    bx0, by0, bx1, by1 = box_b
+    return not (ax1 <= bx0 or bx1 <= ax0 or ay1 <= by0 or by1 <= ay0)
+
+
+def _coarse_boxes_to_target_boxes(
+    *,
+    coarse_boxes: list[tuple[int, int, int, int]],
+    coarse_shape: tuple[int, int],
+    target_shape: tuple[int, int],
+    margin_px: int,
+) -> list[tuple[int, int, int, int]]:
+    coarse_h, coarse_w = coarse_shape
+    target_h, target_w = target_shape
+    if coarse_h == 0 or coarse_w == 0:
+        return []
+
+    scale_x = target_w / float(coarse_w)
+    scale_y = target_h / float(coarse_h)
+    target_boxes: list[tuple[int, int, int, int]] = []
+    for x0, y0, x1, y1 in coarse_boxes:
+        tx0 = max(0, int(np.floor(x0 * scale_x)) - margin_px)
+        ty0 = max(0, int(np.floor(y0 * scale_y)) - margin_px)
+        tx1 = min(target_w, int(np.ceil(x1 * scale_x)) + margin_px)
+        ty1 = min(target_h, int(np.ceil(y1 * scale_y)) + margin_px)
+        if tx1 > tx0 and ty1 > ty0:
+            target_boxes.append((tx0, ty0, tx1, ty1))
+    return target_boxes
+
+
+def _compute_level0_mask_with_coarse_roi_shortcut(
+    *,
+    wsi_path: Path,
+    target_spacing: float,
+    tolerance: float,
+    backend: str,
+    spacing_at_level_0: float | None,
+    coarse_spacing: float,
+    coarse_roi_margin_um: float,
+    processing_tile_size: int,
+    min_component_area_um2: float,
+    min_hole_area_um2: float,
+    gaussian_sigma_um: float,
+    open_radius_um: float,
+    close_radius_um: float,
+) -> tuple[np.ndarray, float, str]:
+    wsi = wsd.WholeSlideImage(wsi_path, backend=backend)
+    spacing_at_l0 = _get_spacings(wsi=wsi, spacing_at_level_0=spacing_at_level_0)[0]
+    level, read_spacing, effective_spacing, needs_resampling = _resolve_spacing_plan(
+        wsi=wsi,
+        target_spacing=target_spacing,
+        tolerance=tolerance,
+        spacing_at_level_0=spacing_at_level_0,
+    )
+
+    coarse_arr, coarse_effective_spacing = load_wsi_at_spacing(
+        wsi_path=wsi_path,
+        target_spacing=coarse_spacing,
+        tolerance=tolerance,
+        backend=backend,
+        spacing_at_level_0=spacing_at_level_0,
+    )
+    coarse_sigma_px = _sigma_um_to_px(gaussian_sigma_um, coarse_effective_spacing)
+    coarse_mask = segment_tissue_hsv(wsi_arr=coarse_arr, gaussian_sigma_px=coarse_sigma_px)
+    coarse_mask = postprocess_mask(
+        mask=coarse_mask,
+        spacing_um_per_px=coarse_effective_spacing,
+        min_component_area_um2=min_component_area_um2,
+        min_hole_area_um2=min_hole_area_um2,
+        open_radius_um=open_radius_um,
+        close_radius_um=close_radius_um,
+    )
+
+    level_w, level_h = wsi.shapes[level]
+    spacing_ratio = read_spacing / effective_spacing
+    target_w = max(1, int(round(level_w * spacing_ratio)))
+    target_h = max(1, int(round(level_h * spacing_ratio)))
+    target_shape = (target_h, target_w)
+    margin_px = _radius_um_to_px(coarse_roi_margin_um, effective_spacing)
+    target_boxes = _coarse_boxes_to_target_boxes(
+        coarse_boxes=_extract_roi_boxes(coarse_mask),
+        coarse_shape=coarse_mask.shape[:2],
+        target_shape=target_shape,
+        margin_px=margin_px,
+    )
+
+    tmp_file = tempfile.NamedTemporaryFile(prefix="hs2p-mask-", suffix=".mmap", delete=False)
+    tmp_path = Path(tmp_file.name)
+    tmp_file.close()
+    try:
+        mask_l0 = np.memmap(tmp_path, dtype=np.uint8, mode="w+", shape=target_shape)
+        mask_l0[:] = 0
+
+        sigma_px = _sigma_um_to_px(gaussian_sigma_um, effective_spacing)
+        halo_px = max(
+            _radius_um_to_px(open_radius_um, effective_spacing),
+            _radius_um_to_px(close_radius_um, effective_spacing),
+            int(np.ceil(3.0 * sigma_px)),
+        )
+
+        for y0 in range(0, target_shape[0], processing_tile_size):
+            y1 = min(target_shape[0], y0 + processing_tile_size)
+            for x0 in range(0, target_shape[1], processing_tile_size):
+                x1 = min(target_shape[1], x0 + processing_tile_size)
+
+                rx0 = max(0, x0 - halo_px)
+                ry0 = max(0, y0 - halo_px)
+                rx1 = min(target_shape[1], x1 + halo_px)
+                ry1 = min(target_shape[0], y1 + halo_px)
+
+                rx0_l0 = int(round(rx0 * (effective_spacing / spacing_at_l0)))
+                ry0_l0 = int(round(ry0 * (effective_spacing / spacing_at_l0)))
+
+                tile = wsi.get_patch(
+                    rx0_l0,
+                    ry0_l0,
+                    rx1 - rx0,
+                    ry1 - ry0,
+                    spacing=effective_spacing,
+                    center=False,
+                )
+                tile_mask = segment_tissue_hsv(wsi_arr=tile, gaussian_sigma_px=sigma_px)
+
+                crop_x0 = x0 - rx0
+                crop_y0 = y0 - ry0
+                crop_x1 = crop_x0 + (x1 - x0)
+                crop_y1 = crop_y0 + (y1 - y0)
+                mask_l0[y0:y1, x0:x1] = tile_mask[crop_y0:crop_y1, crop_x0:crop_x1]
+
+        mask_l0 = postprocess_mask(
+            mask=np.asarray(mask_l0),
+            spacing_um_per_px=effective_spacing,
+            min_component_area_um2=min_component_area_um2,
+            min_hole_area_um2=min_hole_area_um2,
+            open_radius_um=open_radius_um,
+            close_radius_um=close_radius_um,
+        )
+
+        mode = "coarse-roi-resampled" if needs_resampling else "coarse-roi"
+        return np.asarray(mask_l0), effective_spacing, mode
+    finally:
+        if tmp_path.exists():
+            tmp_path.unlink(missing_ok=True)
+
+
 def postprocess_mask(
     *,
     mask: np.ndarray,
@@ -543,6 +778,10 @@ def build_command_signature(
     gaussian_sigma_um: float,
     open_radius_um: float,
     close_radius_um: float,
+    disable_coarse_roi_shortcut: bool,
+    coarse_spacing: float,
+    coarse_roi_margin_um: float,
+    processing_tile_size: int,
     compression: str,
     tile_size: int,
     min_size: int | None,
@@ -559,6 +798,10 @@ def build_command_signature(
         "gaussian_sigma_um": gaussian_sigma_um,
         "open_radius_um": open_radius_um,
         "close_radius_um": close_radius_um,
+        "disable_coarse_roi_shortcut": disable_coarse_roi_shortcut,
+        "coarse_spacing": coarse_spacing,
+        "coarse_roi_margin_um": coarse_roi_margin_um,
+        "processing_tile_size": processing_tile_size,
         "compression": compression,
         "tile_size": tile_size,
         "min_size": min_size,
@@ -656,23 +899,42 @@ def _process_slide_job(job: dict[str, object]) -> dict[str, object]:
     downsample_per_level = float(job["downsample_per_level"])
 
     try:
-        wsi_arr, effective_spacing = load_wsi_at_spacing(
-            wsi_path=input_wsi,
-            target_spacing=float(job["spacing"]),
-            tolerance=float(job["tolerance"]),
-            backend=str(job["backend"]),
-            spacing_at_level_0=job["spacing_at_level_0"],
-        )
-        gaussian_sigma_px = _sigma_um_to_px(float(job["gaussian_sigma_um"]), effective_spacing)
-        mask_l0 = segment_tissue_hsv(wsi_arr=wsi_arr, gaussian_sigma_px=gaussian_sigma_px)
-        mask_l0 = postprocess_mask(
-            mask=mask_l0,
-            spacing_um_per_px=effective_spacing,
-            min_component_area_um2=float(job["min_component_area_um2"]),
-            min_hole_area_um2=float(job["min_hole_area_um2"]),
-            open_radius_um=float(job["open_radius_um"]),
-            close_radius_um=float(job["close_radius_um"]),
-        )
+        if bool(job.get("disable_coarse_roi_shortcut", False)):
+            wsi_arr, effective_spacing = load_wsi_at_spacing(
+                wsi_path=input_wsi,
+                target_spacing=float(job["spacing"]),
+                tolerance=float(job["tolerance"]),
+                backend=str(job["backend"]),
+                spacing_at_level_0=job["spacing_at_level_0"],
+            )
+            gaussian_sigma_px = _sigma_um_to_px(float(job["gaussian_sigma_um"]), effective_spacing)
+            mask_l0 = segment_tissue_hsv(wsi_arr=wsi_arr, gaussian_sigma_px=gaussian_sigma_px)
+            mask_l0 = postprocess_mask(
+                mask=mask_l0,
+                spacing_um_per_px=effective_spacing,
+                min_component_area_um2=float(job["min_component_area_um2"]),
+                min_hole_area_um2=float(job["min_hole_area_um2"]),
+                open_radius_um=float(job["open_radius_um"]),
+                close_radius_um=float(job["close_radius_um"]),
+            )
+            processing_mode = "full"
+        else:
+            mask_l0, effective_spacing, processing_mode = _compute_level0_mask_with_coarse_roi_shortcut(
+                wsi_path=input_wsi,
+                target_spacing=float(job["spacing"]),
+                tolerance=float(job["tolerance"]),
+                backend=str(job["backend"]),
+                spacing_at_level_0=job["spacing_at_level_0"],
+                coarse_spacing=float(job["coarse_spacing"]),
+                coarse_roi_margin_um=float(job["coarse_roi_margin_um"]),
+                processing_tile_size=int(job["processing_tile_size"]),
+                min_component_area_um2=float(job["min_component_area_um2"]),
+                min_hole_area_um2=float(job["min_hole_area_um2"]),
+                gaussian_sigma_um=float(job["gaussian_sigma_um"]),
+                open_radius_um=float(job["open_radius_um"]),
+                close_radius_um=float(job["close_radius_um"]),
+            )
+
         resolved_min_size = job["min_size"] if job["min_size"] is not None else int(job["tile_size"])
         levels = build_mask_pyramid(
             level0_mask=mask_l0,
@@ -716,6 +978,7 @@ def _process_slide_job(job: dict[str, object]) -> dict[str, object]:
                 "output_fingerprint": get_file_fingerprint(output_path),
             },
             "level_info": level_info,
+            "processing_mode": processing_mode,
         }
     except Exception:
         return {
@@ -727,6 +990,7 @@ def _process_slide_job(job: dict[str, object]) -> dict[str, object]:
             "cache_key": str(input_wsi.resolve()),
             "cache_entry": None,
             "level_info": [],
+            "processing_mode": "failed",
         }
 
 
@@ -751,6 +1015,10 @@ def process_slides(
     gaussian_sigma_um: float = 0.0,
     open_radius_um: float = 0.0,
     close_radius_um: float = 0.0,
+    disable_coarse_roi_shortcut: bool = False,
+    coarse_spacing: float = 4.0,
+    coarse_roi_margin_um: float = 64.0,
+    processing_tile_size: int = 2048,
 ) -> list[dict[str, str]]:
     results: list[dict[str, str]] = []
     cache_manifest = load_cache_manifest(cache_manifest_path) if not no_cache else {
@@ -813,6 +1081,10 @@ def process_slides(
                 "gaussian_sigma_um": gaussian_sigma_um,
                 "open_radius_um": open_radius_um,
                 "close_radius_um": close_radius_um,
+                "disable_coarse_roi_shortcut": disable_coarse_roi_shortcut,
+                "coarse_spacing": coarse_spacing,
+                "coarse_roi_margin_um": coarse_roi_margin_um,
+                "processing_tile_size": processing_tile_size,
             }
         )
 
@@ -831,6 +1103,8 @@ def _handle_job_result(result: dict[str, object]) -> None:
             if verbose:
                 print(f"[{index}/{len(output_mapping)}] {slide_path}")
                 level_info = result.get("level_info", [])
+                processing_mode = result.get("processing_mode", "unknown")
+                print(f"Processing mode: {processing_mode}")
                 if isinstance(level_info, list):
                     print(f"Generated mask pyramid with {len(level_info)} level(s):")
                     for info in level_info:
@@ -915,6 +1189,12 @@ def main() -> None:
         raise ValueError("min-hole-area-um2 must be >= 0")
     if args.gaussian_sigma_um < 0:
         raise ValueError("gaussian-sigma-um must be >= 0")
+    if args.coarse_spacing <= 0:
+        raise ValueError("coarse-spacing must be > 0")
+    if args.coarse_roi_margin_um < 0:
+        raise ValueError("coarse-roi-margin-um must be >= 0")
+    if args.processing_tile_size < 1:
+        raise ValueError("processing-tile-size must be >= 1")
     if args.open_radius_um < 0:
         raise ValueError("open-radius-um must be >= 0")
     if args.close_radius_um < 0:
@@ -937,6 +1217,10 @@ def main() -> None:
         gaussian_sigma_um=args.gaussian_sigma_um,
         open_radius_um=args.open_radius_um,
         close_radius_um=args.close_radius_um,
+        disable_coarse_roi_shortcut=args.disable_coarse_roi_shortcut,
+        coarse_spacing=args.coarse_spacing,
+        coarse_roi_margin_um=args.coarse_roi_margin_um,
+        processing_tile_size=args.processing_tile_size,
         compression=args.compression,
         tile_size=args.tile_size,
         min_size=args.min_size,
@@ -963,6 +1247,10 @@ def main() -> None:
         gaussian_sigma_um=args.gaussian_sigma_um,
         open_radius_um=args.open_radius_um,
         close_radius_um=args.close_radius_um,
+        disable_coarse_roi_shortcut=args.disable_coarse_roi_shortcut,
+        coarse_spacing=args.coarse_spacing,
+        coarse_roi_margin_um=args.coarse_roi_margin_um,
+        processing_tile_size=args.processing_tile_size,
     )
 
     summary_csv_path = get_summary_csv_path(output_path=args.output, output_dir=args.output_dir)

From 6af7699ab66f12d662ece97a71b4292f96916b08 Mon Sep 17 00:00:00 2001
From: clemsgrs <clementnet@outlook.com>
Date: Tue, 17 Feb 2026 13:22:54 +0000
Subject: [PATCH 5/5] black formatting

---
 hs2p/__init__.py       |  7 ++-
 hs2p/sampling.py       | 96 ++++++++++++++++++++++++++----------------
 hs2p/tiling.py         | 81 ++++++++++++++++++++---------------
 hs2p/utils/__init__.py |  2 +-
 hs2p/utils/utils.py    | 12 ++++--
 hs2p/wsi/__init__.py   | 25 +++++++----
 hs2p/wsi/utils.py      | 33 ++++++++++++---
 hs2p/wsi/wsi.py        | 79 ++++++++++++++++++++++------------
 release.py             | 15 +++++--
 9 files changed, 232 insertions(+), 118 deletions(-)

diff --git a/hs2p/__init__.py b/hs2p/__init__.py
index 1b9559f..ea6a614 100644
--- a/hs2p/__init__.py
+++ b/hs2p/__init__.py
@@ -1,4 +1,9 @@
 from hs2p.wsi.wsi import WholeSlideImage
-from hs2p.wsi import extract_coordinates, sample_coordinates, overlay_mask_on_slide, visualize_coordinates
+from hs2p.wsi import (
+    extract_coordinates,
+    sample_coordinates,
+    overlay_mask_on_slide,
+    visualize_coordinates,
+)
 
 __version__ = "1.1.0"
diff --git a/hs2p/sampling.py b/hs2p/sampling.py
index 0ea4f74..dbc6528 100644
--- a/hs2p/sampling.py
+++ b/hs2p/sampling.py
@@ -10,7 +10,15 @@
 from collections.abc import Sequence
 
 from hs2p.utils import setup, load_csv, fix_random_seeds
-from hs2p.wsi import extract_coordinates, filter_coordinates, sample_coordinates, save_coordinates, visualize_coordinates, overlay_mask_on_slide, SamplingParameters
+from hs2p.wsi import (
+    extract_coordinates,
+    filter_coordinates,
+    sample_coordinates,
+    save_coordinates,
+    visualize_coordinates,
+    overlay_mask_on_slide,
+    SamplingParameters,
+)
 
 
 def _validate_visualization_color_mapping(
@@ -36,7 +44,9 @@ def _validate_visualization_color_mapping(
             raise ValueError(
                 f"color_mapping['{annotation}'] must be None or a length-3 RGB sequence"
             )
-        if any((not isinstance(c, (int, np.integer)) or c < 0 or c > 255) for c in color):
+        if any(
+            (not isinstance(c, (int, np.integer)) or c < 0 or c > 255) for c in color
+        ):
             raise ValueError(
                 f"color_mapping['{annotation}'] must contain integers in [0, 255]"
             )
@@ -60,7 +70,7 @@ def get_args_parser(add_help: bool = True):
     )
     parser.add_argument(
         "opts",
-        help="Modify config options at the end of the command using \"path.key=value\".",
+        help='Modify config options at the end of the command using "path.key=value".',
         default=None,
         nargs=argparse.REMAINDER,
     )
@@ -123,18 +133,22 @@ def process_slide(
         if not cfg.tiling.sampling_params.independant_sampling:
             tissue_mask_visu_path = None
             if cfg.visualize and mask_visualize_dir is not None:
-                tissue_mask_visu_path = Path(mask_visualize_dir, f"{wsi_name}-tissue.png")
-            coordinates, contour_indices, tile_level, resize_factor, tile_size_lv0 = extract_coordinates(
-                wsi_path=wsi_path,
-                mask_path=mask_path,
-                backend=cfg.tiling.backend,
-                tiling_params=cfg.tiling.params,
-                segment_params=cfg.tiling.seg_params,
-                filter_params=cfg.tiling.filter_params,
-                sampling_params=sampling_params,
-                mask_visu_path=tissue_mask_visu_path,
-                disable_tqdm=disable_tqdm,
-                num_workers=num_workers,
+                tissue_mask_visu_path = Path(
+                    mask_visualize_dir, f"{wsi_name}-tissue.png"
+                )
+            coordinates, contour_indices, tile_level, resize_factor, tile_size_lv0 = (
+                extract_coordinates(
+                    wsi_path=wsi_path,
+                    mask_path=mask_path,
+                    backend=cfg.tiling.backend,
+                    tiling_params=cfg.tiling.params,
+                    segment_params=cfg.tiling.seg_params,
+                    filter_params=cfg.tiling.filter_params,
+                    sampling_params=sampling_params,
+                    mask_visu_path=tissue_mask_visu_path,
+                    disable_tqdm=disable_tqdm,
+                    num_workers=num_workers,
+                )
             )
             filtered_coordinates, filtered_contour_indices = filter_coordinates(
                 wsi_path=wsi_path,
@@ -147,7 +161,7 @@ def process_slide(
                 tiling_params=cfg.tiling.params,
                 sampling_params=sampling_params,
                 disable_tqdm=disable_tqdm,
-            ) # a dict mapping annotation -> coordinates
+            )  # a dict mapping annotation -> coordinates
             for annotation, coordinates in filtered_coordinates.items():
                 if len(coordinates) == 0:
                     continue
@@ -188,7 +202,13 @@ def process_slide(
                     annotation_mask_dir = mask_visualize_dir / annotation
                     annotation_mask_dir.mkdir(exist_ok=True, parents=True)
                     tissue_mask_visu_path = Path(annotation_mask_dir, f"{wsi_name}.jpg")
-                coordinates, contour_indices, tile_level, resize_factor, tile_size_lv0 = sample_coordinates(
+                (
+                    coordinates,
+                    contour_indices,
+                    tile_level,
+                    resize_factor,
+                    tile_size_lv0,
+                ) = sample_coordinates(
                     wsi_path=wsi_path,
                     mask_path=mask_path,
                     backend=cfg.tiling.backend,
@@ -270,7 +290,9 @@ def main(args):
     if process_list.is_file() and cfg.resume:
         process_df = pd.read_csv(process_list)
         if "mask_path" not in process_df.columns:
-            process_df["mask_path"] = [str(p) if p is not None else p for p in mask_paths]
+            process_df["mask_path"] = [
+                str(p) if p is not None else p for p in mask_paths
+            ]
         else:
             process_df["mask_path"] = process_df["mask_path"].apply(
                 lambda x: str(x) if pd.notna(x) else None
@@ -288,14 +310,20 @@ def main(args):
 
     skip_sampling = process_df["sampling_status"].str.contains("success").all()
 
-    pixel_mapping = {k: v for e in cfg.tiling.sampling_params.pixel_mapping for k, v in e.items()}
-    tissue_percentage = {k: v for e in cfg.tiling.sampling_params.tissue_percentage for k, v in e.items()}
+    pixel_mapping = {
+        k: v for e in cfg.tiling.sampling_params.pixel_mapping for k, v in e.items()
+    }
+    tissue_percentage = {
+        k: v for e in cfg.tiling.sampling_params.tissue_percentage for k, v in e.items()
+    }
     tissue_key_present = True
     if "tissue" not in tissue_percentage:
         tissue_key_present = False
         tissue_percentage["tissue"] = cfg.tiling.params.min_tissue_percentage
     if cfg.tiling.sampling_params.color_mapping is not None:
-        color_mapping = {k: v for e in cfg.tiling.sampling_params.color_mapping for k, v in e.items()}
+        color_mapping = {
+            k: v for e in cfg.tiling.sampling_params.color_mapping for k, v in e.items()
+        }
     else:
         color_mapping = None
 
@@ -311,9 +339,7 @@ def main(args):
         process_stack = process_df[mask]
         total = len(process_stack)
 
-        wsi_paths_to_process = [
-            Path(x) for x in process_stack.wsi_path.values.tolist()
-        ]
+        wsi_paths_to_process = [Path(x) for x in process_stack.wsi_path.values.tolist()]
         mask_paths_to_process = [
             Path(x) if x is not None and not pd.isna(x) else x
             for x in process_stack.mask_path.values.tolist()
@@ -344,9 +370,7 @@ def main(args):
                     "disable_tqdm": True,
                     "num_workers": parallel_workers,
                 }
-                for wsi_fp, mask_fp in zip(
-                    wsi_paths_to_process, mask_paths_to_process
-                )
+                for wsi_fp, mask_fp in zip(wsi_paths_to_process, mask_paths_to_process)
             ]
             results = list(
                 tqdm.tqdm(
@@ -361,16 +385,16 @@ def main(args):
             sampling_updates[wsi_path] = status_info
 
         for wsi_path, status_info in sampling_updates.items():
-            process_df.loc[
-                process_df["wsi_path"] == wsi_path, "sampling_status"
-            ] = status_info["status"]
+            process_df.loc[process_df["wsi_path"] == wsi_path, "sampling_status"] = (
+                status_info["status"]
+            )
             if "error" in status_info:
-                process_df.loc[
-                    process_df["wsi_path"] == wsi_path, "error"
-                ] = status_info["error"]
-                process_df.loc[
-                    process_df["wsi_path"] == wsi_path, "traceback"
-                ] = status_info["traceback"]
+                process_df.loc[process_df["wsi_path"] == wsi_path, "error"] = (
+                    status_info["error"]
+                )
+                process_df.loc[process_df["wsi_path"] == wsi_path, "traceback"] = (
+                    status_info["traceback"]
+                )
         process_df.to_csv(process_list, index=False)
 
         # summary logging
diff --git a/hs2p/tiling.py b/hs2p/tiling.py
index 06d57e3..6f375f5 100644
--- a/hs2p/tiling.py
+++ b/hs2p/tiling.py
@@ -8,7 +8,12 @@
 from pathlib import Path
 
 from hs2p.utils import setup, load_csv, fix_random_seeds
-from hs2p.wsi import extract_coordinates, save_coordinates, visualize_coordinates, SamplingParameters
+from hs2p.wsi import (
+    extract_coordinates,
+    save_coordinates,
+    visualize_coordinates,
+    SamplingParameters,
+)
 
 
 def get_args_parser(add_help: bool = True):
@@ -29,7 +34,7 @@ def get_args_parser(add_help: bool = True):
     )
     parser.add_argument(
         "opts",
-        help="Modify config options at the end of the command using \"path.key=value\".",
+        help='Modify config options at the end of the command using "path.key=value".',
         default=None,
         nargs=argparse.REMAINDER,
     )
@@ -57,7 +62,11 @@ def process_slide(
     wsi_name = wsi_path.stem.replace(" ", "_")
     if cfg.tiling.read_coordinates_from is not None:
         coordinates_path = Path(cfg.tiling.read_coordinates_from, f"{wsi_name}.npy")
-        if coordinates_path.is_file() and cfg.visualize and tile_visualize_dir is not None:
+        if (
+            coordinates_path.is_file()
+            and cfg.visualize
+            and tile_visualize_dir is not None
+        ):
             coordinates_arr = np.load(coordinates_path, allow_pickle=True)
             coordinates = list(zip(coordinates_arr["x"], coordinates_arr["y"]))
             tile_size_lv0 = coordinates_arr["tile_size_lv0"][0]
@@ -74,17 +83,19 @@ def process_slide(
         tissue_mask_visu_path = None
         if cfg.visualize and mask_visualize_dir is not None:
             tissue_mask_visu_path = Path(mask_visualize_dir, f"{wsi_name}.jpg")
-        coordinates, contour_indices, tile_level, resize_factor, tile_size_lv0 = extract_coordinates(
-            wsi_path=wsi_path,
-            mask_path=mask_path,
-            backend=cfg.tiling.backend,
-            tiling_params=cfg.tiling.params,
-            segment_params=cfg.tiling.seg_params,
-            filter_params=cfg.tiling.filter_params,
-            sampling_params=sampling_params,
-            mask_visu_path=tissue_mask_visu_path,
-            disable_tqdm=disable_tqdm,
-            num_workers=num_workers,
+        coordinates, contour_indices, tile_level, resize_factor, tile_size_lv0 = (
+            extract_coordinates(
+                wsi_path=wsi_path,
+                mask_path=mask_path,
+                backend=cfg.tiling.backend,
+                tiling_params=cfg.tiling.params,
+                segment_params=cfg.tiling.seg_params,
+                filter_params=cfg.tiling.filter_params,
+                sampling_params=sampling_params,
+                mask_visu_path=tissue_mask_visu_path,
+                disable_tqdm=disable_tqdm,
+                num_workers=num_workers,
+            )
         )
         coordinates_dir = Path(cfg.output_dir, "coordinates")
         coordinates_path = Path(coordinates_dir, f"{wsi_name}.npy")
@@ -136,7 +147,9 @@ def main(args):
     if process_list.is_file() and cfg.resume:
         process_df = pd.read_csv(process_list)
         if "mask_path" not in process_df.columns:
-            process_df["mask_path"] = [str(p) if p is not None else p for p in mask_paths]
+            process_df["mask_path"] = [
+                str(p) if p is not None else p for p in mask_paths
+            ]
         else:
             process_df["mask_path"] = process_df["mask_path"].apply(
                 lambda x: str(x) if pd.notna(x) else None
@@ -154,12 +167,18 @@ def main(args):
 
     skip_tiling = process_df["tiling_status"].str.contains("success").all()
 
-    pixel_mapping = {k: v for e in cfg.tiling.sampling_params.pixel_mapping for k, v in e.items()}
-    tissue_percentage = {k: v for e in cfg.tiling.sampling_params.tissue_percentage for k, v in e.items()}
+    pixel_mapping = {
+        k: v for e in cfg.tiling.sampling_params.pixel_mapping for k, v in e.items()
+    }
+    tissue_percentage = {
+        k: v for e in cfg.tiling.sampling_params.tissue_percentage for k, v in e.items()
+    }
     if "tissue" not in tissue_percentage:
         tissue_percentage["tissue"] = cfg.tiling.params.min_tissue_percentage
     if cfg.tiling.sampling_params.color_mapping is not None:
-        color_mapping = {k: v for e in cfg.tiling.sampling_params.color_mapping for k, v in e.items()}
+        color_mapping = {
+            k: v for e in cfg.tiling.sampling_params.color_mapping for k, v in e.items()
+        }
     else:
         color_mapping = None
 
@@ -175,9 +194,7 @@ def main(args):
         process_stack = process_df[mask]
         total = len(process_stack)
 
-        wsi_paths_to_process = [
-            Path(x) for x in process_stack.wsi_path.values.tolist()
-        ]
+        wsi_paths_to_process = [Path(x) for x in process_stack.wsi_path.values.tolist()]
         mask_paths_to_process = [
             Path(x) if x is not None and not pd.isna(x) else x
             for x in process_stack.mask_path.values.tolist()
@@ -211,9 +228,7 @@ def main(args):
                     "disable_tqdm": True,
                     "num_workers": parallel_workers,
                 }
-                for wsi_fp, mask_fp in zip(
-                    wsi_paths_to_process, mask_paths_to_process
-                )
+                for wsi_fp, mask_fp in zip(wsi_paths_to_process, mask_paths_to_process)
             ]
             results = list(
                 tqdm.tqdm(
@@ -228,16 +243,16 @@ def main(args):
             tiling_updates[wsi_path] = status_info
 
         for wsi_path, status_info in tiling_updates.items():
-            process_df.loc[
-                process_df["wsi_path"] == wsi_path, "tiling_status"
-            ] = status_info["status"]
+            process_df.loc[process_df["wsi_path"] == wsi_path, "tiling_status"] = (
+                status_info["status"]
+            )
             if "error" in status_info:
-                process_df.loc[
-                    process_df["wsi_path"] == wsi_path, "error"
-                ] = status_info["error"]
-                process_df.loc[
-                    process_df["wsi_path"] == wsi_path, "traceback"
-                ] = status_info["traceback"]
+                process_df.loc[process_df["wsi_path"] == wsi_path, "error"] = (
+                    status_info["error"]
+                )
+                process_df.loc[process_df["wsi_path"] == wsi_path, "traceback"] = (
+                    status_info["traceback"]
+                )
         process_df.to_csv(process_list, index=False)
 
         # summary logging
diff --git a/hs2p/utils/__init__.py b/hs2p/utils/__init__.py
index 6e6ced7..d78da20 100644
--- a/hs2p/utils/__init__.py
+++ b/hs2p/utils/__init__.py
@@ -6,4 +6,4 @@
     update_state_dict,
 )
 from .log_utils import setup_logging
-from .config import setup
\ No newline at end of file
+from .config import setup
diff --git a/hs2p/utils/utils.py b/hs2p/utils/utils.py
index 23d49a7..f63cde2 100644
--- a/hs2p/utils/utils.py
+++ b/hs2p/utils/utils.py
@@ -116,9 +116,15 @@ def load_csv(cfg):
     elif "slide_path" in df.columns:
         wsi_paths = [Path(x) for x in df.slide_path.values.tolist()]
     if "mask_path" in df.columns:
-        mask_paths = [Path(x) if x is not None and not pd.isna(x) else None for x in df.mask_path.values.tolist()]
+        mask_paths = [
+            Path(x) if x is not None and not pd.isna(x) else None
+            for x in df.mask_path.values.tolist()
+        ]
     elif "segmentation_mask_path" in df.columns:
-        mask_paths = [Path(x) if x is not None and not pd.isna(x) else None for x in df.segmentation_mask_path.values.tolist()]
+        mask_paths = [
+            Path(x) if x is not None and not pd.isna(x) else None
+            for x in df.segmentation_mask_path.values.tolist()
+        ]
     else:
         mask_paths = [None for _ in wsi_paths]
     return wsi_paths, mask_paths
@@ -177,4 +183,4 @@ def update_state_dict(
         msg += f"\n{shape_mismatch} weight(s) not loaded due to mismatching shapes: {shape_mismatch_list}"
     if missing_keys > 0:
         msg += f"\n{missing_keys} key(s) from checkpoint not found in model: {missing_keys_list}"
-    return updated_state_dict, msg
\ No newline at end of file
+    return updated_state_dict, msg
diff --git a/hs2p/wsi/__init__.py b/hs2p/wsi/__init__.py
index 5377f73..2d3b488 100644
--- a/hs2p/wsi/__init__.py
+++ b/hs2p/wsi/__init__.py
@@ -24,7 +24,9 @@ def sort_coordinates_with_tissue(coordinates, tissue_percentages, contour_indice
     dedup_tissue_percentages = []
     dedup_contour_indices = []
     # deduplicate
-    for (x, y), tissue, contour_idx in zip(coordinates, tissue_percentages, contour_indices):
+    for (x, y), tissue, contour_idx in zip(
+        coordinates, tissue_percentages, contour_indices
+    ):
         key = (x, y)
         if key in seen:
             continue
@@ -35,7 +37,14 @@ def sort_coordinates_with_tissue(coordinates, tissue_percentages, contour_indice
     # mock filenames
     mocked_filenames = [f"{x}_{y}.jpg" for x, y in dedup_coordinates]
     # sort combined list by mocked filenames
-    combined = list(zip(mocked_filenames, dedup_coordinates, dedup_tissue_percentages, dedup_contour_indices))
+    combined = list(
+        zip(
+            mocked_filenames,
+            dedup_coordinates,
+            dedup_tissue_percentages,
+            dedup_contour_indices,
+        )
+    )
     sorted_combined = sorted(combined, key=lambda x: x[0])
     # extract sorted coordinates and tissue percentages
     sorted_coordinates = [coord for _, coord, _, _ in sorted_combined]
@@ -251,7 +260,9 @@ def filter_coordinates(
                 )
                 if masked_tile.shape[-1] == 1:
                     masked_tile = np.squeeze(masked_tile, axis=-1)
-                mask_pct = get_mask_coverage(masked_tile, sampling_params.pixel_mapping[annotation])
+                mask_pct = get_mask_coverage(
+                    masked_tile, sampling_params.pixel_mapping[annotation]
+                )
                 if mask_pct >= pct:
                     filtered_coordinates[annotation].append(coord)
                     filtered_contour_indices[annotation].append(contour_idx)
@@ -428,7 +439,7 @@ def draw_grid_from_coordinates(
     vis_level: int,
     thickness: int = 2,
     indices: list[int] | None = None,
-    mask = None,
+    mask=None,
     palette: dict[str, int] | None = None,
     pixel_mapping: dict[str, int] | None = None,
     color_mapping: dict[str, list[int] | None] | None = None,
@@ -493,11 +504,7 @@ def draw_grid_from_coordinates(
             valid_tile = Image.fromarray(valid_tile).convert("RGB")
 
             if mask is not None:
-                if (
-                    palette is None
-                    or pixel_mapping is None
-                    or color_mapping is None
-                ):
+                if palette is None or pixel_mapping is None or color_mapping is None:
                     raise ValueError(
                         "palette, pixel_mapping, and color_mapping are required when mask overlay is enabled"
                     )
diff --git a/hs2p/wsi/utils.py b/hs2p/wsi/utils.py
index 9567d9e..76cb63e 100644
--- a/hs2p/wsi/utils.py
+++ b/hs2p/wsi/utils.py
@@ -3,7 +3,18 @@
 
 
 class HasEnoughTissue(object):
-    def __init__(self, contour, contour_holes, tissue_mask, target_tile_size, tile_spacing, resize_factor, seg_spacing, spacing_at_level_0, pct=0.01):
+    def __init__(
+        self,
+        contour,
+        contour_holes,
+        tissue_mask,
+        target_tile_size,
+        tile_spacing,
+        resize_factor,
+        seg_spacing,
+        spacing_at_level_0,
+        pct=0.01,
+    ):
         self.cont = contour
         self.holes = contour_holes
         self.mask = tissue_mask // 255
@@ -23,7 +34,7 @@ def __init__(self, contour, contour_holes, tissue_mask, target_tile_size, tile_s
             self.downsampled_tile_size > 0
         ), "downsampled tile_size is equal to zero, aborting; please consider using a smaller seg_params.downsample parameter"
 
-        self.tile_size_resized = int(round(target_tile_size * resize_factor,0))
+        self.tile_size_resized = int(round(target_tile_size * resize_factor, 0))
 
         # precompute the combined tissue mask
         self.precomputed_mask = self._precompute_tissue_mask()
@@ -112,11 +123,21 @@ def get_tile_mask(self, x, y):
         sub_mask = self._extract_sub_mask(x_tile, y_tile)
 
         # handle edge cases where sub_mask is smaller than expected
-        if sub_mask.shape[0] != self.downsampled_tile_size or sub_mask.shape[1] != self.downsampled_tile_size:
-            padded_mask = np.zeros((self.downsampled_tile_size, self.downsampled_tile_size), dtype=sub_mask.dtype)
-            padded_mask[:sub_mask.shape[0], :sub_mask.shape[1]] = sub_mask
+        if (
+            sub_mask.shape[0] != self.downsampled_tile_size
+            or sub_mask.shape[1] != self.downsampled_tile_size
+        ):
+            padded_mask = np.zeros(
+                (self.downsampled_tile_size, self.downsampled_tile_size),
+                dtype=sub_mask.dtype,
+            )
+            padded_mask[: sub_mask.shape[0], : sub_mask.shape[1]] = sub_mask
             sub_mask = padded_mask
 
         # upsample the mask to the original tile size
-        mask = cv2.resize(sub_mask, (self.tile_size_resized, self.tile_size_resized), interpolation=cv2.INTER_NEAREST)
+        mask = cv2.resize(
+            sub_mask,
+            (self.tile_size_resized, self.tile_size_resized),
+            interpolation=cv2.INTER_NEAREST,
+        )
         return mask
diff --git a/hs2p/wsi/wsi.py b/hs2p/wsi/wsi.py
index 5a3f8d7..74fd89f 100644
--- a/hs2p/wsi/wsi.py
+++ b/hs2p/wsi/wsi.py
@@ -23,6 +23,7 @@ class SegmentationParameters(NamedTuple):
     """
     Parameters for filtering contours.
     """
+
     downsample: int  # dowsample factor for loading segmentation mask
     sthresh: int  # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when use_otsu=True)
     sthresh_up: int  # upper threshold value for scaling the binary mask
@@ -36,6 +37,7 @@ class FilterParameters(NamedTuple):
     """
     Parameters for filtering contours.
     """
+
     ref_tile_size: int  # reference tile size for filtering
     a_t: int  # contour area threshold for filtering
     a_h: int  # hole area threshold for filtering
@@ -44,13 +46,16 @@ class FilterParameters(NamedTuple):
     filter_black: bool  # whether to filter out mostly black tiles
     white_threshold: int  # threshold for white pixels (0-255)
     black_threshold: int  # threshold for black pixels (0-255)
-    fraction_threshold: float  # fraction of pixels that must be white/black to filter out the tile
+    fraction_threshold: (
+        float  # fraction of pixels that must be white/black to filter out the tile
+    )
 
 
 class TilingParameters(NamedTuple):
     """
     Parameters for tiling.
     """
+
     target_spacing: float  # spacing at which to tile the slide, in microns per pixel
     tolerance: float  # for matching the target_spacing, deciding how much target_spacing can deviate from those specified in the slide metadata.
     target_tile_size: int  # size of the tiles to extract, in pixels
@@ -64,9 +69,16 @@ class SamplingParameters(NamedTuple):
     """
     Parameters for sampling.
     """
-    pixel_mapping: dict[str, int]  # mapping from annotation names to pixel values in the annotation mask
-    color_mapping: dict[str, list[int] | None]  # mapping from annotation names to RGB color codes for overlaying
-    tissue_percentage: dict[str, float | None]  # minimum percentage of tile coverage for each category required to sample tiles
+
+    pixel_mapping: dict[
+        str, int
+    ]  # mapping from annotation names to pixel values in the annotation mask
+    color_mapping: dict[
+        str, list[int] | None
+    ]  # mapping from annotation names to RGB color codes for overlaying
+    tissue_percentage: dict[
+        str, float | None
+    ]  # minimum percentage of tile coverage for each category required to sample tiles
 
 
 class WholeSlideImage(object):
@@ -131,7 +143,9 @@ def __init__(
         self.mask_path = mask_path
         if mask_path is not None:
             self.mask = wsd.WholeSlideImage(mask_path, backend=backend)
-            self.seg_level = self.load_segmentation(segment_params, sampling_params=sampling_params)
+            self.seg_level = self.load_segmentation(
+                segment_params, sampling_params=sampling_params
+            )
         elif segment:
             self.seg_level = self.segment_tissue(segment_params)
 
@@ -221,9 +235,7 @@ def get_level_spacing(self, level: int):
             self._level_spacing_cache[level] = self.spacings[level]
         return self._level_spacing_cache[level]
 
-    def get_best_level_for_spacing(
-        self, target_spacing: float, tolerance: float
-    ):
+    def get_best_level_for_spacing(self, target_spacing: float, tolerance: float):
         """
         Determines the best level in a multi-resolution image pyramid for a given target spacing.
 
@@ -331,8 +343,10 @@ def load_segmentation(
         self.annotation_mask = {"tissue": m}
         for annotation, val in sampling_params.pixel_mapping.items():
             if annotation != "background":
-                self.annotation_mask[annotation] = (mask == val).astype("uint8") * segment_params.sthresh_up
-       
+                self.annotation_mask[annotation] = (mask == val).astype(
+                    "uint8"
+                ) * segment_params.sthresh_up
+
         return seg_level
 
     def segment_tissue(
@@ -596,7 +610,9 @@ def filter_black_and_white_tiles(
                             spacing=tile_spacing,
                         )
                         # restrict coverage to valid portion of the slide
-                        x_downsample, y_downsample = int(x / downsample), int(y / downsample)
+                        x_downsample, y_downsample = int(x / downsample), int(
+                            y / downsample
+                        )
                         valid_w = int(min(tile_size, img_w - x_downsample))
                         valid_h = int(min(tile_size, img_h - y_downsample))
                         valid_tile = tile[:valid_h, :valid_w, :]
@@ -605,20 +621,14 @@ def filter_black_and_white_tiles(
                                 valid_tile[:, :, :3] > filter_params.white_threshold,
                                 axis=-1,
                             )
-                            if (
-                                np.mean(white_pixels)
-                                > filter_params.fraction_threshold
-                            ):
+                            if np.mean(white_pixels) > filter_params.fraction_threshold:
                                 keep = 0
                         if keep and filter_params.filter_black:
                             black_pixels = np.all(
                                 valid_tile[:, :, :3] < filter_params.black_threshold,
                                 axis=-1,
                             )
-                            if (
-                                np.mean(black_pixels)
-                                > filter_params.fraction_threshold
-                            ):
+                            if np.mean(black_pixels) > filter_params.fraction_threshold:
                                 keep = 0
                     except Exception as e:
                         error_count += 1
@@ -723,8 +733,12 @@ def detect_contours(
         target_scale = self.level_downsamples[self.seg_level]
         scale = tuple(a / b for a, b in zip(target_scale, current_scale))
         ref_tile_size = (filter_params.ref_tile_size, filter_params.ref_tile_size)
-        ref_tile_size_at_target_scale = tuple(a / b for a, b in zip(ref_tile_size, scale))
-        scaled_ref_tile_area = int(ref_tile_size_at_target_scale[0] * ref_tile_size_at_target_scale[1])
+        ref_tile_size_at_target_scale = tuple(
+            a / b for a, b in zip(ref_tile_size, scale)
+        )
+        scaled_ref_tile_area = int(
+            ref_tile_size_at_target_scale[0] * ref_tile_size_at_target_scale[1]
+        )
 
         adjusted_filter_params = FilterParameters(
             ref_tile_size=filter_params.ref_tile_size,
@@ -739,7 +753,11 @@ def detect_contours(
         )
 
         # find and filter contours
-        mask = self.annotation_mask["tissue"] if annotation is None else self.annotation_mask[annotation]
+        mask = (
+            self.annotation_mask["tissue"]
+            if annotation is None
+            else self.annotation_mask[annotation]
+        )
         if mask.ndim == 3:  # If the mask has 3 channels
             mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
 
@@ -985,7 +1003,7 @@ def process_contour(
             resize_factor >= 1
         ), f"Resize factor should be greater than or equal to 1. Got {resize_factor}"
 
-        tile_size_resized = int(round(target_tile_size * resize_factor,0))
+        tile_size_resized = int(round(target_tile_size * resize_factor, 0))
         step_size = int(tile_size_resized * (1.0 - overlap))
 
         if contour is not None:
@@ -1018,8 +1036,16 @@ def process_contour(
         scale = self.level_downsamples[self.seg_level]
         cont = self.scaleContourDim([contour], (1.0 / scale[0], 1.0 / scale[1]))[0]
 
-        mask = self.annotation_mask["tissue"] if annotation is None else self.annotation_mask[annotation]
-        pct = self.annotation_pct["tissue"] if annotation is None else self.annotation_pct[annotation]
+        mask = (
+            self.annotation_mask["tissue"]
+            if annotation is None
+            else self.annotation_mask[annotation]
+        )
+        pct = (
+            self.annotation_pct["tissue"]
+            if annotation is None
+            else self.annotation_pct[annotation]
+        )
         seg_spacing = self.get_level_spacing(self.seg_level)
         tissue_checker = HasEnoughTissue(
             contour=cont,
@@ -1060,7 +1086,8 @@ def process_contour(
 
         if drop_holes:
             keep_flags = [
-                flag and not self.isInHoles(contour_holes, coord, tile_size_at_level_0[0])
+                flag
+                and not self.isInHoles(contour_holes, coord, tile_size_at_level_0[0])
                 for flag, coord in zip(keep_flags, coord_candidates)
             ]
 
diff --git a/release.py b/release.py
index 263b5b3..fa70fc1 100644
--- a/release.py
+++ b/release.py
@@ -86,9 +86,18 @@ def open_release_draft(tag: str) -> None:
     import argparse
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--level", choices=["patch", "minor", "major"], default="patch", help="Version bump level")
-    parser.add_argument("--no-pr", action="store_true", help="Don't create pull request")
-    parser.add_argument("--no-draft", action="store_true", help="Don't open GitHub release page")
+    parser.add_argument(
+        "--level",
+        choices=["patch", "minor", "major"],
+        default="patch",
+        help="Version bump level",
+    )
+    parser.add_argument(
+        "--no-pr", action="store_true", help="Don't create pull request"
+    )
+    parser.add_argument(
+        "--no-draft", action="store_true", help="Don't open GitHub release page"
+    )
     args = parser.parse_args()
 
     # always start from main branch