From 8af1644a8fb7f5f27e3fb29becd858e784f4adb4 Mon Sep 17 00:00:00 2001 From: brendancol Date: Wed, 7 Jan 2026 11:08:01 -0500 Subject: [PATCH 1/6] added horizontal vertical unit mismatch heuristic to help address issue #840 --- xrspatial/slope.py | 9 +- xrspatial/tests/test_utils.py | 100 ++++++++++++++++++++++- xrspatial/utils.py | 150 ++++++++++++++++++++++++++++++++++ 3 files changed, 256 insertions(+), 3 deletions(-) diff --git a/xrspatial/slope.py b/xrspatial/slope.py index 042dfd27..8b72d8f5 100644 --- a/xrspatial/slope.py +++ b/xrspatial/slope.py @@ -22,7 +22,11 @@ class cupy(object): from numba import cuda # local modules -from xrspatial.utils import (ArrayTypeFunctionMapping, cuda_args, get_dataarray_resolution, ngjit) +from xrspatial.utils import ArrayTypeFunctionMapping +from xrspatial.utils import cuda_args +from xrspatial.utils import get_dataarray_resolution +from xrspatial.utils import ngjit +from xrspatial.utils import warn_if_unit_mismatch @ngjit @@ -178,6 +182,9 @@ def slope(agg: xr.DataArray, Dimensions without coordinates: dim_0, dim_1 """ + # warn if we strongly suspect degrees + meters mismatch + warn_if_unit_mismatch(agg) + cellsize_x, cellsize_y = get_dataarray_resolution(agg) mapper = ArrayTypeFunctionMapping( numpy_func=_run_numpy, diff --git a/xrspatial/tests/test_utils.py b/xrspatial/tests/test_utils.py index 1a3fcd41..7ba4f7d1 100644 --- a/xrspatial/tests/test_utils.py +++ b/xrspatial/tests/test_utils.py @@ -1,5 +1,11 @@ +import numpy as np +import xarray as xr +import pytest +import warnings + + from xrspatial.datasets import make_terrain -from xrspatial.utils import canvas_like +from xrspatial import utils from xrspatial.tests.general_checks import dask_array_available @@ -8,5 +14,95 @@ def test_canvas_like(): # aspect ratio is 1:1 terrain_shape = (1000, 1000) terrain = make_terrain(shape=terrain_shape) - terrain_res = canvas_like(terrain, width=50) + terrain_res = utils.canvas_like(terrain, width=50) assert terrain_res.shape == (50, 50) + + +def test_warn_if_unit_mismatch_degrees_horizontal_elevation_vertical(monkeypatch): + """ + If coordinates look like degrees (lon/lat) and values look like elevation + (e.g., meters), warn the user about a likely unit mismatch. + """ + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + + # Coordinates in degrees (lon/lat-ish) + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, # elevation in meters + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(utils, "get_dataarray_resolution", fake_get_dataarray_resolution) + + # Here we *do* expect a warning + with pytest.warns(UserWarning, match="appears to have coordinates in degrees"): + utils.warn_if_unit_mismatch(da) + + +def test_warn_if_unit_mismatch_no_warning_for_projected_like_grid(monkeypatch): + """ + If coordinates look like projected linear units (e.g., meters) and values + look like elevation, we should NOT warn. + """ + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + + # Coordinates in meters (projected-looking) + y = np.arange(10) * 30.0 # 0, 30, 60, ... + x = 500_000.0 + np.arange(10) * 30.0 # UTM-ish eastings + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, # elevation in meters + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) # 30 m + + monkeypatch.setattr(utils, "get_dataarray_resolution", fake_get_dataarray_resolution) + + # Capture warnings using the stdlib warnings module + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + utils.warn_if_unit_mismatch(da) + + assert len(w) == 0, "Expected no warnings for projected-like coordinates" + + +def test_warn_if_unit_mismatch_degrees_but_angle_vertical(monkeypatch): + """ + If coordinates are in degrees but the DataArray itself looks like an angle + (e.g., units='degrees'), we should NOT warn; slope/aspect outputs fall into + this category. + """ + data = np.linspace(0, 90, 10 * 10, dtype=float).reshape(10, 10) + + # Coordinates in degrees again + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "degrees"}, # angle, not elevation + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(utils, "get_dataarray_resolution", fake_get_dataarray_resolution) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + utils.warn_if_unit_mismatch(da) + + assert len(w) == 0, "Expected no warnings when vertical units are angles" diff --git a/xrspatial/utils.py b/xrspatial/utils.py index 9bff209f..3b76d453 100644 --- a/xrspatial/utils.py +++ b/xrspatial/utils.py @@ -1,6 +1,7 @@ from __future__ import annotations from math import ceil +import warnings import datashader as ds import datashader.transfer_functions as tf @@ -449,3 +450,152 @@ def _convert_color(c): _converted_colors = {k: _convert_color(v) for k, v in color_key.items()} f = np.vectorize(lambda v: _converted_colors.get(v, 0)) return tf.Image(f(agg.data)) + + + +def _infer_coord_unit_type(coord: xr.DataArray, cellsize: float) -> str: + """ + Heuristic to classify a spatial coordinate axis as: + - 'degrees' + - 'linear' (meters/feet/etc) + - 'unknown' + + Parameters + ---------- + coord : xr.DataArray + 1D coordinate variable (x or y). + cellsize : float + Mean spacing along this coordinate. + + Returns + ------- + str + """ + units = str(coord.attrs.get("units", "")).lower() + + # 1) Explicit units, if present + if "degree" in units or units in ("deg", "degrees"): + return "degrees" + if units in ("m", "meter", "metre", "meters", "metres", + "km", "kilometer", "kilometre", "kilometers", "kilometres", + "ft", "foot", "feet"): + return "linear" + + # 2) Numeric heuristics (very conservative) + vals = coord.values + if vals.size < 2 or not np.issubdtype(vals.dtype, np.number): + return "unknown" + + vmin = float(np.nanmin(vals)) + vmax = float(np.nanmax(vals)) + span = abs(vmax - vmin) + dx = abs(float(cellsize)) + + # Typical global geographic axes: span <= 360, spacing ~1e-5–0.5 deg + if -360.0 <= vmin <= 360.0 and -360.0 <= vmax <= 360.0: + if 1e-5 <= dx <= 0.5: + return "degrees" + + # Typical projected axes in meters: span >> 1, spacing > ~0.1 + # (e.g. UTM / national grids) + if span > 1000.0 and dx >= 0.1: + return "linear" + + return "unknown" + + +def _infer_vertical_unit_type(agg: xr.DataArray) -> str: + """ + Heuristic to classify the DataArray values as: + - 'elevation' (meters/feet etc) + - 'angle' (degrees/radians) + - 'unknown' + """ + units = str(agg.attrs.get("units", "")).lower() + + # 1) Explicit units + if any(k in units for k in ("degree", "deg")): + return "angle" + if "rad" in units: + return "angle" + if units in ("m", "meter", "metre", "meters", "metres", + "km", "kilometer", "kilometre", "kilometers", "kilometres", + "ft", "foot", "feet"): + return "elevation" + + # 2) Numeric heuristics on data range + data = agg.values + if not np.issubdtype(data.dtype, np.number): + return "unknown" + + finite = np.isfinite(data) + if not np.any(finite): + return "unknown" + + vmin = float(data[finite].min()) + vmax = float(data[finite].max()) + span = vmax - vmin + + # Elevation-like: tens–thousands of units, typical DEM ranges. + if 10.0 <= span <= 20000.0 and vmin > -500.0: + return "elevation" + + # Angle-like: often 0–360, -180–180, or small (-pi, pi) + if -360.0 <= vmin <= 360.0 and -360.0 <= vmax <= 360.0: + # If the span is not huge, treat as angle-ish + if span <= 720.0: + return "angle" + + return "unknown" + +def warn_if_unit_mismatch(agg: xr.DataArray) -> None: + """ + Heuristic check for horizontal vs vertical unit mismatch. + + Intended to catch the common case of: + - coordinates in degrees (lon/lat) + - elevation values in meters/feet + + Emits a UserWarning if a likely mismatch is detected. + """ + try: + cellsize_x, cellsize_y = get_dataarray_resolution(agg) + except Exception: + # If we can't even get a resolution, we also can't say much + return + + # pick "x" and "y" coords in a generic way: + # - typically dims are ('y', 'x') or ('lat', 'lon') + # - fall back to last two dims + if len(agg.dims) < 2: + return + + dim_y, dim_x = agg.dims[-2], agg.dims[-1] + coord_x = agg.coords.get(dim_x, None) + coord_y = agg.coords.get(dim_y, None) + + if coord_x is None or coord_y is None: + # Can't infer spatial types without coords + return + + horiz_x = _infer_coord_unit_type(coord_x, cellsize_x) + horiz_y = _infer_coord_unit_type(coord_y, cellsize_y) + vert = _infer_vertical_unit_type(agg) + + horiz_types = {horiz_x, horiz_y} - {"unknown"} + + # Only act if we have some signal about horizontal AND vertical + if not horiz_types or vert == "unknown": + return + + # If any axis looks like degrees and vertical looks like elevation, + # it's almost certainly "lat/lon degrees + meter elevations" + if "degrees" in horiz_types and vert == "elevation": + warnings.warn( + "xrspatial: input DataArray appears to have coordinates in degrees " + "but elevation values in a linear unit (e.g. meters/feet). " + "Slope/aspect operations expect horizontal distances in the same " + "units as vertical. Consider reprojecting to a projected CRS with " + "meter-based coordinates before calling `slope`.", + UserWarning, + ) From 33d0f5f257746096e470eb19028627028eef10d7 Mon Sep 17 00:00:00 2001 From: brendancol Date: Wed, 7 Jan 2026 16:25:48 -0500 Subject: [PATCH 2/6] added unit mismatch check to aspect --- xrspatial/aspect.py | 8 +++++++- xrspatial/tests/general_checks.py | 8 +++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/xrspatial/aspect.py b/xrspatial/aspect.py index 33ed3f37..00d64be3 100644 --- a/xrspatial/aspect.py +++ b/xrspatial/aspect.py @@ -14,7 +14,10 @@ import xarray as xr from numba import cuda -from xrspatial.utils import ArrayTypeFunctionMapping, cuda_args, ngjit +from xrspatial.utils import ArrayTypeFunctionMapping +from xrspatial.utils import cuda_args +from xrspatial.utils import ngjit +from xrspatial.utils import warn_if_unit_mismatch # 3rd-party try: @@ -262,6 +265,9 @@ def aspect(agg: xr.DataArray, dtype=float32) Dimensions without coordinates: y, x """ + + warn_if_unit_mismatch(agg) + mapper = ArrayTypeFunctionMapping( numpy_func=_run_numpy, dask_func=_run_dask_numpy, diff --git a/xrspatial/tests/general_checks.py b/xrspatial/tests/general_checks.py index 75011fd6..859952ad 100644 --- a/xrspatial/tests/general_checks.py +++ b/xrspatial/tests/general_checks.py @@ -32,7 +32,7 @@ def create_test_raster( backend='numpy', name='myraster', dims=['y', 'x'], - attrs={'res': (0.5, 0.5), 'crs': 'EPSG: 4326'}, + attrs={'res': (0.5, 0.5), 'crs': 'EPSG: 5070'}, chunks=(3, 3) ): raster = xr.DataArray(data, name=name, dims=dims, attrs=attrs) @@ -42,12 +42,14 @@ def create_test_raster( if attrs is not None: if 'res' in attrs: res = attrs['res'] + # set coords for test raster, 2D coords only raster[dims[0]] = np.linspace((data.shape[0] - 1) * res[0], 0, data.shape[0]) raster[dims[1]] = np.linspace(0, (data.shape[1] - 1) * res[1], data.shape[1]) - raster[dims[0]] = np.linspace((data.shape[0] - 1)/2, 0, data.shape[0]) - raster[dims[1]] = np.linspace(0, (data.shape[1] - 1)/2, data.shape[1]) + # assign units to coords + raster[dims[0]].attrs["units"] = "m" + raster[dims[1]].attrs["units"] = "m" if has_cuda_and_cupy() and 'cupy' in backend: import cupy From ab30e4885b8a71efdc9819fba86ac3513e1e492b Mon Sep 17 00:00:00 2001 From: brendancol Date: Wed, 7 Jan 2026 16:26:05 -0500 Subject: [PATCH 3/6] added unit mismatch check to curvature --- xrspatial/curvature.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xrspatial/curvature.py b/xrspatial/curvature.py index fb8708a1..0fbdf7c9 100644 --- a/xrspatial/curvature.py +++ b/xrspatial/curvature.py @@ -21,7 +21,11 @@ class cupy(object): from numba import cuda # local modules -from xrspatial.utils import (ArrayTypeFunctionMapping, cuda_args, get_dataarray_resolution, ngjit) +from xrspatial.utils import ArrayTypeFunctionMapping +from xrspatial.utils import cuda_args +from xrspatial.utils import get_dataarray_resolution +from xrspatial.utils import ngjit +from xrspatial.utils import warn_if_unit_mismatch @ngjit @@ -220,6 +224,7 @@ def curvature(agg: xr.DataArray, Attributes: res: (10, 10) """ + warn_if_unit_mismatch(agg) cellsize_x, cellsize_y = get_dataarray_resolution(agg) cellsize = (cellsize_x + cellsize_y) / 2 From ccbb894feaa3a2530a270064099c1c775995cd9d Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 9 Jan 2026 11:18:51 -0800 Subject: [PATCH 4/6] fixed cupy and dask cases for the unit mismatch heuristic --- xrspatial/utils.py | 142 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 117 insertions(+), 25 deletions(-) diff --git a/xrspatial/utils.py b/xrspatial/utils.py index 3b76d453..75e63523 100644 --- a/xrspatial/utils.py +++ b/xrspatial/utils.py @@ -452,7 +452,6 @@ def _convert_color(c): return tf.Image(f(agg.data)) - def _infer_coord_unit_type(coord: xr.DataArray, cellsize: float) -> str: """ Heuristic to classify a spatial coordinate axis as: @@ -504,50 +503,40 @@ def _infer_coord_unit_type(coord: xr.DataArray, cellsize: float) -> str: return "unknown" -def _infer_vertical_unit_type(agg: xr.DataArray) -> str: - """ - Heuristic to classify the DataArray values as: - - 'elevation' (meters/feet etc) - - 'angle' (degrees/radians) - - 'unknown' - """ +def _infer_vertical_unit_type(agg): units = str(agg.attrs.get("units", "")).lower() - # 1) Explicit units - if any(k in units for k in ("degree", "deg")): - return "angle" - if "rad" in units: + # Cheap / reliable first + if any(k in units for k in ("degree", "deg")) or "rad" in units: return "angle" if units in ("m", "meter", "metre", "meters", "metres", "km", "kilometer", "kilometre", "kilometers", "kilometres", "ft", "foot", "feet"): return "elevation" - # 2) Numeric heuristics on data range - data = agg.values - if not np.issubdtype(data.dtype, np.number): + # Numeric fallback: sample only (never full compute) + data = agg.data + try: + vmin, vmax = _sample_windows_min_max(data, max_window_elems=65536, windows=5) + except Exception: return "unknown" - finite = np.isfinite(data) - if not np.any(finite): + if not np.isfinite(vmin) or not np.isfinite(vmax): return "unknown" - vmin = float(data[finite].min()) - vmax = float(data[finite].max()) span = vmax - vmin - # Elevation-like: tens–thousands of units, typical DEM ranges. + # Elevation-ish heuristic if 10.0 <= span <= 20000.0 and vmin > -500.0: return "elevation" - # Angle-like: often 0–360, -180–180, or small (-pi, pi) - if -360.0 <= vmin <= 360.0 and -360.0 <= vmax <= 360.0: - # If the span is not huge, treat as angle-ish - if span <= 720.0: - return "angle" + # Angle-ish heuristic + if -360.0 <= vmin <= 360.0 and -360.0 <= vmax <= 360.0 and span <= 720.0: + return "angle" return "unknown" + def warn_if_unit_mismatch(agg: xr.DataArray) -> None: """ Heuristic check for horizontal vs vertical unit mismatch. @@ -599,3 +588,106 @@ def warn_if_unit_mismatch(agg: xr.DataArray) -> None: "meter-based coordinates before calling `slope`.", UserWarning, ) + + +def _to_float_scalar(x) -> float: + """Convert numpy/cupy scalar or 0-d array to python float safely.""" + if cupy is not None: + # cupy.ndarray scalar + if isinstance(x, cupy.ndarray): + return float(cupy.asnumpy(x).item()) + # cupy scalar type + if x.__class__.__module__.startswith("cupy") and hasattr(x, "item"): + return float(x.item()) + + if hasattr(x, "item"): + return float(x.item()) + return float(x) + + +def _sample_windows_min_max( + data, + *, + max_window_elems: int = 65536, # e.g. 256x256 + windows: int = 5, # corners + center default +) -> tuple[float, float]: + """ + Estimate (nanmin, nanmax) from a small sample of windows. + + Works for numpy, cupy, dask+numpy, dask+cupy. Only computes on the sampled + windows, not the full array. + """ + # Normalize to last-2D sampling (y,x). For higher dims, sample first index. + if hasattr(data, "ndim") and data.ndim >= 3: + prefix = (0,) * (data.ndim - 2) + else: + prefix = () + + # Determine y/x sizes + shape = data.shape + ny, nx = shape[-2], shape[-1] + + if ny == 0 or nx == 0: + return np.nan, np.nan + + # Choose a square-ish window size bounded by array shape + w = int(np.sqrt(max_window_elems)) + w = max(1, min(w, ny, nx)) + + # Define window anchor positions: (top-left), (top-right), (bottom-left), (bottom-right), (center) + anchors = [ + (0, 0), + (0, max(0, nx - w)), + (max(0, ny - w), 0), + (max(0, ny - w), max(0, nx - w)), + ] + if windows >= 5: + anchors.append((max(0, ny // 2 - w // 2), max(0, nx // 2 - w // 2))) + + # If windows > 5, sprinkle additional evenly-spaced anchors (optional) + if windows > 5: + extra = windows - 5 + ys = np.linspace(0, max(0, ny - w), extra + 2, dtype=int)[1:-1] + xs = np.linspace(0, max(0, nx - w), extra + 2, dtype=int)[1:-1] + for y0, x0 in zip(ys, xs): + anchors.append((int(y0), int(x0))) + + # Reduce min/max across sampled windows + mins = [] + maxs = [] + + for y0, x0 in anchors: + sl = prefix + (slice(y0, y0 + w), slice(x0, x0 + w)) + win = data[sl] + + if da is not None and isinstance(win, da.Array): + # Compute scalars only on this window + mins.append(da.nanmin(win)) + maxs.append(da.nanmax(win)) + elif cupy is not None and isinstance(win, cupy.ndarray): + mins.append(cupy.nanmin(win)) + maxs.append(cupy.nanmax(win)) + else: + mins.append(np.nanmin(win)) + maxs.append(np.nanmax(win)) + + # Finalize: if dask, compute the scalar graph now (still tiny) + if da is not None and any(isinstance(m, da.Array) for m in mins): + mn = da.nanmin(da.stack(mins)).compute() + mx = da.nanmax(da.stack(maxs)).compute() + return _to_float_scalar(mn), _to_float_scalar(mx) + + # If cupy scalars, convert safely + if cupy is not None and (any(isinstance(m, cupy.ndarray) for m in mins) or + any(getattr(m.__class__, "__module__", "").startswith("cupy") for m in mins)): + mn = mins[0] + mx = maxs[0] + # reduce on device + for m in mins[1:]: + mn = cupy.minimum(mn, m) + for m in maxs[1:]: + mx = cupy.maximum(mx, m) + return _to_float_scalar(mn), _to_float_scalar(mx) + + # numpy scalars + return float(np.nanmin(np.array(mins, dtype=float))), float(np.nanmax(np.array(maxs, dtype=float))) From d44f4518d17ee3bf100faa11e5c4418c9ef2e640 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 22 Jan 2026 06:55:34 -0800 Subject: [PATCH 5/6] removed the checks at beginning of slope, aspect, and curvature; added diagnostics module; --- xrspatial/__init__.py | 1 + xrspatial/aspect.py | 3 - xrspatial/curvature.py | 3 - xrspatial/diagnostics.py | 166 +++++++++++++++ xrspatial/slope.py | 4 - xrspatial/tests/test_diagnostics.py | 316 ++++++++++++++++++++++++++++ 6 files changed, 483 insertions(+), 10 deletions(-) create mode 100644 xrspatial/diagnostics.py create mode 100644 xrspatial/tests/test_diagnostics.py diff --git a/xrspatial/__init__.py b/xrspatial/__init__.py index cafab640..80b4baac 100644 --- a/xrspatial/__init__.py +++ b/xrspatial/__init__.py @@ -1,6 +1,7 @@ from xrspatial.aspect import aspect # noqa from xrspatial.bump import bump # noqa from xrspatial.classify import binary # noqa +from xrspatial.diagnostics import diagnose # noqa from xrspatial.classify import equal_interval # noqa from xrspatial.classify import natural_breaks # noqa from xrspatial.classify import quantile # noqa diff --git a/xrspatial/aspect.py b/xrspatial/aspect.py index 00d64be3..57ad079b 100644 --- a/xrspatial/aspect.py +++ b/xrspatial/aspect.py @@ -17,7 +17,6 @@ from xrspatial.utils import ArrayTypeFunctionMapping from xrspatial.utils import cuda_args from xrspatial.utils import ngjit -from xrspatial.utils import warn_if_unit_mismatch # 3rd-party try: @@ -266,8 +265,6 @@ def aspect(agg: xr.DataArray, Dimensions without coordinates: y, x """ - warn_if_unit_mismatch(agg) - mapper = ArrayTypeFunctionMapping( numpy_func=_run_numpy, dask_func=_run_dask_numpy, diff --git a/xrspatial/curvature.py b/xrspatial/curvature.py index 0fbdf7c9..97eff8d0 100644 --- a/xrspatial/curvature.py +++ b/xrspatial/curvature.py @@ -25,7 +25,6 @@ class cupy(object): from xrspatial.utils import cuda_args from xrspatial.utils import get_dataarray_resolution from xrspatial.utils import ngjit -from xrspatial.utils import warn_if_unit_mismatch @ngjit @@ -224,8 +223,6 @@ def curvature(agg: xr.DataArray, Attributes: res: (10, 10) """ - warn_if_unit_mismatch(agg) - cellsize_x, cellsize_y = get_dataarray_resolution(agg) cellsize = (cellsize_x + cellsize_y) / 2 diff --git a/xrspatial/diagnostics.py b/xrspatial/diagnostics.py new file mode 100644 index 00000000..a28401c7 --- /dev/null +++ b/xrspatial/diagnostics.py @@ -0,0 +1,166 @@ +""" +Diagnostics module for xarray-spatial. + +Provides utilities to help users identify common pitfalls and issues +with DataArrays before running xarray-spatial operations. +""" +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import List, Optional + +import xarray as xr + +from xrspatial.utils import ( + _infer_coord_unit_type, + _infer_vertical_unit_type, + get_dataarray_resolution, +) + + +@dataclass +class DiagnosticIssue: + """Represents a single diagnostic issue found during analysis.""" + code: str + severity: str # 'warning' or 'error' + message: str + suggestion: str + + +@dataclass +class DiagnosticReport: + """Results from diagnosing a DataArray.""" + issues: List[DiagnosticIssue] = field(default_factory=list) + horizontal_unit_type: Optional[str] = None + vertical_unit_type: Optional[str] = None + resolution: Optional[tuple] = None + + @property + def has_issues(self) -> bool: + return len(self.issues) > 0 + + @property + def has_warnings(self) -> bool: + return any(i.severity == 'warning' for i in self.issues) + + @property + def has_errors(self) -> bool: + return any(i.severity == 'error' for i in self.issues) + + def __str__(self) -> str: + if not self.issues: + return "No issues detected." + + lines = [] + for issue in self.issues: + lines.append(f"[{issue.severity.upper()}] {issue.code}: {issue.message}") + lines.append(f" Suggestion: {issue.suggestion}") + return "\n".join(lines) + + +def _check_unit_mismatch(agg: xr.DataArray, report: DiagnosticReport) -> None: + """ + Check for horizontal vs vertical unit mismatch. + + Detects the common case of coordinates in degrees (lon/lat) with + elevation values in meters/feet. + """ + try: + cellsize_x, cellsize_y = get_dataarray_resolution(agg) + report.resolution = (cellsize_x, cellsize_y) + except Exception: + return + + if len(agg.dims) < 2: + return + + dim_y, dim_x = agg.dims[-2], agg.dims[-1] + coord_x = agg.coords.get(dim_x, None) + coord_y = agg.coords.get(dim_y, None) + + if coord_x is None or coord_y is None: + return + + horiz_x = _infer_coord_unit_type(coord_x, cellsize_x) + horiz_y = _infer_coord_unit_type(coord_y, cellsize_y) + vert = _infer_vertical_unit_type(agg) + + report.vertical_unit_type = vert + + horiz_types = {horiz_x, horiz_y} - {"unknown"} + if horiz_types: + report.horizontal_unit_type = next(iter(horiz_types)) + + if not horiz_types or vert == "unknown": + return + + if "degrees" in horiz_types and vert == "elevation": + report.issues.append(DiagnosticIssue( + code="UNIT_MISMATCH", + severity="warning", + message=( + "Input DataArray appears to have coordinates in degrees " + "but elevation values in a linear unit (e.g. meters/feet)." + ), + suggestion=( + "Slope/aspect/curvature operations expect horizontal distances " + "in the same units as vertical. Consider reprojecting to a " + "projected CRS with meter-based coordinates." + ), + )) + + +def diagnose(agg: xr.DataArray, tool: Optional[str] = None) -> DiagnosticReport: + """ + Diagnose a DataArray for common xarray-spatial pitfalls. + + Runs a series of heuristic checks to identify potential issues + that could lead to incorrect results when using xarray-spatial + functions. + + Parameters + ---------- + agg : xr.DataArray + The input DataArray to diagnose. + tool : str, optional + Name of the xarray-spatial tool you intend to use (e.g., 'slope', + 'aspect', 'curvature'). When specified, only diagnostics relevant + to that tool are run. If None, all diagnostics are run. + + Returns + ------- + DiagnosticReport + A report containing any issues found, along with inferred + metadata about the DataArray. + + Examples + -------- + >>> import xarray as xr + >>> import numpy as np + >>> from xrspatial.diagnostics import diagnose + >>> # Create a DataArray with lon/lat coordinates but meter elevations + >>> data = np.random.rand(100, 100) * 1000 + 500 + >>> da = xr.DataArray( + ... data, + ... dims=['y', 'x'], + ... coords={ + ... 'y': np.linspace(40.0, 41.0, 100), + ... 'x': np.linspace(-105.0, -104.0, 100), + ... } + ... ) + >>> report = diagnose(da) + >>> print(report) + [WARNING] UNIT_MISMATCH: Input DataArray appears to have coordinates... + >>> if report.has_warnings: + ... print("Consider reprojecting your data!") + """ + report = DiagnosticReport() + + # Tools that are sensitive to unit mismatch + unit_mismatch_tools = {'slope', 'aspect', 'curvature', 'hillshade'} + + # Run unit mismatch check if tool is relevant or no tool specified + if tool is None or tool.lower() in unit_mismatch_tools: + _check_unit_mismatch(agg, report) + + return report diff --git a/xrspatial/slope.py b/xrspatial/slope.py index 8b72d8f5..cd2aff87 100644 --- a/xrspatial/slope.py +++ b/xrspatial/slope.py @@ -26,7 +26,6 @@ class cupy(object): from xrspatial.utils import cuda_args from xrspatial.utils import get_dataarray_resolution from xrspatial.utils import ngjit -from xrspatial.utils import warn_if_unit_mismatch @ngjit @@ -182,9 +181,6 @@ def slope(agg: xr.DataArray, Dimensions without coordinates: dim_0, dim_1 """ - # warn if we strongly suspect degrees + meters mismatch - warn_if_unit_mismatch(agg) - cellsize_x, cellsize_y = get_dataarray_resolution(agg) mapper = ArrayTypeFunctionMapping( numpy_func=_run_numpy, diff --git a/xrspatial/tests/test_diagnostics.py b/xrspatial/tests/test_diagnostics.py new file mode 100644 index 00000000..8da29025 --- /dev/null +++ b/xrspatial/tests/test_diagnostics.py @@ -0,0 +1,316 @@ +import numpy as np +import xarray as xr +import pytest + +from xrspatial import diagnostics +from xrspatial.diagnostics import diagnose, DiagnosticReport, DiagnosticIssue + + +class TestDiagnosticIssue: + def test_dataclass_fields(self): + issue = DiagnosticIssue( + code="TEST_CODE", + severity="warning", + message="Test message", + suggestion="Test suggestion", + ) + assert issue.code == "TEST_CODE" + assert issue.severity == "warning" + assert issue.message == "Test message" + assert issue.suggestion == "Test suggestion" + + +class TestDiagnosticReport: + def test_empty_report(self): + report = DiagnosticReport() + assert report.issues == [] + assert report.horizontal_unit_type is None + assert report.vertical_unit_type is None + assert report.resolution is None + assert not report.has_issues + assert not report.has_warnings + assert not report.has_errors + + def test_has_issues_with_warning(self): + report = DiagnosticReport() + report.issues.append(DiagnosticIssue( + code="TEST", + severity="warning", + message="msg", + suggestion="sug", + )) + assert report.has_issues + assert report.has_warnings + assert not report.has_errors + + def test_has_issues_with_error(self): + report = DiagnosticReport() + report.issues.append(DiagnosticIssue( + code="TEST", + severity="error", + message="msg", + suggestion="sug", + )) + assert report.has_issues + assert not report.has_warnings + assert report.has_errors + + def test_str_no_issues(self): + report = DiagnosticReport() + assert str(report) == "No issues detected." + + def test_str_with_issues(self): + report = DiagnosticReport() + report.issues.append(DiagnosticIssue( + code="UNIT_MISMATCH", + severity="warning", + message="Test message", + suggestion="Test suggestion", + )) + output = str(report) + assert "[WARNING] UNIT_MISMATCH: Test message" in output + assert "Suggestion: Test suggestion" in output + + +class TestDiagnoseUnitMismatch: + def test_degrees_horizontal_elevation_vertical(self, monkeypatch): + """ + If coordinates look like degrees (lon/lat) and values look like elevation + (e.g., meters), diagnose should report a UNIT_MISMATCH warning. + """ + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + + # Coordinates in degrees (lon/lat-ish) + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, # elevation in meters + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da) + + assert report.has_issues + assert report.has_warnings + assert len(report.issues) == 1 + assert report.issues[0].code == "UNIT_MISMATCH" + assert report.horizontal_unit_type == "degrees" + assert report.vertical_unit_type == "elevation" + + def test_no_warning_for_projected_like_grid(self, monkeypatch): + """ + If coordinates look like projected linear units (e.g., meters) and values + look like elevation, we should NOT report any issues. + """ + data = np.linspace(0, 999, 100 * 100, dtype=float).reshape(100, 100) + + # Coordinates in meters (projected-looking) with larger span + y = 4_500_000.0 + np.arange(100) * 30.0 # UTM-ish northings + x = 500_000.0 + np.arange(100) * 30.0 # UTM-ish eastings + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, # elevation in meters + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) # 30 m + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da) + + assert not report.has_issues + assert report.horizontal_unit_type == "linear" + assert report.vertical_unit_type == "elevation" + + def test_degrees_but_angle_vertical(self, monkeypatch): + """ + If coordinates are in degrees but the DataArray itself looks like an angle + (e.g., units='degrees'), we should NOT report issues; slope/aspect outputs + fall into this category. + """ + data = np.linspace(0, 90, 10 * 10, dtype=float).reshape(10, 10) + + # Coordinates in degrees + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "degrees"}, # angle, not elevation + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da) + + assert not report.has_issues + assert report.horizontal_unit_type == "degrees" + assert report.vertical_unit_type == "angle" + + +class TestDiagnoseWithToolParameter: + def test_slope_tool_runs_unit_mismatch_check(self, monkeypatch): + """Unit mismatch check should run when tool='slope'.""" + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da, tool='slope') + assert report.has_issues + assert report.issues[0].code == "UNIT_MISMATCH" + + def test_aspect_tool_runs_unit_mismatch_check(self, monkeypatch): + """Unit mismatch check should run when tool='aspect'.""" + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da, tool='aspect') + assert report.has_issues + + def test_curvature_tool_runs_unit_mismatch_check(self, monkeypatch): + """Unit mismatch check should run when tool='curvature'.""" + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da, tool='curvature') + assert report.has_issues + + def test_hillshade_tool_runs_unit_mismatch_check(self, monkeypatch): + """Unit mismatch check should run when tool='hillshade'.""" + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da, tool='hillshade') + assert report.has_issues + + def test_tool_name_case_insensitive(self, monkeypatch): + """Tool name matching should be case insensitive.""" + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + y = np.linspace(5.0, 5.0025, 10) + x = np.linspace(-74.93, -74.9275, 10) + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + attrs={"units": "m"}, + ) + + def fake_get_dataarray_resolution(arr): + return float(x[1] - x[0]), float(y[1] - y[0]) + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da, tool='SLOPE') + assert report.has_issues + + +class TestDiagnoseEdgeCases: + def test_1d_array_no_crash(self): + """diagnose should handle 1D arrays gracefully.""" + da = xr.DataArray(np.arange(10), dims=["x"]) + report = diagnose(da) + assert not report.has_issues + + def test_missing_coords_no_crash(self): + """diagnose should handle arrays without coordinates gracefully.""" + da = xr.DataArray(np.random.rand(10, 10), dims=["y", "x"]) + # No coords assigned + report = diagnose(da) + # Should not crash, may or may not have issues depending on heuristics + assert isinstance(report, DiagnosticReport) + + def test_resolution_stored_in_report(self, monkeypatch): + """Resolution should be stored in the report.""" + data = np.linspace(0, 999, 10 * 10, dtype=float).reshape(10, 10) + y = np.arange(10) * 30.0 + x = 500_000.0 + np.arange(10) * 30.0 + + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": y, "x": x}, + ) + + def fake_get_dataarray_resolution(arr): + return 30.0, 30.0 + + monkeypatch.setattr(diagnostics, "get_dataarray_resolution", fake_get_dataarray_resolution) + + report = diagnose(da) + assert report.resolution == (30.0, 30.0) + + +class TestTopLevelImport: + def test_diagnose_importable_from_xrspatial(self): + """diagnose should be importable from the top-level xrspatial package.""" + from xrspatial import diagnose as diagnose_top + assert diagnose_top is diagnose From ef61ed3cc88eb99f18e55ebc36df297139f08b3e Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 22 Jan 2026 07:09:39 -0800 Subject: [PATCH 6/6] added explicit dtype to handle new pandas compatibility --- xrspatial/focal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 79a595cf..a198c9ac 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -768,7 +768,7 @@ def _focal_stats_cupy(agg, kernel, stats_funcs): attrs=agg.attrs ) stats_aggs.append(stats_agg) - stats = xr.concat(stats_aggs, pd.Index(stats_funcs, name='stats')) + stats = xr.concat(stats_aggs, pd.Index(stats_funcs, name='stats', dtype=object)) return stats @@ -786,7 +786,7 @@ def _focal_stats_cpu(agg, kernel, stats_funcs): for stats in stats_funcs: stats_agg = apply(agg, kernel, func=_function_mapping[stats]) stats_aggs.append(stats_agg) - stats = xr.concat(stats_aggs, pd.Index(stats_funcs, name='stats')) + stats = xr.concat(stats_aggs, pd.Index(stats_funcs, name='stats', dtype=object)) return stats