From aff0975ae8e3820f826f8d255aef8705217fd4ca Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 03:38:02 +0000 Subject: [PATCH 01/11] #240 Add function to apply chunked statistical operation --- decode/__init__.py | 2 + decode/stats.py | 143 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 decode/stats.py diff --git a/decode/__init__.py b/decode/__init__.py index e6596e5..d9e8183 100644 --- a/decode/__init__.py +++ b/decode/__init__.py @@ -8,6 +8,7 @@ "plot", "qlook", "select", + "stats", "utils", ] __version__ = "2025.4.2" @@ -23,4 +24,5 @@ from . import plot from . import qlook from . import select +from . import stats from . import utils diff --git a/decode/stats.py b/decode/stats.py new file mode 100644 index 0000000..dad2527 --- /dev/null +++ b/decode/stats.py @@ -0,0 +1,143 @@ +__all__ = ["apply"] + + +# standard library +from collections.abc import Callable, Hashable, Iterable, Sequence +from typing import Any, Literal, Optional, Union + + +# dependencies +import numpy as np +import xarray as xr +from numpy.typing import NDArray +from xarray.core.types import Dims + + +# constants +FIRST_INDEX = 0 +LAST_INDEX = -1 +NUMERIC_KINDS = "biufcmM" + + +# type hints +Boundary = Literal["exact", "trim", "pad"] +Side = Literal["left", "right"] +Stat = Union[Callable[..., Any], str] + + +def apply( + da: xr.DataArray, + func: Stat, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) statistical operation to a DataArray. + + Args: + da: Input DataArray. + func: Function or name of the statistical operation (e.g. ``'mean'``). + dim: Name(s) of the dimension(s) along which the statistical operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the statistical operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the statistical operation. + keep_attrs: Whether to keep attributes in the statistical operation. + **options: Other options to be passed to the statistical operation. + + Returns: + DataArray that the (chunked) statistical operation is applied. + + """ + if isinstance(dim, dict): + pass + elif dim is ... 
or dim is None: + dim = da.sizes + elif isinstance(dim, str): + dim = {dim: da.sizes[dim]} + elif isinstance(dim, Iterable): + dim = {d: da.sizes[d] for d in dim} + + coord_func: dict[Hashable, Stat] = {} + + for name, coord in da.coords.items(): + if coord.dtype.kind in NUMERIC_KINDS: + coord_func[name] = numeric_coord_func + else: + if nonnumeric_coord_func == "first": + coord_func[name] = _first + elif nonnumeric_coord_func == "last": + coord_func[name] = _last + else: + coord_func[name] = nonnumeric_coord_func + + coarsened = da.coarsen( + dim, + boundary=boundary, + coord_func=coord_func, + side=side, + ) + + if isinstance(func, str): + return getattr(coarsened, func)( + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + if callable(func): + return coarsened.reduce( + func=func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + raise TypeError("Func must be either callable or string.") + + +def _first( + array: NDArray[Any], + axis: Union[Sequence[int], int, None], + **kwargs: Any, +) -> NDArray[Any]: + """Similar to numpy.take(array, 0, axis) but supports multiple axes.""" + if not isinstance(axis, Sequence): + return np.take(array, FIRST_INDEX, axis=axis) + + slices: list[Union[slice, int]] = [slice(None)] * array.ndim + + for ax in axis: + slices[ax] = FIRST_INDEX + + return array[tuple(slices)] + + +def _last( + array: NDArray[Any], + axis: Union[Sequence[int], int, None], + **kwargs: Any, +) -> NDArray[Any]: + """Similar to numpy.take(array, -1, axis) but supports multiple axes.""" + if not isinstance(axis, Sequence): + return np.take(array, LAST_INDEX, axis=axis) + + slices: list[Union[slice, int]] = [slice(None)] * array.ndim + + for ax in axis: + slices[ax] = LAST_INDEX + + return array[tuple(slices)] From bbaa3ee8f6ca6d63fa6349dc64fbe0c0c04571fb Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 03:53:29 +0000 Subject: [PATCH 02/11] #240 Add each stat functions --- decode/stats.py | 555 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 553 insertions(+), 2 deletions(-) diff --git a/decode/stats.py b/decode/stats.py index dad2527..1c66d0e 100644 --- a/decode/stats.py +++ b/decode/stats.py @@ -1,4 +1,16 @@ -__all__ = ["apply"] +__all__ = [ + "all", + "any", + "count", + "max", + "mean", + "median", + "min", + "prod", + "std", + "sum", + "var", +] # standard library @@ -25,7 +37,546 @@ Stat = Union[Callable[..., Any], str] -def apply( +def all( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``all`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``all`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``all`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). 
+ nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``all`` operation. + keep_attrs: Whether to keep attributes in the ``all`` operation. + **options: Other options to be passed to the ``all`` operation. + + Returns: + DataArray that the (chunked) ``all`` operation is applied. + + """ + return _apply( + da, + "all", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def any( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``any`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``any`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``any`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``any`` operation. + keep_attrs: Whether to keep attributes in the ``any`` operation. + **options: Other options to be passed to the ``any`` operation. + + Returns: + DataArray that the (chunked) ``any`` operation is applied. + + """ + return _apply( + da, + "any", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def count( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``count`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``count`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``count`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``count`` operation. + keep_attrs: Whether to keep attributes in the ``count`` operation. 
+ **options: Other options to be passed to the ``count`` operation. + + Returns: + DataArray that the (chunked) ``count`` operation is applied. + + """ + return _apply( + da, + "count", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def max( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``max`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``max`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``max`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``max`` operation. + keep_attrs: Whether to keep attributes in the ``max`` operation. + **options: Other options to be passed to the ``max`` operation. + + Returns: + DataArray that the (chunked) ``max`` operation is applied. + + """ + return _apply( + da, + "max", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def mean( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``mean`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``mean`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``mean`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``mean`` operation. + keep_attrs: Whether to keep attributes in the ``mean`` operation. + **options: Other options to be passed to the ``mean`` operation. + + Returns: + DataArray that the (chunked) ``mean`` operation is applied. 
+ + """ + return _apply( + da, + "mean", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def median( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``median`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``median`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``median`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``median`` operation. + keep_attrs: Whether to keep attributes in the ``median`` operation. + **options: Other options to be passed to the ``median`` operation. + + Returns: + DataArray that the (chunked) ``median`` operation is applied. + + """ + return _apply( + da, + "median", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def min( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``min`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``min`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``min`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``min`` operation. + keep_attrs: Whether to keep attributes in the ``min`` operation. + **options: Other options to be passed to the ``min`` operation. + + Returns: + DataArray that the (chunked) ``min`` operation is applied. 
+ + """ + return _apply( + da, + "min", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def prod( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``prod`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``prod`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``prod`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``prod`` operation. + keep_attrs: Whether to keep attributes in the ``prod`` operation. + **options: Other options to be passed to the ``prod`` operation. + + Returns: + DataArray that the (chunked) ``prod`` operation is applied. + + """ + return _apply( + da, + "prod", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def std( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``std`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``std`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``std`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``std`` operation. + keep_attrs: Whether to keep attributes in the ``std`` operation. + **options: Other options to be passed to the ``std`` operation. + + Returns: + DataArray that the (chunked) ``std`` operation is applied. 
+ + """ + return _apply( + da, + "std", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def sum( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``sum`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``sum`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``sum`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``sum`` operation. + keep_attrs: Whether to keep attributes in the ``sum`` operation. + **options: Other options to be passed to the ``sum`` operation. + + Returns: + DataArray that the (chunked) ``sum`` operation is applied. + + """ + return _apply( + da, + "sum", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def var( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + skipna: Optional[bool] = None, + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``var`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``var`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``var`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + skipna: Whether to ignore missing values in the ``var`` operation. + keep_attrs: Whether to keep attributes in the ``var`` operation. + **options: Other options to be passed to the ``var`` operation. + + Returns: + DataArray that the (chunked) ``var`` operation is applied. 
+ + """ + return _apply( + da, + "var", + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + skipna=skipna, + keep_attrs=keep_attrs, + **options, + ) + + +def _apply( da: xr.DataArray, func: Stat, /, From 5a736ebae217ed73f01c881e3f5b134b4a29aa26 Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 03:59:03 +0000 Subject: [PATCH 03/11] #240 Add squeeze before returning DataArray --- decode/stats.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/decode/stats.py b/decode/stats.py index 1c66d0e..40b7bed 100644 --- a/decode/stats.py +++ b/decode/stats.py @@ -647,7 +647,7 @@ def _apply( skipna=skipna, keep_attrs=keep_attrs, **options, - ) + ).squeeze() if callable(func): return coarsened.reduce( @@ -655,7 +655,7 @@ def _apply( skipna=skipna, keep_attrs=keep_attrs, **options, - ) + ).squeeze() raise TypeError("Func must be either callable or string.") From cd8be20b42df7724a826ab74c513e163fd7d69a3 Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 04:48:33 +0000 Subject: [PATCH 04/11] #240 Fix to remove skipna option from any, all, count --- decode/stats.py | 59 +++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/decode/stats.py b/decode/stats.py index 40b7bed..3384ad7 100644 --- a/decode/stats.py +++ b/decode/stats.py @@ -46,7 +46,6 @@ def all( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, **options: Any, ) -> xr.DataArray: @@ -64,7 +63,6 @@ def all( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``all`` operation. keep_attrs: Whether to keep attributes in the ``all`` operation. **options: Other options to be passed to the ``all`` operation. @@ -80,7 +78,6 @@ def all( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, **options, ) @@ -95,7 +92,6 @@ def any( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, **options: Any, ) -> xr.DataArray: @@ -113,7 +109,6 @@ def any( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``any`` operation. keep_attrs: Whether to keep attributes in the ``any`` operation. **options: Other options to be passed to the ``any`` operation. 
@@ -129,7 +124,6 @@ def any( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, **options, ) @@ -144,7 +138,6 @@ def count( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, **options: Any, ) -> xr.DataArray: @@ -162,7 +155,6 @@ def count( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``count`` operation. keep_attrs: Whether to keep attributes in the ``count`` operation. **options: Other options to be passed to the ``count`` operation. @@ -178,7 +170,6 @@ def count( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, **options, ) @@ -193,8 +184,8 @@ def max( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``max`` operation to a DataArray. @@ -211,8 +202,8 @@ def max( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``max`` operation. keep_attrs: Whether to keep attributes in the ``max`` operation. + skipna: Whether to ignore missing values in the ``max`` operation. **options: Other options to be passed to the ``max`` operation. Returns: @@ -227,8 +218,8 @@ def max( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, + skipna=skipna, **options, ) @@ -242,8 +233,8 @@ def mean( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``mean`` operation to a DataArray. @@ -260,8 +251,8 @@ def mean( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``mean`` operation. keep_attrs: Whether to keep attributes in the ``mean`` operation. + skipna: Whether to ignore missing values in the ``mean`` operation. **options: Other options to be passed to the ``mean`` operation. Returns: @@ -276,8 +267,8 @@ def mean( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, + skipna=skipna, **options, ) @@ -291,8 +282,8 @@ def median( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``median`` operation to a DataArray. 
@@ -309,8 +300,8 @@ def median( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``median`` operation. keep_attrs: Whether to keep attributes in the ``median`` operation. + skipna: Whether to ignore missing values in the ``median`` operation. **options: Other options to be passed to the ``median`` operation. Returns: @@ -325,8 +316,8 @@ def median( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, + skipna=skipna, **options, ) @@ -340,8 +331,8 @@ def min( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``min`` operation to a DataArray. @@ -358,8 +349,8 @@ def min( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``min`` operation. keep_attrs: Whether to keep attributes in the ``min`` operation. + skipna: Whether to ignore missing values in the ``min`` operation. **options: Other options to be passed to the ``min`` operation. Returns: @@ -374,8 +365,8 @@ def min( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, + skipna=skipna, **options, ) @@ -389,8 +380,8 @@ def prod( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``prod`` operation to a DataArray. @@ -407,8 +398,8 @@ def prod( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``prod`` operation. keep_attrs: Whether to keep attributes in the ``prod`` operation. + skipna: Whether to ignore missing values in the ``prod`` operation. **options: Other options to be passed to the ``prod`` operation. Returns: @@ -423,8 +414,8 @@ def prod( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, + skipna=skipna, **options, ) @@ -438,8 +429,8 @@ def std( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``std`` operation to a DataArray. @@ -456,8 +447,8 @@ def std( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``std`` operation. keep_attrs: Whether to keep attributes in the ``std`` operation. 
+ skipna: Whether to ignore missing values in the ``std`` operation. **options: Other options to be passed to the ``std`` operation. Returns: @@ -472,8 +463,8 @@ def std( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, + skipna=skipna, **options, ) @@ -487,8 +478,8 @@ def sum( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``sum`` operation to a DataArray. @@ -505,8 +496,8 @@ def sum( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``sum`` operation. keep_attrs: Whether to keep attributes in the ``sum`` operation. + skipna: Whether to ignore missing values in the ``sum`` operation. **options: Other options to be passed to the ``sum`` operation. Returns: @@ -521,8 +512,8 @@ def sum( side=side, numeric_coord_func=numeric_coord_func, nonnumeric_coord_func=nonnumeric_coord_func, - skipna=skipna, keep_attrs=keep_attrs, + skipna=skipna, **options, ) @@ -536,8 +527,8 @@ def var( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, + skipna: Optional[bool] = None, **options: Any, ) -> xr.DataArray: """Apply a (chunked) ``var`` operation to a DataArray. @@ -554,8 +545,8 @@ def var( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the ``var`` operation. keep_attrs: Whether to keep attributes in the ``var`` operation. + skipna: Whether to ignore missing values in the ``var`` operation. **options: Other options to be passed to the ``var`` operation. Returns: @@ -586,7 +577,6 @@ def _apply( side: Union[Side, dict[Hashable, Side]] = "left", numeric_coord_func: Stat = "mean", nonnumeric_coord_func: Stat = "first", - skipna: Optional[bool] = None, keep_attrs: Optional[bool] = None, **options: Any, ) -> xr.DataArray: @@ -605,7 +595,6 @@ def _apply( for the numeric coordinates (bool, numbers, datetime, timedelta). nonnumeric_coord_func: Function or name of the statistical operation for the non-numeric coordinates (str, bytes, and general object). - skipna: Whether to ignore missing values in the statistical operation. keep_attrs: Whether to keep attributes in the statistical operation. **options: Other options to be passed to the statistical operation. 
@@ -644,7 +633,6 @@ def _apply( if isinstance(func, str): return getattr(coarsened, func)( - skipna=skipna, keep_attrs=keep_attrs, **options, ).squeeze() @@ -652,7 +640,6 @@ def _apply( if callable(func): return coarsened.reduce( func=func, - skipna=skipna, keep_attrs=keep_attrs, **options, ).squeeze() From ebff2af4555f0cc19efef8f868ec155a8d4ce720 Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 04:50:20 +0000 Subject: [PATCH 05/11] #240 Accept first and last as numeric_coord_func --- decode/stats.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/decode/stats.py b/decode/stats.py index 3384ad7..e3d273f 100644 --- a/decode/stats.py +++ b/decode/stats.py @@ -613,16 +613,21 @@ def _apply( coord_func: dict[Hashable, Stat] = {} + if numeric_coord_func == "first": + numeric_coord_func = _first + elif numeric_coord_func == "last": + numeric_coord_func = _last + + if nonnumeric_coord_func == "first": + nonnumeric_coord_func = _first + elif nonnumeric_coord_func == "last": + nonnumeric_coord_func = _last + for name, coord in da.coords.items(): if coord.dtype.kind in NUMERIC_KINDS: coord_func[name] = numeric_coord_func else: - if nonnumeric_coord_func == "first": - coord_func[name] = _first - elif nonnumeric_coord_func == "last": - coord_func[name] = _last - else: - coord_func[name] = nonnumeric_coord_func + coord_func[name] = nonnumeric_coord_func coarsened = da.coarsen( dim, From da35df551e1a4a52ea56befa0c9d2325af4df7de Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 05:07:14 +0000 Subject: [PATCH 06/11] #240 Add tests for stats module --- decode/stats.py | 2 +- tests/test_stats.py | 221 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 tests/test_stats.py diff --git a/decode/stats.py b/decode/stats.py index e3d273f..42b924d 100644 --- a/decode/stats.py +++ b/decode/stats.py @@ -49,7 +49,7 @@ def all( keep_attrs: Optional[bool] = None, **options: Any, ) -> xr.DataArray: - """Apply a (chunked) ``all`` operation to a DataArray. + """Apply a (chunked) ``all`` operation to a DataArray. Args: da: Input DataArray. 
diff --git a/tests/test_stats.py b/tests/test_stats.py new file mode 100644 index 0000000..f60f73e --- /dev/null +++ b/tests/test_stats.py @@ -0,0 +1,221 @@ +# dependencies +import numpy as np +import xarray as xr +from decode import stats + + +DATA = xr.DataArray( + np.arange(36).reshape(6, 6), + dims=("x", "y"), + coords={ + "x": np.arange(0, 6), + "y": np.arange(6, 12), + "c1": ("x", np.array(list("abcdef"))), + "c2": ("x", np.arange(6)), + }, +) + + +def test_all() -> None: + expected = xr.DataArray( + np.array([[False, True], [True, True], [True, True]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.all(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_any() -> None: + expected = xr.DataArray( + np.array([[True, True], [True, True], [True, True]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.any(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_count() -> None: + expected = xr.DataArray( + np.array([[6, 6], [6, 6], [6, 6]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.count(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_max() -> None: + expected = xr.DataArray( + np.array([[8, 11], [20, 23], [32, 35]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.max(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_mean() -> None: + expected = xr.DataArray( + np.array([[4, 7], [16, 19], [28, 31]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.mean(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_median() -> None: + expected = xr.DataArray( + np.array([[4, 7], [16, 19], [28, 31]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.median(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_min() -> None: + expected = xr.DataArray( + np.array([[0, 3], [12, 15], [24, 27]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.min(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_prod() -> None: + expected = xr.DataArray( + np.array( + [ + [0, 59400], + [14938560, 43354080], + [464256000, 860955480], + ] + ), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.prod(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_std() -> None: + expected = xr.DataArray( + np.array( + [ + 
[3.1091263510296048, 3.1091263510296048], + [3.1091263510296048, 3.1091263510296048], + [3.1091263510296048, 3.1091263510296048], + ] + ), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.std(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_sum() -> None: + expected = xr.DataArray( + np.array([[24, 42], [96, 114], [168, 186]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.sum(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_var() -> None: + expected = xr.DataArray( + np.array( + [ + [9.666666666666666, 9.666666666666666], + [9.666666666666666, 9.666666666666666], + [9.666666666666666, 9.666666666666666], + ] + ), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.var(DATA, dim={"x": 2, "y": 3}), + expected, + ) From 9aa537c25003747bab90cf05c942ef1ec31a1713 Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 05:11:56 +0000 Subject: [PATCH 07/11] #240 Make apply function public --- decode/stats.py | 193 ++++++++++++++++++++++++------------------------ 1 file changed, 97 insertions(+), 96 deletions(-) diff --git a/decode/stats.py b/decode/stats.py index 42b924d..4dc042c 100644 --- a/decode/stats.py +++ b/decode/stats.py @@ -1,6 +1,7 @@ __all__ = [ "all", "any", + "apply", "count", "max", "mean", @@ -37,6 +38,91 @@ Stat = Union[Callable[..., Any], str] +def apply( + da: xr.DataArray, + func: Stat, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) statistical operation to a DataArray. + + Args: + da: Input DataArray. + func: Function or name of the statistical operation (e.g. ``'mean'``). + dim: Name(s) of the dimension(s) along which the statistical operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the statistical operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + keep_attrs: Whether to keep attributes in the statistical operation. + **options: Other options to be passed to the statistical operation. + + Returns: + DataArray that the (chunked) statistical operation is applied. + + """ + if isinstance(dim, dict): + pass + elif dim is ... 
or dim is None: + dim = da.sizes + elif isinstance(dim, str): + dim = {dim: da.sizes[dim]} + elif isinstance(dim, Iterable): + dim = {d: da.sizes[d] for d in dim} + + coord_func: dict[Hashable, Stat] = {} + + if numeric_coord_func == "first": + numeric_coord_func = _first + elif numeric_coord_func == "last": + numeric_coord_func = _last + + if nonnumeric_coord_func == "first": + nonnumeric_coord_func = _first + elif nonnumeric_coord_func == "last": + nonnumeric_coord_func = _last + + for name, coord in da.coords.items(): + if coord.dtype.kind in NUMERIC_KINDS: + coord_func[name] = numeric_coord_func + else: + coord_func[name] = nonnumeric_coord_func + + coarsened = da.coarsen( + dim, + boundary=boundary, + coord_func=coord_func, + side=side, + ) + + if isinstance(func, str): + return getattr(coarsened, func)( + keep_attrs=keep_attrs, + **options, + ).squeeze() + + if callable(func): + return coarsened.reduce( + func=func, + keep_attrs=keep_attrs, + **options, + ).squeeze() + + raise TypeError("Func must be either callable or string.") + + def all( da: xr.DataArray, /, @@ -70,7 +156,7 @@ def all( DataArray that the (chunked) ``all`` operation is applied. """ - return _apply( + return apply( da, "all", dim=dim, @@ -116,7 +202,7 @@ def any( DataArray that the (chunked) ``any`` operation is applied. """ - return _apply( + return apply( da, "any", dim=dim, @@ -162,7 +248,7 @@ def count( DataArray that the (chunked) ``count`` operation is applied. """ - return _apply( + return apply( da, "count", dim=dim, @@ -210,7 +296,7 @@ def max( DataArray that the (chunked) ``max`` operation is applied. """ - return _apply( + return apply( da, "max", dim=dim, @@ -259,7 +345,7 @@ def mean( DataArray that the (chunked) ``mean`` operation is applied. """ - return _apply( + return apply( da, "mean", dim=dim, @@ -308,7 +394,7 @@ def median( DataArray that the (chunked) ``median`` operation is applied. """ - return _apply( + return apply( da, "median", dim=dim, @@ -357,7 +443,7 @@ def min( DataArray that the (chunked) ``min`` operation is applied. """ - return _apply( + return apply( da, "min", dim=dim, @@ -406,7 +492,7 @@ def prod( DataArray that the (chunked) ``prod`` operation is applied. """ - return _apply( + return apply( da, "prod", dim=dim, @@ -455,7 +541,7 @@ def std( DataArray that the (chunked) ``std`` operation is applied. """ - return _apply( + return apply( da, "std", dim=dim, @@ -504,7 +590,7 @@ def sum( DataArray that the (chunked) ``sum`` operation is applied. """ - return _apply( + return apply( da, "sum", dim=dim, @@ -553,7 +639,7 @@ def var( DataArray that the (chunked) ``var`` operation is applied. """ - return _apply( + return apply( da, "var", dim=dim, @@ -567,91 +653,6 @@ def var( ) -def _apply( - da: xr.DataArray, - func: Stat, - /, - *, - dim: Union[Dims, dict[Hashable, int]] = None, - boundary: Boundary = "trim", - side: Union[Side, dict[Hashable, Side]] = "left", - numeric_coord_func: Stat = "mean", - nonnumeric_coord_func: Stat = "first", - keep_attrs: Optional[bool] = None, - **options: Any, -) -> xr.DataArray: - """Apply a (chunked) statistical operation to a DataArray. - - Args: - da: Input DataArray. - func: Function or name of the statistical operation (e.g. ``'mean'``). - dim: Name(s) of the dimension(s) along which the statistical operation - will be applied. If a dictionary such as ``{dim: size, ...}`` - is specified, then the statistical operation will be applied - to every data chunk of given size. 
- boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. - side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. - numeric_coord_func: Function or name of the statistical operation - for the numeric coordinates (bool, numbers, datetime, timedelta). - nonnumeric_coord_func: Function or name of the statistical operation - for the non-numeric coordinates (str, bytes, and general object). - keep_attrs: Whether to keep attributes in the statistical operation. - **options: Other options to be passed to the statistical operation. - - Returns: - DataArray that the (chunked) statistical operation is applied. - - """ - if isinstance(dim, dict): - pass - elif dim is ... or dim is None: - dim = da.sizes - elif isinstance(dim, str): - dim = {dim: da.sizes[dim]} - elif isinstance(dim, Iterable): - dim = {d: da.sizes[d] for d in dim} - - coord_func: dict[Hashable, Stat] = {} - - if numeric_coord_func == "first": - numeric_coord_func = _first - elif numeric_coord_func == "last": - numeric_coord_func = _last - - if nonnumeric_coord_func == "first": - nonnumeric_coord_func = _first - elif nonnumeric_coord_func == "last": - nonnumeric_coord_func = _last - - for name, coord in da.coords.items(): - if coord.dtype.kind in NUMERIC_KINDS: - coord_func[name] = numeric_coord_func - else: - coord_func[name] = nonnumeric_coord_func - - coarsened = da.coarsen( - dim, - boundary=boundary, - coord_func=coord_func, - side=side, - ) - - if isinstance(func, str): - return getattr(coarsened, func)( - keep_attrs=keep_attrs, - **options, - ).squeeze() - - if callable(func): - return coarsened.reduce( - func=func, - keep_attrs=keep_attrs, - **options, - ).squeeze() - - raise TypeError("Func must be either callable or string.") - - def _first( array: NDArray[Any], axis: Union[Sequence[int], int, None], From 2b1e34eeb387cfa724ceee27cd9816456d6b7831 Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 05:16:11 +0000 Subject: [PATCH 08/11] #240 Add first and last functions --- decode/stats.py | 94 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_stats.py | 34 ++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/decode/stats.py b/decode/stats.py index 4dc042c..0684370 100644 --- a/decode/stats.py +++ b/decode/stats.py @@ -3,6 +3,8 @@ "any", "apply", "count", + "first", + "last", "max", "mean", "median", @@ -261,6 +263,98 @@ def count( ) +def first( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``first`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``first`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``first`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). 
+ nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + keep_attrs: Whether to keep attributes in the ``first`` operation. + **options: Other options to be passed to the ``first`` operation. + + Returns: + DataArray that the (chunked) ``first`` operation is applied. + + """ + return apply( + da, + _first, + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + keep_attrs=keep_attrs, + **options, + ) + + +def last( + da: xr.DataArray, + /, + *, + dim: Union[Dims, dict[Hashable, int]] = None, + boundary: Boundary = "trim", + side: Union[Side, dict[Hashable, Side]] = "left", + numeric_coord_func: Stat = "mean", + nonnumeric_coord_func: Stat = "first", + keep_attrs: Optional[bool] = None, + **options: Any, +) -> xr.DataArray: + """Apply a (chunked) ``last`` operation to a DataArray. + + Args: + da: Input DataArray. + dim: Name(s) of the dimension(s) along which the ``last`` operation + will be applied. If a dictionary such as ``{dim: size, ...}`` + is specified, then the ``last`` operation will be applied + to every data chunk of given size. + boundary: Same option as ``xarray.DataArray.coarsen`` but defaults to ``'trim'``. + side: Same option as ``xarray.DataArray.coarsen`` and defualts to ``'left'``. + numeric_coord_func: Function or name of the statistical operation + for the numeric coordinates (bool, numbers, datetime, timedelta). + nonnumeric_coord_func: Function or name of the statistical operation + for the non-numeric coordinates (str, bytes, and general object). + keep_attrs: Whether to keep attributes in the ``last`` operation. + **options: Other options to be passed to the ``last`` operation. + + Returns: + DataArray that the (chunked) ``last`` operation is applied. 
+ + """ + return apply( + da, + _last, + dim=dim, + boundary=boundary, + side=side, + numeric_coord_func=numeric_coord_func, + nonnumeric_coord_func=nonnumeric_coord_func, + keep_attrs=keep_attrs, + **options, + ) + + def max( da: xr.DataArray, /, diff --git a/tests/test_stats.py b/tests/test_stats.py index f60f73e..c4b8471 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -67,6 +67,40 @@ def test_count() -> None: ) +def test_first() -> None: + expected = xr.DataArray( + np.array([[0, 3], [12, 15], [24, 27]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.first(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + +def test_last() -> None: + expected = xr.DataArray( + np.array([[8, 11], [20, 23], [32, 35]]), + dims=("x", "y"), + coords={ + "x": np.array([0.5, 2.5, 4.5]), + "y": np.array([7.0, 10.0]), + "c1": ("x", np.array(list("ace"))), + "c2": ("x", np.array([0.5, 2.5, 4.5])), + }, + ) + xr.testing.assert_equal( + stats.last(DATA, dim={"x": 2, "y": 3}), + expected, + ) + + def test_max() -> None: expected = xr.DataArray( np.array([[8, 11], [20, 23], [32, 35]]), From 523e9e6841d1188324e145a9aed29bfbef783d57 Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 05:19:26 +0000 Subject: [PATCH 09/11] =?UTF-8?q?#240=20Update=20package=20version=20(2025?= =?UTF-8?q?.4.2=20=E2=86=92=202025.6.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CITATION.cff | 2 +- README.md | 2 +- decode/__init__.py | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 2d98b11..482015b 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -3,7 +3,7 @@ message: "If you use this software, please cite it as below." title: "de:code" abstract: "DESHIMA code for data analysis" -version: 2025.4.2 +version: 2025.6.0 date-released: 2025-04-17 license: "MIT" doi: "10.5281/zenodo.3384216" diff --git a/README.md b/README.md index 10218f5..5724f90 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ DESHIMA code for data analysis ## Installation ```shell -pip install decode==2025.4.2 +pip install decode==2025.6.0 ``` ## Quick look diff --git a/decode/__init__.py b/decode/__init__.py index d9e8183..bad0327 100644 --- a/decode/__init__.py +++ b/decode/__init__.py @@ -11,7 +11,7 @@ "stats", "utils", ] -__version__ = "2025.4.2" +__version__ = "2025.6.0" # submodules diff --git a/pyproject.toml b/pyproject.toml index 56b3409..d93ce3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "decode" -version = "2025.4.2" +version = "2025.6.0" description = "DESHIMA code for data analysis" readme = "README.md" keywords = [ From 790228ebe1b4bcfe686cab58cb6f4afebcb7520c Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 05:19:42 +0000 Subject: [PATCH 10/11] #240 Update project dependencies --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 4a1cc8e..a67cb77 100644 --- a/uv.lock +++ b/uv.lock @@ -535,7 +535,7 @@ wheels = [ [[package]] name = "decode" -version = "2025.4.2" +version = "2025.6.0" source = { editable = "." 
} dependencies = [ { name = "astropy", version = "6.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, From ff69239c751c9c23133aea28c09663436e2670ea Mon Sep 17 00:00:00 2001 From: Akio Taniguchi <13254278+astropenguin@users.noreply.github.com> Date: Mon, 23 Jun 2025 05:27:09 +0000 Subject: [PATCH 11/11] #240 Update citation file --- CITATION.cff | 55 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 482015b..3d5507b 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,22 +1,37 @@ -cff-version: 1.2.0 -message: "If you use this software, please cite it as below." +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! -title: "de:code" -abstract: "DESHIMA code for data analysis" -version: 2025.6.0 -date-released: 2025-04-17 -license: "MIT" -doi: "10.5281/zenodo.3384216" -url: "https://github.com/deshima-dev/decode" +cff-version: 1.2.0 +title: decode +message: >- + If you use this software, please cite it using the + metadata from this file. +type: software authors: - - given-names: "Akio" - family-names: "Taniguchi" - affiliation: "Kitami Institute of Technology" - orcid: "https://orcid.org/0000-0002-9695-6183" - - given-names: "Juri" - family-names: "Yamanaka" - affiliation: "Nagoya University" - - given-names: "Shinji" - family-names: "Fujita" - affiliation: "The Institute of Statistical Mathematics" - orcid: "https://orcid.org/0000-0002-6375-7065" + - given-names: Akio + family-names: Taniguchi + affiliation: Kitami Institute of Technology + orcid: 'https://orcid.org/0000-0002-9695-6183' + - given-names: Juri + family-names: Yamanaka + affiliation: Nagoya University + - given-names: Shinji + family-names: Fujita + affiliation: The Institute of Statistical Mathematics + orcid: 'https://orcid.org/0000-0002-6375-7065' +identifiers: + - type: doi + value: 10.5281/zenodo.3384216 +repository-code: 'https://github.com/deshima-dev/decode' +url: 'https://deshima-dev.github.io/decode' +abstract: DESHIMA code for data analysis +keywords: + - python + - astronomy + - data-analysis + - deshima + - spectroscopy + - submillimeter +license: MIT +version: 2025.6.0 +date-released: '2025-06-23'
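
Usage sketch for the decode.stats module added in this series, based on the array used in tests/test_stats.py above; the variable names below (da, binned, binned_np, total) are illustrative only and not part of the patches.

import numpy as np
import xarray as xr
from decode import stats

# Same 6x6 DataArray as in tests/test_stats.py, with one numeric ("c2")
# and one non-numeric ("c1") coordinate along the "x" dimension.
da = xr.DataArray(
    np.arange(36).reshape(6, 6),
    dims=("x", "y"),
    coords={
        "x": np.arange(0, 6),
        "y": np.arange(6, 12),
        "c1": ("x", np.array(list("abcdef"))),
        "c2": ("x", np.arange(6)),
    },
)

# Chunked mean over 2x3 blocks: numeric coordinates are averaged per chunk
# (numeric_coord_func="mean"), while the non-numeric coordinate "c1" keeps
# its first value in each chunk (nonnumeric_coord_func="first").
binned = stats.mean(da, dim={"x": 2, "y": 3})
print(binned.shape)         # (3, 2)
print(binned["c1"].values)  # ['a' 'c' 'e']

# The generic entry point also accepts a callable reducer, which is passed
# to DataArrayCoarsen.reduce under the hood.
binned_np = stats.apply(da, np.mean, dim={"x": 2, "y": 3})

# Omitting dim applies the operation over all dimensions at once
# (the size-1 result dimensions are then squeezed away).
total = stats.mean(da)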