Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9750991
+postprocessing: +recenter
JamesMcClung Mar 20, 2025
de8fca9
+test_postprocessing: +recenter tests
JamesMcClung Mar 20, 2025
772daf9
postprocessing: implement recenter
JamesMcClung Mar 20, 2025
89b6365
postprocessing; *: rename to get_recentered
JamesMcClung Mar 20, 2025
a099518
postprocessing: +BoundaryInterpMethod
JamesMcClung Mar 20, 2025
b00b25b
postprocessing: +auto_recenter
JamesMcClung Mar 20, 2025
b2a4c5e
test_postprocessing: +test_autorecenter_ec_to_nc
JamesMcClung Mar 20, 2025
a8d3ab3
postprocessing: partial impl of auto_recenter
JamesMcClung Mar 20, 2025
fa33c63
test_postprocessing: +test_autorecenter_ec_to_cc
JamesMcClung Mar 20, 2025
35ae011
postprocessing: pass test
JamesMcClung Mar 20, 2025
72d6307
test_postprocessing: add "dont_touch" var
JamesMcClung Mar 20, 2025
52c6a01
postprocessing: fix spurious renames
JamesMcClung Mar 20, 2025
c27f92d
postprocessing: +_rename_var
JamesMcClung Mar 20, 2025
58955e0
test_postprocessing: +fc tests
JamesMcClung Mar 20, 2025
408d269
postprocessing: pass fc tests
JamesMcClung Mar 20, 2025
6689864
test_postprocessing: +test_autorecenter_spurious_renames
JamesMcClung Mar 20, 2025
4afed34
test_postprocessing: +nc, cc tests
JamesMcClung Mar 20, 2025
c40c75e
postprocessing: pass nc, cc tests
JamesMcClung Mar 20, 2025
6a52df0
postprocessing: ugly nasty reformatting
JamesMcClung Mar 20, 2025
35f7b37
postprocessing: fix typing issues
JamesMcClung Mar 20, 2025
dae5a51
fix mypy complaint
germasch Mar 21, 2025
ffcd124
test_postprocessing: add test for nonstr varname
JamesMcClung Mar 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/pscpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
import pathlib

from ._version import version as __version__
from .postprocessing import auto_recenter, get_recentered
from .psc import decode_psc

sample_dir = pathlib.Path(__file__).parent / "sample"


__all__ = [
"__version__",
"auto_recenter",
"decode_psc",
"get_recentered",
"sample_dir",
]
93 changes: 93 additions & 0 deletions src/pscpy/postprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

from typing import Literal, TypeAlias

import xarray as xr

BoundaryInterpMethod: TypeAlias = Literal["periodic", "pad", "zero"]


def get_recentered(
da: xr.DataArray,
dim: str,
interp_dir: Literal[-1, 1],
*,
boundary: BoundaryInterpMethod = "periodic",
) -> xr.DataArray:
"""
Returns a new array with values along `dim` recentered in the direction `interp_dir`.

For example, `interp_dir=-1` interpolates node-centered values from cell-centered values, because each node center is at a lesser coordinate than the cell center of the same index.
"""

shifted = da.roll({dim: -interp_dir}, roll_coords=False)
boundary_idx = {-1: 0, 1: -1}[interp_dir]

if boundary == "periodic":
pass # this case is already handled by the behavior of roll()
elif boundary == "pad":
shifted[{dim: boundary_idx}] = da[{dim: boundary_idx}]
elif boundary == "zero":
shifted[{dim: boundary_idx}] = 0

return 0.5 * (da + shifted)


def _rename_var(ds: xr.Dataset, old_name: str, new_name: str) -> None:
ds[new_name] = ds[old_name].rename(new_name)
del ds[old_name]
Comment on lines +36 to +38
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't you use Dataset.rename()? I suppose that's what it's there for -- it does return a new Dataset, but I think that's actually not bad, since it kinda treats Datasets as immutable, so one doesn't have to worry about changing an existing object and possibly messing something up.

I suppose that all of that xarray stuff is implemented efficiently, ie., doesn't make copies and wastes memory.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't use rename because I wanted to do it in-place. I personally just don't like the pattern of ds = ds.do_something() in python, but I don't feel that strongly, and wouldn't mind changing the api to that.

However, I don't trust xarray to do anything efficiently anymore, and even if rename uses views, the recentering itself can't. Using the builtin rename would necessitate that all the changes happen to a copy of the original ds. At best, the original ds would then be garbage-collected, but that all still seems wasteful when these datasets can become so huge.

I'd actually like to make get_recentered work in-place, too, or at least to not make multiple copies (or views? who knows). You can always pass a copy to an in-place function, but you can't do something in-place with a function that makes a copy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that in particular ds = ds.do_something() isn't great, in particular it's iffy if used in notebooks. But if used as ds_cc = ds.psc.recenter('cc'), I think it is a pattern that's more flexible in that it allows for keeping the original data around unchanged, and that again is something that's useful for not breaking other cells in a notebook.

This now actually reminds me of the marimo notebooks I mentioned before -- which IIRC does have restrictions on mutating objects since that makes it hard to automatically figure out a dependency graph.

It is foreign to me to not mutate objects, since that's what one usually does for efficiency reasons, rather than copying and then modifying. But it seems to be xarray's model to not mutate objects, and so I'm kinda inclined to give that approach the benefit of the doubt.

In any case, as far as this PR is concerned, it's fine as-is, it's not worth holding up progress for questions that can be resolved later -- it's not like this defines an interface we expect to be set in stone.



def auto_recenter(
ds: xr.Dataset,
to_centering: Literal["nc", "cc"],
**boundaries: BoundaryInterpMethod,
) -> None:
Comment on lines +41 to +45
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think my preference would be to just call this recenter() (also in that I think eventually this could become ds2 = ds.psc.recenter(). It's automatic, I guess, in that it uses knowledge about the existing staggering, but well, that's more or less an implementation detail.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's also automatic in the sense that it can fail to do the correct thing. It detects which variables need to be recentered based on their names and the passed dimension names (the keys to boundares), but that's a heuristic. For example, it fails on the gauss data, since neither rho nor dive end with _nc or _cc. It similarly fails on pfd_moments (although that could potentially be fixed by appending _nc or _cc to variable names during the decoding step based on the all_1st_*c super-variable name).

If recenter existed, I would expect it to take a map of variable names to their current centerings instead of guessing. That actually seems like a good idea, and I'd be happy to make that so (and auto_recenter would just call recenter with the guessed map). There would be the question of whether/how to rename recentered variables, however.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, not worth the argument at this point. In the longer run, it'd certainly be nice to store the centering information in the output directly, rather than relying on field names / heuristics, making this mostly moot.

Also, IMHO nothing wrong with having recenter() try to do the right thing based on heuristics for now, but later extending it be follow a map of centerings if one is passed.

"""
Recenters variables with names matching a particular pattern to the given centering.

In particular, variable name ending in `"{dim}_ec"`, `"{dim}_fc"`, `"_nc"`, or `"_cc"`, where `dim` is a dimension name and a key in `boundaries`, is recentered appropriately. For example, if `to_centering="nc"` (node-centered), a variable ending in "x_ec" (i.e., the x-component of an edge-centered field) will be recentered along x, but not y or z, since it is already node-centered in those dimensions.

Variables are also renamed appropriately. In the example above, `ex_ec` would be renamed to `ex_nc`.
"""

interp_dir: Literal[-1, 1] = {"cc": 1, "nc": -1}[to_centering] # type: ignore[assignment]

for var_name in ds:
if not isinstance(var_name, str):
continue

needs_rename = False

for dim, boundary_method in boundaries.items():
if (to_centering == "nc" and var_name.endswith(f"{dim}_ec")) or (
to_centering == "cc" and var_name.endswith(f"{dim}_fc")
):
ds[var_name] = get_recentered(
ds[var_name], dim, interp_dir, boundary=boundary_method
)
needs_rename = True
elif (to_centering == "cc" and var_name.endswith(f"{dim}_ec")) or (
to_centering == "nc" and var_name.endswith(f"{dim}_fc")
):
for other_dim, other_boundary_method in boundaries.items():
if other_dim != dim:
ds[var_name] = get_recentered(
ds[var_name],
other_dim,
interp_dir,
boundary=other_boundary_method,
)
needs_rename = True

if (to_centering == "cc" and var_name.endswith("_nc")) or (
to_centering == "nc" and var_name.endswith("_cc")
):
for dim, boundary_method in boundaries.items():
ds[var_name] = get_recentered(
ds[var_name], dim, interp_dir, boundary=boundary_method
)
needs_rename = True

if needs_rename:
_rename_var(ds, var_name, var_name[:-3] + "_" + to_centering)
2 changes: 1 addition & 1 deletion src/pscpy/psc.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(
self.z = self._get_coord(2)

def _get_coord(self, coord_idx: int) -> NDArray[Any]:
return np.linspace( # type: ignore[no-any-return]
return np.linspace(
start=self.corner[coord_idx],
stop=self.corner[coord_idx] + self.length[coord_idx],
num=self.gdims[coord_idx],
Expand Down
155 changes: 155 additions & 0 deletions tests/test_postprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from __future__ import annotations

import numpy as np
import pytest
import xarray as xr

import pscpy


@pytest.fixture
def test_dataarray():
return xr.DataArray([4, 5, 6, 5], coords={"x": [0, 1, 2, 3]})


def test_recenter_periodic_left(test_dataarray):
recentered = pscpy.get_recentered(test_dataarray, "x", -1)
assert np.array_equal(recentered.coords, test_dataarray.coords)
assert np.array_equal(recentered, [4.5, 4.5, 5.5, 5.5])


def test_recenter_periodic_right(test_dataarray):
recentered = pscpy.get_recentered(test_dataarray, "x", 1)
assert np.array_equal(recentered.coords, test_dataarray.coords)
assert np.array_equal(recentered, [4.5, 5.5, 5.5, 4.5])


def test_recenter_pad_left(test_dataarray):
recentered = pscpy.get_recentered(test_dataarray, "x", -1, boundary="pad")
assert np.array_equal(recentered.coords, test_dataarray.coords)
assert np.array_equal(recentered, [4, 4.5, 5.5, 5.5])


def test_recenter_pad_right(test_dataarray):
recentered = pscpy.get_recentered(test_dataarray, "x", 1, boundary="pad")
assert np.array_equal(recentered.coords, test_dataarray.coords)
assert np.array_equal(recentered, [4.5, 5.5, 5.5, 5])


def test_recenter_zero_left(test_dataarray):
recentered = pscpy.get_recentered(test_dataarray, "x", -1, boundary="zero")
assert np.array_equal(recentered.coords, test_dataarray.coords)
assert np.array_equal(recentered, [2, 4.5, 5.5, 5.5])


def test_recenter_zero_right(test_dataarray):
recentered = pscpy.get_recentered(test_dataarray, "x", 1, boundary="zero")
assert np.array_equal(recentered.coords, test_dataarray.coords)
assert np.array_equal(recentered, [4.5, 5.5, 5.5, 2.5])


@pytest.fixture
def test_dataset_ec():
coords = [[0, 1], [0, 1, 2]]
dims = ["x", "y"]
ex_ec = xr.DataArray([[0, 1, 2], [3, 4, 5]], coords, dims)
ey_ec = xr.DataArray([[0, 2, 4], [1, 3, 5]], coords, dims)
return xr.Dataset({"ex_ec": ex_ec, "ey_ec": ey_ec})


def test_autorecenter_ec_to_nc(test_dataset_ec):
pscpy.auto_recenter(test_dataset_ec, "nc", x="pad", y="pad")
assert np.array_equal(test_dataset_ec.ex_nc, [[0, 1, 2], [1.5, 2.5, 3.5]])
assert np.array_equal(test_dataset_ec.ey_nc, [[0, 1, 3], [1, 2, 4]])


def test_autorecenter_ec_to_cc(test_dataset_ec):
pscpy.auto_recenter(test_dataset_ec, "cc", x="pad", y="pad")
assert np.array_equal(test_dataset_ec.ex_cc, [[0.5, 1.5, 2], [3.5, 4.5, 5]])
assert np.array_equal(test_dataset_ec.ey_cc, [[0.5, 2.5, 4.5], [1, 3, 5]])


@pytest.fixture
def test_dataset_fc():
coords = [[0, 1], [0, 1, 2]]
dims = ["x", "y"]
hx_fc = xr.DataArray([[0, 1, 2], [3, 4, 5]], coords, dims)
hy_fc = xr.DataArray([[0, 2, 4], [1, 3, 5]], coords, dims)
return xr.Dataset({"hx_fc": hx_fc, "hy_fc": hy_fc})


def test_autorecenter_fc_to_nc(test_dataset_fc):
pscpy.auto_recenter(test_dataset_fc, "nc", x="pad", y="pad")
assert np.array_equal(test_dataset_fc.hx_nc, [[0, 0.5, 1.5], [3, 3.5, 4.5]])
assert np.array_equal(test_dataset_fc.hy_nc, [[0, 2, 4], [0.5, 2.5, 4.5]])


def test_autorecenter_fc_to_cc(test_dataset_fc):
pscpy.auto_recenter(test_dataset_fc, "cc", x="pad", y="pad")
assert np.array_equal(test_dataset_fc.hx_cc, [[1.5, 2.5, 3.5], [3, 4, 5]])
assert np.array_equal(test_dataset_fc.hy_cc, [[1, 3, 4], [2, 4, 5]])


@pytest.fixture
def test_dataset_nc():
coords = [[0, 1], [0, 1, 2]]
dims = ["x", "y"]
var_nc = xr.DataArray([[0, 1, 2], [3, 4, 5]], coords, dims)
return xr.Dataset({"var_nc": var_nc})


def test_autorecenter_nc_to_nc(test_dataset_nc):
# should do nothing
pscpy.auto_recenter(test_dataset_nc, "nc", x="pad", y="pad")
assert np.array_equal(test_dataset_nc.var_nc, [[0, 1, 2], [3, 4, 5]])


def test_autorecenter_nc_to_cc(test_dataset_nc):
pscpy.auto_recenter(test_dataset_nc, "cc", x="pad", y="pad")
assert np.array_equal(test_dataset_nc.var_cc, [[2, 3, 3.5], [3.5, 4.5, 5]])


@pytest.fixture
def test_dataset_cc():
coords = [[0, 1], [0, 1, 2]]
dims = ["x", "y"]
var_cc = xr.DataArray([[0, 1, 2], [3, 4, 5]], coords, dims)
return xr.Dataset({"var_cc": var_cc})


def test_autorecenter_cc_to_cc(test_dataset_cc):
# should do nothing
pscpy.auto_recenter(test_dataset_cc, "cc", x="pad", y="pad")
assert np.array_equal(test_dataset_cc.var_cc, [[0, 1, 2], [3, 4, 5]])


def test_autorecenter_cc_to_nc(test_dataset_cc):
pscpy.auto_recenter(test_dataset_cc, "nc", x="pad", y="pad")
assert np.array_equal(test_dataset_cc.var_nc, [[0, 0.5, 1.5], [1.5, 2, 3]])


@pytest.fixture
def test_dataset_dont_touch():
coords = [[0, 1, 2]]
dims = ["x"]
ex_ec = xr.DataArray([0, 1, 2], coords, dims)
dont_touch = xr.DataArray([0, 1, 2], coords, dims)
return xr.Dataset({"ex_ec": ex_ec, "dont_touch": dont_touch})


def test_autorecenter_spurious_renames(test_dataset_dont_touch):
pscpy.auto_recenter(test_dataset_dont_touch, "cc", x="pad")
assert np.array_equal(test_dataset_dont_touch.dont_touch, [0, 1, 2])


@pytest.fixture
def test_dataset_nonstr_varname():
coords = [[0, 1, 2]]
dims = ["x"]
one = xr.DataArray([0, 1, 2], coords, dims)
return xr.Dataset({1: one})


def test_nonstr_varname(test_dataset_nonstr_varname):
pscpy.auto_recenter(test_dataset_nonstr_varname, "nc", x="pad")
assert np.array_equal(test_dataset_nonstr_varname[1], [0, 1, 2])