Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/api/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ The format is based on `Keep a Changelog`_, and this project adheres to
[Unreleased]
------------

Added
~~~~~

- Added ``ignore_time_purge_empty`` argument to
:meth:`imod.mf6.Modflow6Simulation.mask_all_models` and
:meth:`imod.mf6.Modflow6Simulation.clip_box` to consider a package empty if
its first times step is all nodata. This can save a lot of clipping or masking
transient models with many timesteps.

Fixed
~~~~~

Expand Down
10 changes: 6 additions & 4 deletions imod/common/utilities/mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ def _validate_coords_mask(mask: GridDataArray) -> None:
)


def _mask_all_models(
def mask_all_models(
simulation: ISimulation,
mask: GridDataArray,
ignore_time_purge_empty: bool = False,
):
_validate_coords_mask(mask)
if simulation.is_split():
Expand All @@ -54,21 +55,22 @@ def _mask_all_models(

for name in modelnames:
if is_same_domain(simulation[name].domain, mask):
simulation[name].mask_all_packages(mask)
simulation[name].mask_all_packages(mask, ignore_time_purge_empty)
else:
raise ValueError(
"masking can only be applied to simulations when all the models in the simulation use the same grid."
)


def _mask_all_packages(
def mask_all_packages(
model: IModel,
mask: GridDataArray,
ignore_time_purge_empty: bool = False,
):
_validate_coords_mask(mask)
for pkgname, pkg in model.items():
model[pkgname] = pkg.mask(mask)
model.purge_empty_packages()
model.purge_empty_packages(ignore_time=ignore_time_purge_empty)


def mask_package(package: IPackage, mask: GridDataArray) -> IPackage:
Expand Down
19 changes: 16 additions & 3 deletions imod/mf6/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from imod.common.serializer import EngineType
from imod.common.statusinfo import NestedStatusInfo, StatusInfo, StatusInfoBase
from imod.common.utilities.clip import clip_box_dataset
from imod.common.utilities.mask import _mask_all_packages
from imod.common.utilities.mask import mask_all_packages
from imod.common.utilities.regrid import _regrid_like
from imod.common.utilities.schemata import (
concatenate_schemata_dicts,
Expand Down Expand Up @@ -697,6 +697,7 @@ def clip_box(
y_min: Optional[float] = None,
y_max: Optional[float] = None,
state_for_boundary: Optional[GridDataArray] = None,
ignore_time_purge_empty: bool = False,
):
"""
Clip a model by a bounding box (time, layer, y, x).
Expand Down Expand Up @@ -724,6 +725,14 @@ def clip_box(
state_for_boundary : optional, Union[xr.DataArray, xu.UgridDataArray]
A grids with states that are used to put as boundary values. This
model will get a :class:`imod.mf6.ConstantHead`.
ignore_time_purge_empty : bool, default False
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this a very confusing name.

I think that we need to keep the user away from the internals of this method. They shouldn't have to know that packages are being purged.

Would ignore_time_validation work? Or have them provide a ValidationContext?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Legitimate critique. Unfortunately there are already some methods with this argument as name in the public API, so if I change it here, I have to change it everywhere, and keep on supporting the old argument for the next versions to keep the API stable.

Not the end of the world, but perhaps out of scope for this PR?

I want to refrain from letting users provide ValidationContext themselves, as limiting the amount of custom objects users have to create themselves decreases the learning curve for iMOD Python.

Whether to ignore the time dimension when purging empty packages.
Can improve performance when clipping models with many time steps.
Clipping models can cause package data with all nans. These packages
are considered empty and need to be removed. However, checking all
timesteps for each package is a costly operation. Therefore, this
option can be set to True to only check the first timestep. Defaults
to False.

Returns
-------
Expand Down Expand Up @@ -786,7 +795,7 @@ def clip_box(
if clipped_boundary_condition is not None:
clipped[pkg_name] = clipped_boundary_condition

clipped.purge_empty_packages()
clipped.purge_empty_packages(ignore_time=ignore_time_purge_empty)

return clipped

Expand Down Expand Up @@ -882,6 +891,7 @@ def regrid_like(
def mask_all_packages(
self,
mask: GridDataArray,
ignore_time_purge_empty: bool = False,
):
"""
This function applies a mask to all packages in a model. The mask must
Expand All @@ -898,9 +908,12 @@ def mask_all_packages(
mask: xr.DataArray, xu.UgridDataArray of ints
idomain-like integer array. >0 sets cells to active, 0 sets cells to inactive,
<0 sets cells to vertical passthrough
ignore_time_purge_empty: bool, default False
Whether to ignore time dimension when purging empty packages. Can
improve performance when masking models with many time steps.
"""

_mask_all_packages(self, mask)
mask_all_packages(self, mask, ignore_time_purge_empty)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I first thought that this method was calling itself but it is actually calling a utilities method.
Would it be clearer to add the module name to it?:
utitlities.mas. mask_all_packages

Same applies to mask_all_models

Copy link
Contributor Author

@JoerivanEngelen JoerivanEngelen Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, that might have been part of the reason the underscore was left there in the first place.

I tried the following:

  1. Import the utilities module
from  imod.common import utilities as common_utilities
...
    common_utilities.mask.mask_all_packages(...)

Pythons runs this without error, but this was not understood by the linters and VSCode couldn't trace the function for some reason.

  1. Import the mask utilities module
import imod.common.utilities as utilities
...
def mask_all_packages(mask, ...)
    ...
    mask.mask_all_packages(mask, ...)

The function also has mask as argument here, so the module name clashes here with the argument name.

  1. Import under alias
import imod.common.utilities.mask as mask_utils
...
    mask_utils.mask_all_packages(...)

This too can lead to confusion, as it can lead to developers looking for a module file/folder named mask_utils which is not present (if they overlook the aliasing in the imports). Given that this way of importing is done nowhere else in the codebase, I deem this confusion quite likely.

  1. Reference all modules in function call
import imod
...
    imod.common.utilities.mask.mask_all_packages(...)

Too much clutter in my opinion.

I think I keep it as is in this PR, downside is that developers have to be sharp for the difference between function and method calls here. It appears changing it back to how it was (as a function starting with an underscore) and using it outside the module is a no no in the python world.


def purge_empty_packages(
self, model_name: Optional[str] = "", ignore_time: bool = False
Expand Down
40 changes: 33 additions & 7 deletions imod/mf6/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from imod.common.serializer import EngineType
from imod.common.statusinfo import NestedStatusInfo
from imod.common.utilities.dataclass_type import DataclassType
from imod.common.utilities.mask import _mask_all_models
from imod.common.utilities.mask import mask_all_models
from imod.common.utilities.regrid import _regrid_like
from imod.common.utilities.version import (
get_version,
Expand Down Expand Up @@ -1202,6 +1202,7 @@ def clip_box(
y_min: Optional[float] = None,
y_max: Optional[float] = None,
states_for_boundary: Optional[dict[str, GridDataArray]] = None,
ignore_time_purge_empty: Optional[bool] = None,
) -> Modflow6Simulation:
"""
Clip a simulation by a bounding box (time, layer, y, x).
Expand Down Expand Up @@ -1233,6 +1234,14 @@ def clip_box(
:class:`imod.mf6.ConstantHead`,
:class:`imod.mf6.GroundwaterTransportModel` will get a
:class:`imod.mf6.ConstantConcentration` package.
ignore_time_purge_empty: optional, bool, default None
Whether to ignore the time dimension when purging empty packages.
Can improve performance when clipping models with many time steps.
Clipping models can cause package data with all nans. These packages
are considered empty and need to be removed. However, checking all
timesteps for each package is a costly operation. Therefore, this
option can be set to True to only check the first timestep. If None,
the value of the validation settings of the simulation are used.

Returns
-------
Expand Down Expand Up @@ -1276,6 +1285,8 @@ def clip_box(
raise ValueError(
"Unable to clip simulation. Clipping can only be done on simulations that have a single flow model ."
)
if ignore_time_purge_empty is None:
ignore_time_purge_empty = self._validation_context.ignore_time
for model_name, model in self.get_models().items():
supported, error_with_object = model._is_clipping_supported()
if not supported:
Expand All @@ -1301,6 +1312,7 @@ def clip_box(
y_min=y_min,
y_max=y_max,
state_for_boundary=state_for_boundary,
ignore_time_purge_empty=ignore_time_purge_empty,
)
elif isinstance(value, Package):
clipped[key] = value.clip_box(
Expand Down Expand Up @@ -1398,11 +1410,14 @@ def split(
similar shape as a layer in the domain. The values in the array
indicate to which partition a cell belongs. The values should be
zero or greater.
ignore_time_purge_empty: bool, default None
If True, only the first timestep is validated. This increases
performance for packages with a time dimensions over which changes
of cell activity are not expected. If None, the value of the
validation context is of the simulation is used.
ignore_time_purge_empty: optional, bool, default None
Whether to ignore the time dimension when purging empty packages.
Can improve performance when splitting models with many time steps.
Splitting models can cause package data with all nans. These packages
are considered empty and need to be removed. However, checking all
timesteps for each package is a costly operation. Therefore, this
option can be set to True to only check the first timestep. If None,
the value of the validation settings of the simulation are used.

Returns
-------
Expand Down Expand Up @@ -1688,6 +1703,7 @@ def _has_one_flow_model(self) -> bool:
def mask_all_models(
self,
mask: GridDataArray,
ignore_time_purge_empty: Optional[bool] = None,
) -> None:
"""
This function applies a mask to all models in a simulation, provided they use
Expand All @@ -1702,6 +1718,14 @@ def mask_all_models(
mask: xr.DataArray, xu.UgridDataArray of ints
idomain-like integer array. >0 sets cells to active, 0 sets cells to inactive,
<0 sets cells to vertical passthrough
ignore_time_purge_empty : bool, default False
Whether to ignore the time dimension when purging empty packages.
Can improve performance when masking models with many time steps.
Masking models can cause package data with all nans. These packages
are considered empty and need to be removed. However, checking all
timesteps for each package is a costly operation. Therefore, this
option can be set to True to only check the first timestep. Defaults
to False.

Examples
--------
Expand All @@ -1713,7 +1737,9 @@ def mask_all_models(
mask should be an idomain-like array, i.e. it should have the same shape
as the model and contain integer values.
"""
_mask_all_models(self, mask)
if ignore_time_purge_empty is None:
ignore_time_purge_empty = self._validation_context.ignore_time
mask_all_models(self, mask, ignore_time_purge_empty)

@classmethod
@standard_log_decorator()
Expand Down
2 changes: 1 addition & 1 deletion imod/tests/test_mf6/test_mf6_LHM.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def cleanup_mf6_sim(simulation: Modflow6Simulation) -> None:
pkg.dataset.load()

mask = model.domain
simulation.mask_all_models(mask)
simulation.mask_all_models(mask, ignore_time_purge_empty=True)
dis = model["dis"]

pkgs_to_cleanup = [
Expand Down
4 changes: 3 additions & 1 deletion imod/tests/test_mf6/test_mf6_mask_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
from imod.tests.fixtures.mf6_modelrun_fixture import assert_simulation_can_run


@pytest.mark.parametrize("ignore_time", [True, False])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In all the test it doesnt matter what the flag does
It would be good to have some tests where the outcome is different depending on the flag

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, we already have a test that do this:

  • test_purge_empty_package__ignore_time in imod/tests/test_mf6/test_mf6_model.py

I also tried adding a test to test in imod/tests/test_mf6/test_mf6_rch.py next to the performance test test_ignore_time_validation to test the opposite, this failed however, as there is no easy way to catch fails due to timeouts (something with an xfail) and the test errors instead of fails due to dask when the timeout kicks in. I added a note instead so that I do not waste on this again in the future ;-). This test would only really test whether the performance test ```test_ignore_time_validationis not unexpectedly returning false positives though, which is a nice-to-have.test_purge_empty_package__ignore_time`` already tests if the data handling is correct.

I think spending a lot of time on adding more tests in test_mf6_mask_simulation for this is a bit of a time waster.

def test_mask_simulation(
tmp_path: Path,
flow_transport_simulation: imod.mf6.Modflow6Simulation,
ignore_time: bool,
):
mask = deepcopy(flow_transport_simulation["flow"].domain)
mask.loc[1, 0.5, 15] = 0
flow_transport_simulation.mask_all_models(mask)
flow_transport_simulation.mask_all_models(mask, ignore_time_purge_empty=ignore_time)
assert (
flow_transport_simulation["flow"].domain.sel({"layer": 1, "y": 0.5, "x": 15})
== 0
Expand Down
6 changes: 5 additions & 1 deletion imod/tests/test_mf6/test_mf6_model_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,13 @@ def test_mask_with_time_coordinate(
@pytest.mark.parametrize(
"inactivity_marker", [0, -1]
) # 0 = inactive, -1 = vertical passthrough
@pytest.mark.parametrize("ignore_time_purge_empty", [True, False])
def test_mask_structured(
tmp_path: Path,
structured_flow_model: GroundwaterFlowModel,
mask_cells: list[tuple[int, int, int]],
inactivity_marker: int,
ignore_time_purge_empty: bool,
):
# Arrange
# add a well to the model
Expand All @@ -291,7 +293,9 @@ def test_mask_structured(
mask.values[*cell] = inactivity_marker

# Act
structured_flow_model.mask_all_packages(mask)
structured_flow_model.mask_all_packages(
mask, ignore_time_purge_empty=ignore_time_purge_empty
)

# Assert
unique, counts = np.unique(
Expand Down
12 changes: 10 additions & 2 deletions imod/tests/test_mf6/test_mf6_rch.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,16 @@ def test_validate_false():

@pytest.mark.timeout(10, method="thread")
def test_ignore_time_validation():
# Create a large recharge dataset with a time dimension. This is to test the
# performance of the validation when ignore_time_no_data is True.
"""
Create a large recharge dataset with a time dimension. This is to test the
performance of the validation when ignore_time_no_data is True.
NOTE: There currently is no easy way to test the opposite (i.e.,
ignore_time_no_data is False, then catch timeout with an pytest xfail
marker), because the timeout will terminate the test run with an error when
using dask instead of a fail. Somewhat relevant issue:
https://github.com/pytest-dev/pytest-timeout/issues/181
"""
# Arrange
rng = dask.array.random.default_rng()
layer = [1, 2, 3]
template = imod.util.empty_3d(1.0, 0.0, 1000.0, 1.0, 0.0, 1000.0, layer)
Expand Down
8 changes: 8 additions & 0 deletions imod/tests/test_mf6/test_mf6_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,14 @@ def test_write_circle_model_twice(circle_model, tmp_path):
assert len(diff.right_only) == 0


@pytest.mark.parametrize("ignore_time_purge_empty", [True, False])
def test_simulation_clip_box__ignore_time(circle_model, ignore_time_purge_empty):
simulation = circle_model
simulation.clip_box(
y_min=-50, y_max=0, ignore_time_purge_empty=ignore_time_purge_empty
)


def test_simulation_clip_box__validation_settings_preserved(circle_model):
simulation = circle_model
simulation.set_validation_settings(
Expand Down