diff --git a/doc/sphinx/source/recipes/droughts/recipe_consecdrydays.rst b/doc/sphinx/source/recipes/droughts/recipe_consecdrydays.rst
index 868cb04609..f02359f4fa 100644
--- a/doc/sphinx/source/recipes/droughts/recipe_consecdrydays.rst
+++ b/doc/sphinx/source/recipes/droughts/recipe_consecdrydays.rst
@@ -65,4 +65,5 @@ Example plots
    :align: center
    :width: 14cm

-   Example of the number of occurrences with consecutive dry days of more than five days in the period 2001 to 2002 for the CMIP5 model bcc-csm1-1-m.
+   Example of the number of occurrences with consecutive dry days of more than
+   five days in the period 2001 to 2002 for the CMIP5 model bcc-csm1-1-m.
diff --git a/doc/sphinx/source/recipes/droughts/recipe_lindenlaub25.rst b/doc/sphinx/source/recipes/droughts/recipe_lindenlaub25.rst
new file mode 100644
index 0000000000..368523ef2f
--- /dev/null
+++ b/doc/sphinx/source/recipes/droughts/recipe_lindenlaub25.rst
@@ -0,0 +1,81 @@
+
+.. _recipes_lindenlaub25:
+
+Agricultural Droughts in CMIP6 Future Projections
+=================================================
+
+Overview
+--------
+
+The two recipes presented here evaluate historical simulations of 18 CMIP6
+models and analyse their projections for three different future pathways.
+The results are published in Lindenlaub (2025).
+
+
+Available recipes and diagnostics
+---------------------------------
+
+Recipes are stored in ``recipes/droughts/``:
+
+   * recipe_lindenlaub25_historical.yml
+   * recipe_lindenlaub25_scenarios.yml
+
+Diagnostics used by these recipes:
+
+   * droughts/pet.R
+   * droughts/spei.R
+   * :ref:`droughts/diffmap.py `
+   * :ref:`droughts/distribution.py `
+   * :ref:`droughts/event_area_timeseries.py `
+   * :ref:`droughts/pattern_correlation.py `
+   * :ref:`droughts/timeseries_scenarios.py `
+   * :ref:`droughts/regional_hexagons.py `
+
+Data
+----
+
+Soil moisture is evaluated and discussed, but not required for the PET and
+SPEI calculation.
+``tasmin``, ``tasmax``, ``sfcWind``, ``ps`` and ``rsds`` are used to
+approximate ``evspsblpot`` for ERA5 and 18 CMIP6 datasets.
+``SPEI`` is calculated from ``evspsblpot`` and ``pr``.
+
+
+TODO: ERA5 native on the fly or cmorizer?
+
+Reference data (ERA5, CDS-SM, CRU) can be downloaded and CMORized with
+ESMValTool by executing the ``data`` command:
+
+.. code-block:: bash
+
+   esmvaltool data download ERA5 pr
+   esmvaltool data format ERA5 pr
+
+``tasmin`` and ``tasmax`` are not directly available in the monthly averaged
+data product. The download script
+``diag_scripts/droughts/download_era5_tasminmax.py`` can be used to download
+and preprocess the data based on the
+``minimum_2m_temperature_since_previous_post_processing`` and
+``maximum_2m_temperature_since_previous_post_processing`` variables from ERA5
+daily data on single levels. The output files are compatible with ESMValTool
+and can be copied into the ``native6/Tier3/ERA5/v1/mon/`` directory. Using
+the ESMValTool environment ensures that all required libraries are available.
+The script can be run with the following command:
+
+.. code-block:: bash
+
+   python diag_scripts/droughts/download_era5_tasminmax.py
+
+For more options use the ``--help`` flag.
+
+The CMIP6 data can be downloaded automatically by ESMValTool. Just ensure
+that ``esgf_download`` is set to ``True`` or ``when_missing`` in the
+user configuration.
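+
+For example (a minimal sketch; the option is named as described above, so
+check the spelling against your ESMValTool version):
+
+.. code-block:: yaml
+
+   esgf_download: when_missing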
+
+Figures
+-------
+
+References
+----------
+
+* Lindenlaub, L. (2025). Agricultural Droughts in CMIP6 Future Projections.
+  Journal of Climate, 38(1), 1-15.
+  https://doi.org/10.1029/2025JC012345
diff --git a/doc/sphinx/source/recipes/figures/spei/histogram_spei.png b/doc/sphinx/source/recipes/figures/spei/histogram_spei.png
deleted file mode 100644
index 9aed50720f..0000000000
Binary files a/doc/sphinx/source/recipes/figures/spei/histogram_spei.png and /dev/null differ
diff --git a/doc/sphinx/source/recipes/figures/spei/histogram_spi.png b/doc/sphinx/source/recipes/figures/spei/histogram_spi.png
deleted file mode 100644
index 94cd9ea5df..0000000000
Binary files a/doc/sphinx/source/recipes/figures/spei/histogram_spi.png and /dev/null differ
diff --git a/esmvaltool/diag_scripts/droughts/diffmap.py b/esmvaltool/diag_scripts/droughts/diffmap.py
index fe777c4141..f9b01ff49b 100644
--- a/esmvaltool/diag_scripts/droughts/diffmap.py
+++ b/esmvaltool/diag_scripts/droughts/diffmap.py
@@ -74,7 +74,6 @@ from __future__ import annotations

 import logging
-import os
 from collections import defaultdict
 from pathlib import Path
@@ -210,7 +209,10 @@ def plot(
     basename: str,
     kwargs: dict | None = None,
 ) -> str:
-    """Plot map using diag_scripts.shared module."""
+    """Plot map using diag_scripts.shared module.
+
+    Returns the plot filename.
+    """
     plotfile = get_plot_filename(basename, cfg)
     plot_kwargs = cfg.get("plot_kwargs", {}).copy()
     if kwargs is not None:
@@ -231,7 +233,7 @@ def plot(
     add_cyclic_point(cube.data, cube.coord("longitude").points)
     mapplot = global_contourf(cube, **plot_kwargs)
     if cfg.get("clip_land", False):
-        plt.gca().set_extent((220, 170, -55, 90))
+        plt.gca().set_extent((220, 170, -55, 90))  # type: ignore[attr-defined]
     plt.title(meta.get("title", basename))
     if cfg.get("strip_plots", False):
         plt.gca().set_title(None)
@@ -285,15 +287,15 @@ def calculate_diff(cfg, meta, mm_data, output_meta, group) -> None:
         cube = pp.extract_time(
             cube, cfg["start_year"], 1, 1, cfg["end_year"], 12, 31
         )
-    dtime = cfg.get("comparison_period", 10) * 12
-    cubes = {}
+    dtime = cfg["comparison_period"] * 12
+    cubes: dict[str, Cube] = {}
     cubes["total"] = cube.collapsed("time", MEAN)
     do_metrics = cfg.get("metrics", METRICS)
     norm = (
         int(meta["end_year"])
         - int(meta["start_year"])
         + 1  # count full end year
-        - cfg.get("comparison_period", 10)  # decades center to center
+        - cfg["comparison_period"]  # decades center to center
     ) / 10
     cubes["first"] = cube[0:dtime].collapsed("time", MEAN)
     cubes["last"] = cube[-dtime:].collapsed("time", MEAN)
@@ -335,26 +337,22 @@ def calculate_mmm(cfg, meta, mm_data, output_meta, group) -> None:
     """Calculate multi-model mean for a given metric."""
     for metric in cfg.get("metrics", METRICS):
-        drop = cfg.get("dropcoords", ["time", "height"])
         meta = meta.copy()  # don't modify meta in place:
-        meta["dataset"] = "MMM"
         meta["diffmap_metric"] = metric
         basename = cfg["basename"].format(**meta)
-        mmm, _ = utils.mmm(
-            mm_data[metric],
-            dropcoords=drop,
-            dropmethods=metric != "diff",
-            mdtol=cfg.get("mdtol", 0.3),
+        results = pp.multi_model_statistics(
+            mm_data[metric], "overlap", ["mean"], ignore_scalar_coords=True
         )
+        mmm = results["mean"]
         meta["title"] = f"Multi-model Mean ({cfg['titles'][metric]})"
         if cfg.get("plot_mmm", True):
             plot_kwargs = cfg.get("plot_kwargs", {}).copy()
             overwrites = cfg.get("plot_kwargs_overwrite", [])
             apply_plot_kwargs_overwrite(plot_kwargs, overwrites, metric, group)
-            plotfile = plot(cfg, meta, mmm, basename, kwargs=plot_kwargs)
+            plot_file = plot(cfg, meta, mmm, basename, kwargs=plot_kwargs)
             prov = _get_provenance(cfg, meta)
meta["ancestors"] - utils.log_provenance(cfg, plotfile, prov) + utils.log_provenance(cfg, plot_file, prov) if cfg.get("save_mmm", True): work_file = str(Path(cfg["work_dir"]) / f"{basename}.nc") meta["filename"] = work_file @@ -365,8 +363,8 @@ def calculate_mmm(cfg, meta, mm_data, output_meta, group) -> None: def set_defaults(cfg: dict) -> None: """Update cfg with default values from diffmap.yml in place.""" - config_fpath = os.path.realpath(__file__)[:-3] + ".yml" - with open(config_fpath, encoding="utf-8") as config_file: + config_fpath = Path(__file__).with_suffix(".yml") + with config_fpath.open() as config_file: defaults = yaml.safe_load(config_file) for key, val in defaults.items(): cfg.setdefault(key, val) diff --git a/esmvaltool/diag_scripts/droughts/diffmap_paper.py b/esmvaltool/diag_scripts/droughts/diffmap_paper.py new file mode 100644 index 0000000000..5c6c59e888 --- /dev/null +++ b/esmvaltool/diag_scripts/droughts/diffmap_paper.py @@ -0,0 +1,424 @@ +"""Plot relative and absolute differences between two time intervals. + +A global map is plotted for each dataset with an index (must be unique). +The map shows the difference of the first and last N years +(N = comparison_period). +For multiple datasets a multi-model mean is calculated by default. This can be +disabled using `plot_mmm: False`. To plot only mmm and skip maps for individual +datasets use `plot_models: False`. +The diagnostic is applied to each variable by default, but for single variables +another meta key can be chosen for grouping like `group_by: project` to treat +observations and models seperatly. +The produced maps can be clipped to non polar landmasses (220, 170, -55, 90) +with `clip_land: True`. + + +Configuration options in recipe +------------------------------- +basename: str, optional + Format string for the plot filename. Can use meta keys and diffmap_metric. + For multi-model mean the dataset will be set to "MMM". Data will be saved + as same name with .nc extension. + By default: "{short_name}_{exp}_{diffmap_metric}_{dataset}" +clip_land: bool, optional (default: False) + Clips map plots to non polar land area (220, 170, -55, 90). +comparison_period: int, optional (default: 10) + Number of years to compare (first and last N years). Must be less or equal + half of the total time period. +filters: dict, or list, optional + Filter for metadata keys to select datasets. Only datasets with matching + values will be processed. This can be usefull, if ancestors or preprocessed + data is abailable, that should not be processed by the diagnostic. + If a list of dicts is given, all datasets matching any of the filters will + be considered. + By default None. +group_by: str, optional (default: short_name) + Meta key to loop over for multiple datasets. +metrics: list, optional + List of metrics to calculate and plot. For the difference ("percent" and + "diff") the mean over two comparison periods ("first" and "last") is + calculated. The "total" periods mean can be calculated and plotted as well. + By default ["first", "last", "diff", "total", "percent"] +mdtol: float, optional (default: 0.5) + Tolerance for missing data in multi-model mean calculation. 0 means no + missing data is allowed. For 1 mean is calculated if any data is available. +plot_kwargs: dict, optional + Kwargs passed to diag_scripts.shared.plot.global_contourf function. + The "cbar_label" parameter is formatted with meta keys. So placeholders + like "{short_name}" or "{units}" can be used. 
+ By default {"cmap": "RdYlBu", "extend": "both"} +plot_kwargs_overwrite: list, optional (default: []) + List of plot_kwargs dicts for specific metrics (diff, first, latest, total) + and group_by values (ie. pr, tas for group_by: short_name). + `group` and `metric` can either be strings or lists of strings to be + applied to all matching plots. Leave any of them empty to apply to all. + All other given keys are applied to the plot_kwargs dict for this plot. + Settings will be applied in order of the list, so later entries can + overwrite previous ones. +plot_mmm: bool, optional (default: True) + Calculate and plot the average over all datasets. +plot_models: bool, optional (default: True) + Plot maps for each dataset. +strip_plots: bool, optional (default: False) + Removes titles, margins and colorbars from plots (to use them in panels). +titles: dict, optional + Customize plot titles for different metrics. Possible dict keys are + "first", "last", "trend", "diff", "total", "percent". The values are + formatted using meta data. Placeholders like "{short_name}" can be used. + By default {"first": "Mean Historical", "last": "Mean Future", + "trend": "Future - Historical", "diff": "Future - Historical", + "total": "Mean Full Period", "percent": "Relative Change"}. +""" + +from __future__ import annotations + +import contextlib +import logging +from collections import defaultdict +from pathlib import Path + +import iris +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import yaml +from cartopy.util import add_cyclic_point +from esmvalcore import preprocessor as pp +from iris.analysis import MEAN +from iris.cube import Cube + +import esmvaltool.diag_scripts.droughts.utils as ut +import esmvaltool.diag_scripts.shared as e + +# from esmvaltool.diag_scripts.droughts import colors + +log = logging.getLogger(__file__) + + +METRICS = ["first", "last", "diff", "total", "percent"] +PROVENANCE = { + "authors": ["lindenlaub_lukas"], + "domains": ["global"], + "plot_types": ["map"], +} + + +def _get_provenance(cfg: dict, meta: dict) -> dict: + """Create provenance dict for single model plots.""" + prov = PROVENANCE.copy() + prov["statistics"] = ["mean"] + dataset = meta.get("dataset", "unknown") + if dataset == "MMM": + dataset = "Multi-Model Mean" + if meta["diffmap_metric"] == "diff": + prov["statistics"] = ["diff", "mean"] + prov["caption"] = ( + f"Absolute difference in {meta['long_name']} between first and " + f"last {cfg['comparison_period']} years of the period " + f"{meta['start_year']}-{meta['end_year']}, based on " + f"{meta['dataset']}." + ) + elif meta["diffmap_metric"] == "percent": + prov["statistics"] = ["diff", "mean"] + prov["caption"] = ( + f"Relative difference in {meta['long_name']} between first and " + f"last {cfg['comparison_period']} years of the period " + f"{meta['start_year']}-{meta['end_year']}, based on " + f"{meta['dataset']}." + ) + elif meta["diffmap_metric"] == "first": + prov["caption"] = ( + f"Average {meta['long_name']} over the period " + f"{meta['start_year']}-" + f"{meta['start_year'] + cfg['comparison_period'] - 1}, based on " + f"{meta['dataset']}." + ) + elif meta["diffmap_metric"] == "last": + prov["caption"] = ( + f"Average {meta['long_name']} over the period " + f"{meta['end_year'] - cfg['comparison_period'] + 1}-" + f"{meta['end_year']}, based on " + f"{meta['dataset']}." 
+"""
+
+from __future__ import annotations
+
+import contextlib
+import logging
+from collections import defaultdict
+from pathlib import Path
+
+import iris
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import yaml
+from cartopy.util import add_cyclic_point
+from esmvalcore import preprocessor as pp
+from iris.analysis import MEAN
+from iris.cube import Cube
+
+import esmvaltool.diag_scripts.droughts.utils as ut
+import esmvaltool.diag_scripts.shared as e
+
+# from esmvaltool.diag_scripts.droughts import colors
+
+log = logging.getLogger(__file__)
+
+
+METRICS = ["first", "last", "diff", "total", "percent"]
+PROVENANCE = {
+    "authors": ["lindenlaub_lukas"],
+    "domains": ["global"],
+    "plot_types": ["map"],
+}
+
+
+def _get_provenance(cfg: dict, meta: dict) -> dict:
+    """Create provenance dict for single model plots."""
+    prov = PROVENANCE.copy()
+    prov["statistics"] = ["mean"]
+    dataset = meta.get("dataset", "unknown")
+    if dataset == "MMM":
+        dataset = "Multi-Model Mean"
+    if meta["diffmap_metric"] == "diff":
+        prov["statistics"] = ["diff", "mean"]
+        prov["caption"] = (
+            f"Absolute difference in {meta['long_name']} between first and "
+            f"last {cfg['comparison_period']} years of the period "
+            f"{meta['start_year']}-{meta['end_year']}, based on "
+            f"{dataset}."
+        )
+    elif meta["diffmap_metric"] == "percent":
+        prov["statistics"] = ["diff", "mean"]
+        prov["caption"] = (
+            f"Relative difference in {meta['long_name']} between first and "
+            f"last {cfg['comparison_period']} years of the period "
+            f"{meta['start_year']}-{meta['end_year']}, based on "
+            f"{dataset}."
+        )
+    elif meta["diffmap_metric"] == "first":
+        prov["caption"] = (
+            f"Average {meta['long_name']} over the period "
+            f"{meta['start_year']}-"
+            f"{meta['start_year'] + cfg['comparison_period'] - 1}, based on "
+            f"{dataset}."
+        )
+    elif meta["diffmap_metric"] == "last":
+        prov["caption"] = (
+            f"Average {meta['long_name']} over the period "
+            f"{meta['end_year'] - cfg['comparison_period'] + 1}-"
+            f"{meta['end_year']}, based on "
+            f"{dataset}."
+        )
+    elif meta["diffmap_metric"] == "total":
+        prov["caption"] = (
+            f"Average {meta['long_name']} over the period "
+            f"{meta['start_year']}-{meta['end_year']}, based on "
+            f"{dataset}."
+        )
+    return prov
+
+
+def plot_colorbar(
+    cfg: dict,
+    plotfile: str,
+    plot_kwargs: dict,
+    orientation: str = "vertical",
+    mappable: mpl.cm.ScalarMappable | None = None,
+) -> None:
+    """Plot colorbar in its own figure for strip_plots."""
+    _ = cfg  # we might need this in the future
+    fig = plt.figure(figsize=(1.5, 3))
+    # fixed size axes in fixed size figure
+    cbar_ax = fig.add_axes([0.01, 0.04, 0.2, 0.92])
+    if mappable is None:
+        cmap = plot_kwargs.get("cmap", "RdYlBu")
+        norm = mpl.colors.Normalize(
+            vmin=plot_kwargs.get("vmin"),
+            vmax=plot_kwargs.get("vmax"),
+        )
+        mappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
+    cbar = fig.colorbar(
+        mappable,
+        cax=cbar_ax,
+        orientation=orientation,
+        label=plot_kwargs["cbar_label"],
+        pad=0.0,
+    )
+    if "cbar_ticks" in plot_kwargs:
+        cbar.set_ticks(plot_kwargs["cbar_ticks"], minor=False)
+    fontsize = plot_kwargs.get("cbar_fontsize", 14)
+    cbar.ax.tick_params(labelsize=fontsize)
+    cbar.set_label(
+        plot_kwargs["cbar_label"],
+        fontsize=fontsize,
+        labelpad=fontsize,
+    )
+    plotfile = plotfile.removesuffix(".png")
+    fig.savefig(plotfile + "_cb.png")  # , bbox_inches="tight")
+
+
+def fill_era5_gap(meta: dict, cube: Cube) -> None:
+    """Fill the missing gap at longitude 360 for the ERA5 PET calculation."""
+    if (
+        meta["dataset"] == "ERA5"
+        and meta["short_name"] == "evspsblpot"
+        and len(cube.data[0]) == 360  # noqa: PLR2004
+    ):
+        cube.data[:, 359] = cube.data[:, 0]
+
+
+def plot(
+    cfg: dict,
+    meta: dict,
+    cube: Cube,
+    basename: str,
+    kwargs: dict | None = None,
+) -> str:
+    """Plot map using diag_scripts.shared module.
+
+    Returns the plot filename.
+    """
+    plotfile = e.get_plot_filename(basename, cfg)
+    plot_kwargs = cfg.get("plot_kwargs", {}).copy()
+    if kwargs is not None:
+        plot_kwargs.update(kwargs)
+    if "vmax" in plot_kwargs and "vmin" in plot_kwargs:
+        plot_kwargs["levels"] = np.linspace(
+            plot_kwargs["vmin"],
+            plot_kwargs["vmax"],
+            9,
+        )
+    label = plot_kwargs.get("cbar_label", "{short_name} ({units})")
+    plot_kwargs["cbar_label"] = label.format(**meta)
+    for coord in cube.coords(dim_coords=True):
+        if not coord.has_bounds():
GUESSING: %s", coord.name()) + cube.coord(coord.name()).guess_bounds() + fill_era5_gap(meta, cube) + add_cyclic_point(cube.data, cube.coord("longitude").points) + mapplot = e.plot.global_contourf(cube, **plot_kwargs) + if cfg.get("clip_land", False): + plt.gca().set_extent((220, 170, -55, 90)) # type: ignore[attr-defined] + plt.title(meta.get("title", basename)) + if cfg.get("strip_plots", False): + plt.gca().set_title(None) + plt.gca().set_ylabel(None) + plt.gca().set_xlabel(None) + cb_mappable = mapplot.colorbar.mappable + mapplot.colorbar.remove() + plot_colorbar(cfg, plotfile, plot_kwargs, mappable=cb_mappable) + fig = mapplot.get_figure() + fig.savefig(plotfile, bbox_inches="tight") + plt.close() + log.info("saved figure: %s", plotfile) + return plotfile + + +def apply_plot_kwargs_overwrite( + kwargs: dict, + overwrites: dict, + metric: str, + group: str, +) -> dict: + """Apply plot_kwargs_overwrite to kwargs dict for selected plots.""" + for overwrite in overwrites: + new_kwargs = overwrite.copy() + groups = new_kwargs.pop("group", []) + if not isinstance(groups, list): + groups = [groups] + if len(groups) > 0 and group not in groups: + continue + metrics = new_kwargs.pop("metric", []) + if not isinstance(metrics, list): + metrics = [metrics] + if len(metric) > 0 and metric not in metrics: + continue + kwargs.update(new_kwargs) + return kwargs + + +def calculate_diff(cfg, meta, mm_data, output_meta, group) -> None: + """Absolute difference between first and last years of a cube. + + Calculates the absolut and relative difference between the first and last + period of a cube. Write data to mm and optionally plot each dataset. + """ + cube = iris.load_cube(meta["filename"]) + if meta["short_name"] in cfg.get("convert_units", {}): + pp.convert_units(cube, cfg["convert_units"][meta["short_name"]]) + with contextlib.suppress(Exception): + # TODO: maybe fix this within cmorizer + cube.remove_coord("Number of stations") # dropped by unit conversions + if "start_year" in cfg or "end_year" in cfg: + log.info("selecting time period") + cube = pp.extract_time( + cube, cfg["start_year"], 1, 1, cfg["end_year"], 12, 31 + ) + dtime = cfg["comparison_period"] * 12 + cubes: dict[Cube] = {} + cubes["total"] = cube.collapsed("time", MEAN) + do_metrics = cfg.get("metrics", METRICS) + norm = ( + int(meta["end_year"]) + - int(meta["start_year"]) + + 1 # count full end year + - cfg["comparison_period"] # decades center to center + ) / 10 + cubes["first"] = cube[0:dtime].collapsed("time", MEAN) + cubes["last"] = cube[-dtime:].collapsed("time", MEAN) + cubes["diff"] = cubes["last"] - cubes["first"] + cubes["diff"].data /= norm + cubes["diff"].units = str(cubes["diff"].units) + " / 10 years" + cubes["percent"] = cubes["diff"] / cubes["first"] * 100 + cubes["percent"].units = "% / 10 years" + if cfg.get("plot_mmm", True): + for key in do_metrics: + mm_data[key].append(cubes[key]) + for key, cube in cubes.items(): + if key not in do_metrics: + continue # i.e. 
+    cubes["first"] = cube[0:dtime].collapsed("time", MEAN)
+    cubes["last"] = cube[-dtime:].collapsed("time", MEAN)
+    cubes["diff"] = cubes["last"] - cubes["first"]
+    cubes["diff"].data /= norm
+    cubes["diff"].units = str(cubes["diff"].units) + " / 10 years"
+    cubes["percent"] = cubes["diff"] / cubes["first"] * 100
+    cubes["percent"].units = "% / 10 years"
+    if cfg.get("plot_mmm", True):
+        for key in do_metrics:
+            mm_data[key].append(cubes[key])
+    for key, cube in cubes.items():
+        if key not in do_metrics:
+            continue  # i.e. first/last if only diff is needed
+        meta["diffmap_metric"] = key
+        meta["exp"] = meta.get("exp", "exp")
+        basename = cfg["basename"].format(**meta)
+        meta["title"] = cfg["titles"][key].format(**meta)
+        prov = _get_provenance(cfg, meta)
+        prov["ancestors"] = [meta["filename"]]
+        if cfg.get("plot_models", True):
+            plot_kwargs = cfg.get("plot_kwargs", {}).copy()
+            apply_plot_kwargs_overwrite(
+                plot_kwargs,
+                cfg.get("plot_kwargs_overwrite", []),
+                key,
+                group,
+            )
+            plotfile = plot(cfg, meta, cube, basename, kwargs=plot_kwargs)
+            plt.close()
+            ut.log_provenance(cfg, plotfile, prov)
+        if cfg.get("save_models", True):
+            work_file = str(Path(cfg["work_dir"]) / f"{basename}.nc")
+            iris.save(cube, work_file)
+            meta["filename"] = work_file
+            output_meta[work_file] = meta.copy()
+            ut.log_provenance(cfg, work_file, prov)
+
+
+def calculate_mmm(cfg, meta, mm_data, output_meta, group) -> None:
+    """Calculate multi-model mean for a given metric."""
+    for metric in cfg.get("metrics", METRICS):
+        meta = meta.copy()  # don't modify meta in place
+        meta["diffmap_metric"] = metric
+        basename = cfg["basename"].format(**meta)
+        results = pp.multi_model_statistics(
+            mm_data[metric], "overlap", ["mean"], ignore_scalar_coords=True
+        )
+        mmm = results["mean"]
+        meta["title"] = f"Multi-model Mean ({cfg['titles'][metric]})"
+        if cfg.get("plot_mmm", True):
+            plot_kwargs = cfg.get("plot_kwargs", {}).copy()
+            overwrites = cfg.get("plot_kwargs_overwrite", [])
+            apply_plot_kwargs_overwrite(plot_kwargs, overwrites, metric, group)
+            plot_file = plot(cfg, meta, mmm, basename, kwargs=plot_kwargs)
+            prov = _get_provenance(cfg, meta)
+            prov["ancestors"] = meta["ancestors"]
+            ut.log_provenance(cfg, plot_file, prov)
+        if cfg.get("save_mmm", True):
+            work_file = str(Path(cfg["work_dir"]) / f"{basename}.nc")
+            meta["filename"] = work_file
+            output_meta[work_file] = meta.copy()
+            iris.save(mmm, work_file)
+
+
+def set_defaults(cfg: dict) -> None:
+    """Update cfg with default values from diffmap_paper.yml in place."""
+    config_fpath = Path(__file__).with_suffix(".yml")
+    with config_fpath.open() as config_file:
+        defaults = yaml.safe_load(config_file)
+    for key, val in defaults.items():
+        cfg.setdefault(key, val)
+    if cfg["plot_kwargs_overwrite"] is not defaults["plot_kwargs_overwrite"]:
+        cfg["plot_kwargs_overwrite"].extend(defaults["plot_kwargs_overwrite"])
+    titles = defaults.get("titles", {})
+    titles.update(cfg["titles"])
+    cfg["titles"] = titles
+
+
+def filter_metas(metas: list, filters: dict | list) -> list:
+    """Filter metas by filter dicts."""
+    if isinstance(filters, dict):
+        filters = [filters]
+    filtered = {}
+    for selection in filters:
+        for meta in e.select_metadata(metas, **selection):
+            filtered[meta["filename"]] = meta  # unique
+    return list(filtered.values())
+
+
+def main(cfg) -> None:
+    """Execute the diagnostic."""
+    set_defaults(cfg)
+    metas = cfg["input_data"].values()
+    if cfg.get("filters") is not None:
+        metas = filter_metas(metas, cfg["filters"])
+    groups = e.group_metadata(metas, cfg["group_by"])
+    output = {}
+    for group, g_metas in groups.items():
+        mm_data = defaultdict(list)
+        for meta in g_metas:
+            if "end_year" not in meta:
+                meta.update(ut.get_time_range(meta["filename"]))
+            # adjust norm for selected time period
+            meta["end_year"] = cfg.get("end_year", meta["end_year"])
+            meta["start_year"] = cfg.get("start_year", meta["start_year"])
+            calculate_diff(cfg, meta, mm_data, output, group)
+        do_mmm = cfg.get("plot_mmm", True) or cfg.get("save_mmm", True)
+        if do_mmm and len(g_metas) > 1:
+            # copy meta from first dataset and add all ancestors
+            keep_keys = [
+                "short_name",
+                "long_name",
+                "units",
+                "start_year",
+                "end_year",
+                "exp",
+            ]
+            meta = {k: g_metas[0][k] for k in keep_keys}
+            meta["ancestors"] = [met["filename"] for met in g_metas]
+            meta["dataset"] = "MMM"
+            calculate_mmm(cfg, meta, mm_data, output, group)
+    ut.save_metadata(cfg, output)
+
+
+if __name__ == "__main__":
+    with e.run_diagnostic() as config:
+        main(config)
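The `set_defaults` function above merges a YAML file stored next to the
script into the recipe settings. A sketch of what such a defaults file can
contain (keys taken from the docstring defaults; values illustrative):

    comparison_period: 10
    plot_kwargs:
      cmap: RdYlBu
      extend: both
    plot_kwargs_overwrite: []
    titles:
      first: Mean Historical
      last: Mean Future
      diff: Future - Historical
      percent: Relative Change
      total: Mean Full Period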
diff --git a/esmvaltool/diag_scripts/droughts/distribution.py b/esmvaltool/diag_scripts/droughts/distribution.py
new file mode 100644
index 0000000000..3a5f54ab7e
--- /dev/null
+++ b/esmvaltool/diag_scripts/droughts/distribution.py
@@ -0,0 +1,523 @@
+#!/usr/bin/env python
+"""Creates histograms and regional boxplots for given time periods.
+
+Global histograms are plotted to compare distributions of any variable in
+given intervals and/or by experiment. Combined experiments (historical-ssp*)
+will be split into individual ones.
+
+NOTE: This diagnostic loads all values from all datasets into memory. If you
+provide a lot of data make sure enough memory is available.
+
+TODO: Free up memory after each split; for global histograms only save bins
+and fit.
+
+Configuration options in recipe
+-------------------------------
+group_by: str, optional (default: short_name)
+    All input datasets are grouped by this metadata key. The diagnostic runs
+    for each of these groups. Consider adding group_by to the basename.
+split_experiments: bool, optional (default: False)
+    If true, combined experiments like "historical-ssp126" are treated
+    individually as "historical" and "ssp126" when grouped.
+    NOTE: Currently only works for historical + ScenarioMIP (cut at 2014).
+split_by: str, optional (default: exp)
+    Create individual distributions for each split in the same figure.
+    Can be any metadata key or 'interval'. For 'interval' the corresponding
+    parameter must be given.
+    TODO: For 'exp' consider changing split_experiments.
+intervals: list of dicts, optional (default: [])
+    List of dicts containing a `label` (optional) and a `range`
+    (timerange in ISO 8601 format, for example YYYY/YYYY) or
+    `start` and `end` (ISO 8601).
+    In the diagnostic `interval_label` and `interval_range` will be added
+    to the metadata and can be used as split_by option and in the basename.
+    A full example is sketched at the end of this docstring.
+    NOTE: If split by experiment, the first interval is used for historical,
+    the second for ScenarioMIP. For other splits this option is ignored.
+regions: list of str, optional (default: [])
+    List of AR6 regions to extract data for histograms (all given regions)
+    and boxplots (each region).
+sort_regions_by: str, optional (default: "")
+    Sort regions by the median value of the given split_by value. If empty,
+    no sorting is applied.
+basename: str, optional (default: '{plot_type}_{group}')
+    Filename for plot files. Can contain placeholders for group, plot_type
+    and interval_label or any other metadata key.
+comparison_period: int, optional (default: 10)
+    Number of years to compare (first and last N years). Must be less than or
+    equal to half of the total time period.
+plot_mmm: bool, optional (default: True)
+    Calculate and plot the average over all datasets.
+plot_models: bool, optional (default: True)
+    Plot maps for each dataset.
+plot_properties: dict, optional (default: {})
+    Kwargs passed to all axes.set() functions. Can be styling of ticks,
+    labels, limits, grid, etc. See the matplotlib documentation for all
+    options:
+    https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.set.html
+    Use `histogram.plot_properties` or `regional_stats.plot_properties`
+    to specify kwargs for the corresponding plots.
+plot_kwargs: dict, optional (default: {})
+    Kwargs passed to all plot functions. Use `histogram.plot_kwargs` or
+    `regional_stats.plot_kwargs` to specify kwargs for the corresponding
+    plots.
+regional_stats.fig_kwargs: dict, optional (default:
+    {'figsize': (15, 3), 'dpi': 300})
+    Kwargs passed to the figure function for the scenarios plot. The best
+    size for the figure depends on the number of splits and regions and the
+    wanted aspect ratio. (15, 3) is the default for a large number of
+    regions.
+colors: dict, optional (default: {})
+    Define colors as dict. Keys are the split_by values. For experiment or
+    dataset splits colors are used from ipcc colors, if available.
+    TODO: add ipcc dataset colors.
+labels: dict, optional (default: {})
+    Define labels as dict. Keys are the split_by values. Values are strings
+    shown in the legend.
+strip_plots: bool, optional (default: False)
+    Removes titles, margins and colorbars from plots (to use them in panels).
+grid_lines: list of float, optional (default: [])
+    Draw helper lines in the plots to indicate thresholds or ranges. At the
+    given locations hlines are drawn in boxplots and vlines in histograms.
+grid_line_style: dict, optional (default:
+    {'color': 'gray', 'linestyle': '--', 'linewidth': 0.5})
+    Style of the grid lines. See the matplotlib documentation for all
+    options.
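+
+Example recipe snippet (a sketch; the region acronyms and years are
+illustrative)::
+
+    distribution:
+      script: droughts/distribution.py
+      split_by: exp
+      split_experiments: true
+      regions: [MED, WCE, NEU]
+      sort_regions_by: ssp585
+      grid_lines: [-1, 1]
+      intervals:
+        - label: historical
+          range: 1965/2014
+        - label: future
+          range: 2051/2100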
+"""
+
+import copy
+import logging
+import warnings
+from collections import defaultdict
+
+import iris
+import matplotlib.pyplot as plt
+import numpy as np
+import xarray as xr
+import yaml
+from cycler import cycler
+from esmvalcore import preprocessor as pp
+from iris.analysis.cartography import area_weights
+from iris.time import PartialDateTime
+
+# from matplotlib.lines import Line2D
+from matplotlib import cbook
+from scipy.stats import norm
+
+from esmvaltool.diag_scripts.droughts import (
+    colors as ipcc_colors,
+)
+from esmvaltool.diag_scripts.droughts import (
+    utils as ut,
+)
+from esmvaltool.diag_scripts.shared import (
+    get_diagnostic_filename,
+    # get_plot_filename,
+    group_metadata,
+    run_diagnostic,
+)
+
+log = logging.getLogger(__file__)
+
+
+SER_KEYS = [
+    "mean",
+    "med",
+    "q1",
+    "q3",
+    "iqr",
+    "whislo",
+    "whishi",
+    "cihi",
+    "cilo",
+]
+BOX_PLOT_KWARGS = {}
+
+
+def load_constrained(cfg, meta, regions=None):
+    """Load cube with constraints in meta."""
+    if regions is None:
+        regions = []
+    cube = iris.load_cube(meta["filename"], constraint=meta.get("cons", None))
+    ut.guess_lat_lon_bounds(cube)
+    if regions != []:
+        log.debug("Extracting regions: %s", regions)
+        cube = pp.extract_shape(
+            cube,
+            shapefile="ar6",
+            ids={"Acronym": regions},
+        )
+    if "interval_range" in meta:
+        log.debug("clipping timerange to %s", meta["interval_range"])
+        cube = pp.clip_timerange(cube, meta["interval_range"])
+    return cube
+
+
+def group_by_interval(cfg, metas: list):
+    """Group metadata by intervals."""
+    groups = defaultdict(list)
+    for interval in cfg["intervals"]:
+        imetas = copy.deepcopy(metas)
+        for m in imetas:
+            m["interval_range"] = interval["range"]
+            m["interval_label"] = interval["label"]
+        groups[interval["label"]] = imetas
+    return groups
+
+
+def group_by_exp(cfg, metas, historical_first=False):
+    """Similar to shared.group_metadata but splits combined experiments.
+
+    Metadata will be added to the individual experiments.
+    To keep this function lazy an iris constraint is added (`meta["cons"]`),
+    to be applied when loading the file.
+    If comparison_period is given, the last part of its length in each
+    experiment is used. For historical the first part is used if
+    `historical_first=True`.
+    """
+    groups = group_metadata(metas, "exp")
+    historical_added = []
+    year = PartialDateTime(year=2015)
+    groups = defaultdict(list, groups)
+    remove_keys = set()
+    # split combined experiments:
+    for exp in list(groups.keys()):
+        metas = groups[exp]
+        if not exp.startswith("historical-"):
+            continue
+        if exp not in historical_added:
+            for meta in metas:
+                hmeta = copy.deepcopy(meta)
+                hmeta["cons"] = iris.Constraint(time=lambda cell: cell < year)
+                hmeta["exp"] = "historical"
+                if len(cfg["intervals"]) > 0:
+                    log.warning("set interval range for historical")
+                    hmeta["interval_range"] = cfg["intervals"][0]["range"]
+                groups["historical"].append(hmeta)
+            historical_added.append(exp)
+        for meta in metas:
+            meta["cons"] = iris.Constraint(time=lambda cell: cell >= year)
+            meta["exp"] = exp.split("-")[1]
+            if len(cfg["intervals"]) > 0:
+                log.warning("set interval range for ssp")
+                meta["interval_range"] = cfg["intervals"][1]["range"]
+            groups[meta["exp"]].append(meta)
+        remove_keys.add(exp)
+    for rmkey in remove_keys:
+        del groups[rmkey]
+    return groups
+
+
+def calculate_histogram(cfg, splits, output, group):
+    """Load data for each split and calculate counts, bins and fit parameters.
+
+    Save the parameters to a netCDF file to optionally skip this part on
+    rerun.
+    """
+    labels = []
+    # weights = []
+    fits = []
+    counts = []
+    bins = []
+    log.info("start split loading")
+    for split, metas in splits.items():
+        labels.append(cfg.get("labels", {}).get(split, split))
+        split_data = []
+        split_weights = []
+        for meta in metas:  # merge everything else
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                cube = load_constrained(cfg, meta, regions=cfg["regions"])
+                split_weights.append(area_weights(cube))
+                applied_mask = cube.data.filled(np.nan)  # type: ignore
+                split_data.append(applied_mask.ravel())
+        flat = np.array(split_data).ravel()
+        flat_weights = np.array(split_weights).ravel()
+        fits.append(norm.fit(flat[~np.isnan(flat)]))  # mu, std
+        mybins = np.arange(-8, 6.5, step=0.5)
+        s_counts, s_bins = np.histogram(flat, mybins, weights=flat_weights)
+        split_bins_center = (s_bins[1:] + s_bins[:-1]) / 2
+        counts.append(s_counts)
+        bins.append(split_bins_center)
+        log.info("split done")
+    # save output
+    data = xr.Dataset(
+        {
+            "fits": xr.DataArray(fits, dims=["split", "param"], name="fit"),
+            "counts": xr.DataArray(
+                counts,
+                dims=["split", "bin"],
+                name="counts",
+            ),
+            "bins": xr.DataArray(bins, dims=["split", "bin"], name="bins"),
+            "labels": xr.DataArray(labels, dims=["split"], name="labels"),
+        },
+    )
+    fname = get_diagnostic_filename(f"histogram_{group}", cfg)
+    output[fname] = {
+        "filename": fname,
+        "plottype": "histogram",
+        "group": group,
+    }
+    data.to_netcdf(fname)
+    return data
+
+
+def load_histogram(cfg, group):
+    """Load histogram data from the netCDF file."""
+    fname = get_diagnostic_filename(f"histogram_{group}", cfg)
+    return xr.open_dataset(fname)
+
+
+def plot_histogram(cfg, splits, output, group, fit=True):
+    """Plot one combined histogram of all given regions for each split."""
+    plt.figure(**ut.sub_cfg(cfg, "histogram", "fig_kwargs"))
+    colors = list(get_split_colors(cfg, splits).values())
+    if ut.sub_cfg(cfg, "histogram", "reuse"):
+        data = load_histogram(cfg, group)
+    else:
+        data = calculate_histogram(cfg, splits, output, group)
+    bins = data["bins"].values.T
+    counts = data["counts"].values.T
+    labels = data["labels"].values.T
+    plot_kwargs = {
+        "bins": np.arange(-8, 6.5, step=0.5),
+        "label": labels,
+        "density": True,
+        "alpha": 0.75,
+    }
+    log.info("plotting histogram (%s)", group)
+    _, bins, patches = plt.hist(
+        bins,
+        weights=counts,
+        color=colors,
+        **plot_kwargs,
+        zorder=3,
+    )
+    legend = plt.legend()
+    try:
+        legend_rename = ut.sub_cfg(cfg, "histogram", "legend")
+    except KeyError:
+        legend_rename = {}
+    for text in legend.get_texts():  # overwrite legend labels if provided
+        text.set_text(legend_rename.get(text.get_text(), text.get_text()))
+    for patch in legend.get_patches():
+        patch.set_alpha(1)
+    plot_props = ut.sub_cfg(cfg, "histogram", "plot_properties")
+    plt.gca().set(**plot_props)
+    for line in cfg["grid_lines"]:
+        plt.axvline(line, **cfg["grid_line_style"])
+
+    meta = next(iter(splits.values()))[0].copy()  # first meta from dict
+    meta.update(
+        {
+            "plot_type": "histogram",
+            "group": group,
+        },
+    )
+    filename = ut.get_plot_fname(cfg, cfg["basename"], meta, {"/": "_"})
+    plt.savefig(filename)
+    log.info("saved %s", filename)
+
+    # plot normal fit
+    plot_kwargs_fit = {
+        "linewidth": 2,
+        "linestyle": "-",
+        "alpha": 1,
+    }
+    for patch in patches:  # type: ignore
+        for rect in patch:
+            rect.set_alpha(0.2)
+    for iii, fit in enumerate(data["fits"].values):
+        x = np.linspace(bins[0], bins[-1], 200)
+        p = norm.pdf(x, fit[0], fit[1])
+        plot_kwargs_fit.update(cfg["histogram"].get("plot_kwargs", {}))
+        plt.plot(x, p, color=colors[iii], **plot_kwargs_fit)
+    # fit_line = Line2D([], [], color="black", linestyle="-", alpha=1)
+    # handles, labels = plt.gca().get_legend_handles_labels()
+    # handles.append(fit_line)
+    # plt.legend(handles=handles, labels=labels + ["normal fit"])
+    # ax.legend(handles, labels)
+    meta["plot_type"] = "histogram_fit"
+    filename = ut.get_plot_fname(cfg, cfg["basename"], meta, {"/": "_"})
+    log.info("saved %s", filename)
+    plt.savefig(filename)
+    plt.close()
+
+
+def sort_regions(data, regions, by="ssp585", inplace=True):
+    """Sort regions by median value."""
+    if not inplace:
+        data = copy.deepcopy(data)
+    medians = [np.nanmedian(np.array(reg).ravel()) for reg in data[by]]
+    order = np.argsort(medians)
+    regions = [regions[i] for i in order]
+    for split in data:
+        data[split] = [data[split][i] for i in order]
+    return data, regions
+
+
+def calculate_regional_stats(cfg, splits, output, group):
+    """Calculate regional statistics for given metadata."""
+    data = {}
+    stats = {}
+    regions = cfg["regions"]
+    if regions == []:
+        regions = list(ut.get_hex_positions().keys())
+    for split, metas in splits.items():  # default each experiment
+        region_data = defaultdict(list)
+        for meta in metas:
+            cube = load_constrained(cfg, meta)
+            ut.guess_lat_lon_bounds(cube)
+            for region in regions:
+                ids = {"Acronym": [region]}
+                reg = pp.extract_shape(cube, shapefile="ar6", ids=ids)
+                flat = reg.data[~reg.data.mask]  # cheaper  # type: ignore
+                region_data[region].extend(flat)
+        data[split] = [region_data[r] for r in regions]
+    if cfg["sort_regions_by"]:
+        data, regions = sort_regions(data, regions, by=cfg["sort_regions_by"])
+    for split, dat in data.items():
+        stats[split] = cbook.boxplot_stats(
+            dat,
+            whis=(2.3, 97.7),
+            labels=regions,
+        )
+    # save stats
+    fname = get_diagnostic_filename(f"regional_stats_{group}", cfg)
+    fname = fname.replace(".nc", ".yml")
"group": group, + } + for split_stats in stats.values(): + for stat in split_stats: + del stat["fliers"] + for key in SER_KEYS: + stat[key] = stat[key].tolist() + with open(fname, "w") as fstream: + yaml.dump(stats, fstream) + log.info("saved: %s", fname) + return stats + + +def load_regional_stats(cfg, group): + """Load regional statistics from netcdf file.""" + fname = get_diagnostic_filename(f"regional_stats_{group}", cfg) + fname = fname.replace(".nc", ".yml") + with open(fname) as f: + stats = yaml.load(f, Loader=yaml.SafeLoader) + for split in stats: + for stat in stats[split]: + for key in SER_KEYS: + stat[key] = np.array(stat[key]) + return stats + + +def plot_regional_stats(cfg, splits, output, group): + if ut.sub_cfg(cfg, "regional_stats", "reuse"): + log.info("loading boxplot data from file") + stats = load_regional_stats(cfg, group) + else: + stats = calculate_regional_stats(cfg, splits, output, group) + regions = [sdata["label"] for sdata in list(stats.values())[0]] + fig = plt.figure(**ut.sub_cfg(cfg, "regional_stats", "fig_kwargs")) + positions = np.array(np.arange(len(regions))) + colors = get_split_colors(cfg, splits) + for line in cfg["grid_lines"]: + plt.hlines(line, -1, len(regions), **cfg["grid_line_style"]) + for i, (split, _stat) in enumerate(stats.items()): + n = len(stats) + group_width = 0.9 + width = group_width / (n + 1) + offset = ((i + 1) / (n + 1) - 0.5) * group_width + plot_kwargs_box = { + "widths": width, + "positions": positions + offset, + "showfliers": False, + "patch_artist": True, + "medianprops": {"color": "white", "linewidth": 1}, + "boxprops": { + "facecolor": colors[split], + "edgecolor": "white", + "linewidth": 1, + }, + "whiskerprops": {"color": colors[split], "linewidth": 0.8}, + "capprops": {"color": colors[split], "linewidth": 1}, + } + # plt.boxplot(sdata, **plot_kwargs_box) + plt.gca().bxp(stats[split], **plot_kwargs_box) + plt.plot([], c=colors[split], label=split) # dummy for legend + fig.legend(loc="center left", bbox_to_anchor=(1, 0.5)) + plt.xticks(positions, regions, rotation=90) + plt.tight_layout() + plt.xlim(-1, len(regions)) + # save plot + fname = ut.get_plot_fname(cfg, f"regional_stats_{group}") + plt.savefig(fname) + log.info("saved %s", fname) + + +def get_split_colors(cfg, splits): + """Adds colors for each split to the config if not yet present. + ipcc colors are used if available, matplotlib defaults otherwise. + TODO: add ipcc model colors. 
+ """ + colors = cfg.get("colors", {}).copy() # new instance for each group + mpl_default = plt.rcParams["axes.prop_cycle"].by_key()["color"] + cycle = iter(cycler(color=mpl_default)) + missing_splits = [s for s in splits if s not in colors] + for split in missing_splits: + colors[split] = next(cycle) + if cfg["split_by"] == "exp" and hasattr(ipcc_colors, split): + colors[split] = getattr(ipcc_colors, split) + return colors + + +def set_defaults(cfg): + cfg.setdefault("group_by", "short_name") + cfg.setdefault("split_by", "exp") + cfg.setdefault("intervals", []) + cfg["intervals"] = [ut.fix_interval(i) for i in cfg["intervals"]] + cfg.setdefault("basename", "{plot_type}_{group}") + cfg.setdefault("regions", []) + cfg.setdefault("sort_regions_by", "") + cfg.setdefault("plot_models", False) + cfg.setdefault("plot_mmm", True) + cfg.setdefault("plot_global", True) + cfg.setdefault("plot_properties", {}) + cfg.setdefault("plot_kwargs", {}) + cfg.setdefault("histogram", {}) + cfg.setdefault("regional_stats", {}) + cfg["regional_stats"].setdefault( + "fig_kwargs", + {"figsize": (15, 3), "dpi": 300}, + ) + cfg.setdefault("reuse", False) + cfg.setdefault("strip_plots", False) + cfg.setdefault("grid_lines", []) + cfg.setdefault( + "grid_line_style", + {"color": "gray", "linestyle": "--", "linewidth": 0.5}, + ) + + +def main(cfg): + """Main function. executing all plots for each group.""" + set_defaults(cfg) + groups = group_metadata(cfg["input_data"].values(), cfg["group_by"]) + output = {} + for group, gmetas in groups.items(): + log.info("running diagnostic for: %s", group) + if cfg["split_by"] == "exp": + splits = group_by_exp(cfg, gmetas) + elif cfg["split_by"] == "interval": + splits = group_by_interval(cfg, gmetas) + else: + splits = group_metadata(gmetas, cfg["split_by"]) + cfg["split_colors"] = get_split_colors(cfg, splits) + if cfg["plot_global"]: + log.info("plotting global histogram") + plot_histogram(cfg, splits, output, group) + plt.close() + if cfg["plot_regions"]: + log.info("plotting regional statistics") + plot_regional_stats(cfg, splits, output, group) + plt.close() + ut.save_metadata(cfg, output) + + +if __name__ == "__main__": + with run_diagnostic() as config: + main(config) diff --git a/esmvaltool/diag_scripts/droughts/event_area_timeseries.py b/esmvaltool/diag_scripts/droughts/event_area_timeseries.py new file mode 100644 index 0000000000..e4dbd36efa --- /dev/null +++ b/esmvaltool/diag_scripts/droughts/event_area_timeseries.py @@ -0,0 +1,606 @@ +"""Calculate and plot relative area of drought events. + +Creates timeseries of the spatial extend of all drought events. Different types +of events can be configured for specific ranges of drought indices. A +multimodel mean is calculated by averaging over all datasets event area ratios. +The datasets are required to be preprocessed accordingly to the same shape and +time axis. + +The default parameters for this diagnostic are defined in +event_area_timeseries.yml. + + +Configuration options in recipe +------------------------------- +interval: int, optional (default: 240) + Number of months per plot, for regular intervals. Set to 0 to create + one plot for the full period. +intervals: list of tuples, optional (default: None) + List of tuples with start and end time indices for each plot. If not set, + `interval` will be used to generate regular ranges. +events: list of dicts + List of event types with min and max index values, colors and labels. + See event_area_timeseries.yml for an example. 
+fig_kwargs: dict, optional
+    Additional keyword arguments for the figure creation. This can be set
+    for specific plot types as ``fullperiod.fig_kwargs`` or
+    ``overview.fig_kwargs``.
+overview: dict, optional
+    Setup for a figure with multiple plots for selected intervals. The first
+    interval is expected to be plotted once, all others are plotted for each
+    scenario. plot_kwargs and fig_kwargs can be set as for this figure.
+    Set ``overview.skip: True`` to not create this figure.
+fullperiod: dict, optional
+    Setup for a figure with one plot for each pair of scenario and region.
+    ``plot_kwargs`` and ``fig_kwargs`` can be set as for this figure.
+    Set ``fullperiod.skip: True`` to not create this figure.
+regions: list of str, optional
+    List of regions (acronyms) for which the area ratios are plotted.
+    If not given, global data is used (see the sketch at the end of this
+    docstring).
+combine_regions: bool, optional (default: False)
+    If true the list of acronyms is combined into one region,
+    rather than plotting them individually.
+basename: str, optional (default: "SPEI_{dataset}_{interval}")
+    Format string for plot file names. The string will be formatted with the
+    current meta data. "group", "interval" and "region" are available in
+    certain cases. For multi-model mean plots "MMM" is used as dataset name.
+reuse: bool, optional (default: False)
+    If True, the diagnostic will try to load existing data from the output
+    of previous runs. This should only be set to true temporarily during
+    development or when adjusting plot parameters. It should not be changed
+    in recipes.
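+
+Example script settings (a sketch; the region acronyms are illustrative and
+the defaults come from event_area_timeseries.yml)::
+
+    script: droughts/event_area_timeseries.py
+    regions: [MED, WCE]
+    combine_regions: false
+    interval: 240
+    basename: "SPEI_{dataset}_{interval}"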
+"""
+
+import logging
+import os
+
+import iris
+import iris.plot as iplt
+import matplotlib.pyplot as plt
+import numpy as np
+import xarray as xr
+import yaml
+from esmvalcore import preprocessor as pp
+from iris.analysis.cartography import area_weights
+from iris.cube import Cube
+from matplotlib import gridspec
+from matplotlib.dates import DateFormatter, YearLocator  # MonthLocator
+from numpy import ma
+
+import esmvaltool.diag_scripts.droughts.utils as ut
+from esmvaltool.diag_scripts.shared import (
+    get_diagnostic_filename,
+    get_plot_filename,
+    group_metadata,
+    run_diagnostic,
+)
+
+log = logging.getLogger(__file__)
+
+
+def load_and_prepare(cfg, fname) -> Cube:
+    """Apply mask and guess lat/lon bounds."""
+    cube = iris.load_cube(fname)
+    ut.guess_lat_lon_bounds(cube)
+    old_mask = cube.data.mask
+    new_mask = get_2d_mask(cube, tile=True)
+    diff_mask = np.logical_xor(old_mask, new_mask)
+    cube.data.mask = new_mask
+    cube.data.data[diff_mask] = np.nan
+    return cube
+
+
+def get_intervals(cube, interval) -> list:
+    """Generate a list of time intervals for plotting."""
+    if interval > 0:
+        months = len(cube.coord("time").points)
+        steps = int(months / interval)
+        intervals = [(i * interval, (i + 1) * interval) for i in range(steps)]
+        if months % interval > 0:  # plot last (incomplete) interval
+            intervals += [(intervals[-1][-1], None)]
+    else:
+        intervals = [(0, None)]
+    return intervals
+
+
+def calc_ratio(cube, event, weights) -> np.ndarray:
+    """Calculate a timeseries of area ratio for a specific index range.
+
+    The fraction of area (or cells) with index values between min and max is
+    calculated for each timestep. NaN values are ignored for each event.
+    If imin and imax are set to "nan" the ratio of cells with nan/inf values
+    is returned.
+
+    Parameters
+    ----------
+    cube : iris.cube.Cube
+        3D drought index, with area
+    event : dict
+        must contain floats or "nan" for keys: "min", "max"
+    weights : np.ndarray
+        weights array with the same shape as the cube
+    """
+    imin = event["min"]
+    imax = event["max"]
+    if imin == "nan" and imax == "nan":
+        mask = np.isfinite(cube.data)
+    else:
+        mask = ~np.logical_and(cube.data >= imin, cube.data < imax)
+    event_areas = ma.masked_array(weights, mask=mask)  # still 3D
+    return np.sum(event_areas, axis=(1, 2))  # collapse lat/lon
+
+
+def get_2d_mask(cube, mask_any=False, tile=False) -> np.ndarray:
+    """Return a 2d (lat/lon) mask where any or all entries are masked.
+
+    Parameters
+    ----------
+    cube : iris.cube.Cube
+        3d cube with masked data
+    mask_any : bool
+        return true for any masked entry along the time dim, instead of all
+    tile : bool
+        return a 3d mask (matching cube) repeated along the time dim
+    """
+    if mask_any:
+        mask2d = np.any(cube.data.mask, axis=0)
+    else:
+        mask2d = np.all(cube.data.mask, axis=0)
+    if tile:
+        mask2d = np.tile(mask2d, (cube.shape[0], 1, 1))
+    return mask2d
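+
+
+# Editorial note on the two helpers above: get_2d_mask collapses the time
+# dimension of a cube's mask (axis 0), so a cell counts as masked only if it
+# is masked at every timestep (or at any timestep with mask_any=True);
+# calc_ratio then sums the area weights of cells inside an event's
+# [min, max) range per timestep, so with weights normalized to 1 the result
+# is an area fraction per timestep.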
+ """ + if i is None: + i = (0, len(cfg["times"])) + t = cfg["times"][i[0] : i[1]] + if not fig or not ax: + fig, ax = plt.subplots(figsize=cfg["figsize"], dpi=300) + ax.xaxis.set_major_locator(YearLocator(5)) + ax.xaxis.set_major_formatter(DateFormatter("%Y")) + ax.xaxis.set_minor_locator(YearLocator(1)) + ax.set_ylabel(label) + plot_kwargs = {"step": "mid", "colors": cfg["colors"], "labels": cfg["labels"]} + plot_kwargs.update(cfg.get("plot_kwargs", {})) + ax.stackplot(t, y[:, i[0] : i[1]], **plot_kwargs) + ax.set_ylim(*cfg["ylim"]) + ax.set(**cfg.get("axes_properties", {})) + ax.tick_params(direction="in", which="both") + ax.set_xlim(t[0], t[-1]) + ax.set_xticklabels(ax.get_xticklabels(), rotation=00, ha="center") + if cfg.get("strip_plot", False): + # standalone legend in seperate figure + fig2, ax2 = plt.subplots(figsize=(5, 1), dpi=300) + ax2.stackplot(t, y[:, i[0] : i[1]], **plot_kwargs) + ax2.axis("off") + lfname = get_plot_filename(f"{cfg['index']}_MMM_legend", cfg) + fig2.legend(mode="expand", ncol=2, fancybox=False, framealpha=1) + fig2.savefig(lfname) + # elif not cfg.get("legend_latest", True) or i[1] is None: + # add legend last or every figure + # if not cfg.get("subplots") and not cfg.get("subplots_full"): + # fig.legend( + # loc="center right", + # fancybox=False, + # framealpha=1, + # labelspacing=0.3, + # ) # 'center right' + if fname: + fig.savefig(fname) + plt.close() + + +def plot_mmm(cfg, region=None, fig=None, axs=None, label=None, meta=None) -> None: + """Plot multi model mean of area ratios for given interval and events.""" + if meta is None: + meta = {} + mmm_key = f"mmm_{region}" if region else "mmm" + mm_key = f"mm_{region}" if region else "mm" + for event in cfg["events"]: + event[mmm_key] = np.mean(np.array(event[mm_key]), axis=(0)) + y = np.vstack([e[mmm_key] for e in cfg["events"]]) + if cfg.get("intervals", None) is not None: + for n, i in enumerate(cfg["intervals"]): + meta["interval"] = f"{i[0]}-{i[1]}" + cfg["basename"].format(**meta) + if cfg["subplots"]: + if axs is None: + error_msg = "no axs for subplots" + raise ValueError(error_msg) + plot(cfg, i, y, fig=fig, ax=axs[n], label=label) + if cfg.get("subplots_full", False): + plot(cfg, i, y, fig=fig, ax=axs, label=label) + + +def set_defaults(cfg) -> None: + """Update cfg with default values from diffmap.yml. + + TODO: This could be a shared function reused in other diagnostics. + """ + config_file = os.path.realpath(__file__)[:-3] + ".yml" + with open(config_file, encoding="utf-8") as f: + defaults = yaml.safe_load(f) + for key, val in defaults.items(): + cfg.setdefault(key, val) + cfg["colors"] = [e["color"] for e in cfg["events"]] + cfg["labels"] = [e["label"] for e in cfg["events"]] + cfg["mirror"] = len(cfg.get("mirror_events", [])) > 0 + cfg.setdefault("combine_regions", False) + cfg.setdefault("basename", "SPEI_MMM_{interval}") + cfg["overview"].setdefault("skip", False) + cfg["fullperiod"].setdefault("skip", False) + + +def set_timecoords(cfg) -> None: + """Read time coordinate points from reference dataset and store it in cfg. + + This is required to ensure that all datasets have the same time axis. + TODO: maybe new regrid_time with common calendar could replace this? 
+ """ + meta = ut.select_single_meta( + cfg["input_data"].values(), + short_name=cfg["index"], + # experiment="historical-ssp585", # TODO: + dataset=cfg["reference_dataset"], + strict=False, + ) + tc = iris.load_cube(meta["filename"]).coord("time") + cfg["times"] = iplt._fixup_dates(tc, tc.points) + print(cfg["times"]) + + +def plot_overview(cfg, data, group="unnamed") -> None: + """Prepare a figure with 1 histogrical and 3 future scenario intervals.""" + fig = plt.figure(**ut.sub_cfg(cfg, "overview", "fig_kwargs"), dpi=300) + gs = gridspec.GridSpec(3, 2) + scenario_axs = [ + fig.add_subplot(gs[0, 1]), + fig.add_subplot(gs[1, 1]), + fig.add_subplot(gs[2, 1]), + ] + hist_ax = fig.add_subplot(gs[0, 0], sharey=scenario_axs[0]) + # NOTE: sharey hides second yticklabels, use twin to show it + twin = scenario_axs[0].twinx() + twin.tick_params(axis="y", which="both", right=True, direction="in") + # TODO: can this be moved to ax properties? + twin.set_yticks(cfg.get("yticks", None)) + leg_ax = fig.add_subplot(gs[1:, 0]) + hist_plotted = False + # TODO: dont use reversed on "random" input order. make it explicit. + for n, ((_exp), e_data) in enumerate(reversed(list(data.groupby(["exp"])))): + print("EXP") + print(_exp) + dat = e_data.squeeze() + # pick first and last interval: + if not hist_plotted: + # first = dat.isel(time=slice(0, cfg["intervals"][0][1])) + # print(first["event_ratio"]) + plot( + cfg, + cfg["intervals"][0], + dat["event_ratio"].data, + fig=fig, + ax=hist_ax, + ) + # last = dat.isel(time=slice(cfg["intervals"][-1][0], None)) + plot( + cfg, + cfg["intervals"][-1], + dat["event_ratio"].data, + fig=fig, + ax=scenario_axs[n], + ) + for ax in [hist_ax, *scenario_axs]: # all plots + ax.set_yticks(cfg.get("yticks", None)) + ax.grid(True, which="major", linestyle="--", linewidth=0.5) + ax.tick_params(axis="x", which="both", top=True, bottom=True) + ax.yaxis.tick_right() + for ax in scenario_axs: # right plots + ax.tick_params(axis="y", which="both", left=True, right=True) + for ax in scenario_axs[:-1]: # disable xicks + ax.set_xticklabels([]) + hist_ax.set_ylabel("Historical") + hist_ax.yaxis.tick_right() + hist_ax.set_yticklabels([]) + # axs[1][0].tick_params(axis='x', which='both', pad=10) + leg_ax.axis("off") + hands, labs = scenario_axs[0].get_legend_handles_labels() + legend = leg_ax.legend( + hands[0:8], + labs[0:8], + ncol=2, + loc="center", + bbox_to_anchor=(0.5, 0.34), + fancybox=False, + labelspacing=0, + framealpha=0, + ) + for txt in legend.get_texts(): + txt.set_linespacing(1.3) + leg_ax.axis("off") + fig.tight_layout() + fig.subplots_adjust(hspace=0.2) + fig.savefig(get_plot_filename(f"overview_{cfg['index']}_MMM_{group}", cfg)) + + +def plot_full_periods(cfg, data) -> None: + """Prepare a figure with full time series for each scenario/region.""" + # setup figure with 1 row for legend and 1 for each scenario/region pair + fig = plt.figure(**ut.sub_cfg(cfg, "fullperiod", "fig_kwargs"), dpi=300) + rows = len(data["exp"]) * len(data["region"]) + gs = gridspec.GridSpec(rows + 1, 1, height_ratios=[0.3] + [1] * (rows)) + leg_ax = fig.add_subplot(gs[0, 0]) + axs = [] + # loop through data slices and plot to axis + # TODO: allow for both regional and exp grouping? 
+    # for n, ((exp, reg), dat) in enumerate(
+    #     reversed(data.groupby(["exp", "region"]))
+    # ):
+    for n, (exp, dat) in enumerate(reversed(list(data.groupby(["exp"])))):
+        ax = fig.add_subplot(gs[n + 1, 0])
+        dat = dat.squeeze()
+        y = dat["event_ratio"].data
+        # hardcode full interval for this plot
+        i = [0, None]
+        fname = get_plot_filename(f"event_area_{exp}", cfg)
+        plot(cfg, i, y, fname=fname, fig=fig, ax=ax)
+        ax.set_ylabel(exp)
+        axs.append(ax)
+    for ax in axs:  # all plots
+        ax.set_yticks(cfg.get("yticks", None))
+        ax.grid(True, which="both", linestyle="--", linewidth=0.5)  # noqa: FBT003
+        ax.tick_params(axis="x", which="both", top=True, bottom=True)
+        ax.tick_params(axis="y", which="both", left=True, right=True)
+    for ax in axs[:-1]:  # disable xticks
+        ax.set_xticklabels([])
+    leg_ax.axis("off")
+    hands, labs = axs[0].get_legend_handles_labels()
+    labs = [lab.replace("\n", " ") for lab in labs]
+    ncol = 4
+    legend = leg_ax.legend(
+        hands[0:8],
+        labs[0:8],
+        ncol=ncol,
+        loc="center",
+        bbox_to_anchor=(0.5, 0.34),
+        fancybox=False,
+        labelspacing=0,
+        framealpha=0,
+    )
+    for txt in legend.get_texts():
+        txt.set_linespacing(1.3)
+    leg_ax.axis("off")
+    fig.tight_layout()
+    # fig.subplots_adjust(hspace=0.2)
+    fig.subplots_adjust(
+        left=0.05,
+        right=0.95,
+        top=0.95,
+        bottom=0.05,
+        hspace=0.2,
+    )
+    fig.savefig(get_plot_filename(f"{cfg['index']}_MMM", cfg))
+
+
+def plot_each_interval(cfg, exp_metas) -> None:
+    """Create an individual figure for each interval and each scenario."""
+    for _exp, emetas in exp_metas.items():
+        process_datasets(cfg, emetas, fig=None, axs=None)
+
+
+def process_datasets(cfg, metas, fig=None, axs=None) -> None:
+    """Load all models and call the event area calculation for each."""
+    last_meta = None
+    for meta in metas:
+        fname = meta["filename"]
+        if meta["short_name"].lower() != cfg["index"]:
+            log.info("Not matching index (skipped): %s", cfg["index"])
+            continue
+        cube = load_and_prepare(cfg, fname)
+        if cfg.get("global", True):
+            plot_area_ratios(cfg, meta, cube)
+        if cfg.get("regions", False) and not cfg["combine_regions"]:
+            for region in cfg["regions"]:
+                log.info("-- region %s", region)
+                meta["region"] = region
+                extracted = pp.extract_shape(
+                    cube,
+                    shapefile="ar6",
+                    ids={"Acronym": [region]},
+                )
+                plot_area_ratios(cfg, meta, extracted)
+        elif cfg.get("regions", False):
+            log.info("-- combined region")
+            extracted = pp.extract_shape(
+                cube,
+                shapefile="ar6",
+                ids={"Acronym": cfg["regions"]},
+            )
+            if "region" in meta:
+                del meta["region"]
+            plot_area_ratios(cfg, meta, extracted)
+        last_meta = meta
+    # multi model mean
+    if cfg.get("plot_mmm", True):
+        ylabel = cfg.get("ylabels", {}).get(meta["exp"], meta["exp"])
+        if cfg.get("global", True) or cfg["combine_regions"]:
+            plot_mmm(cfg, fig=fig, axs=axs, label=ylabel, meta=last_meta)
+        else:
+            for region in cfg.get("regions", [None]):
+                plot_mmm(
+                    cfg,
+                    region=region,
+                    fig=fig,
+                    axs=axs,
+                    label=ylabel,
+                    meta=last_meta,
+                )
+
+
+def extract_regions(cfg, cube) -> dict:
+    """Extract regions and return a dict of cubes.
+ + TODO: use preproc instead + """ + extracted = {} + params = {"shapefile": "ar6", "ids": {"Acronym": cfg["regions"]}} + if cfg["regions"] and cfg["combine_regions"]: + log.info("extracting combined region") + extracted["combined"] = pp.extract_shape(cube, **params) + elif cfg["regions"]: + for region in cfg["regions"]: + log.info("extracting region %s", region) + params["ids"]["Acronym"] = [region] + extracted[region] = pp.extract_shape(cube, **params) + else: + extracted["global"] = cube + return extracted + + +def regional_weights(cfg, cube) -> np.ndarray: + """Calculate area weights normalized to the total unmasked area.""" + # NOTE: area_weights does not apply cubes mask, normalize manually + if cfg["weighted"]: + weights = area_weights(cube, normalize=True) + else: + weights = np.ones(cube.data.shape) / (cube.shape[1] * cube.shape[2]) + # mask and normalize weights to sum up to 1 for unmasked (land/region) + mask = get_2d_mask(cube, tile=True) + weights = ma.masked_array(weights, mask=mask) + unmasked_area = np.sum(weights) / cube.shape[0] + return weights / unmasked_area + + +def calculate_event_ratios(cfg, metas, output) -> tuple: + """Load data and save calculated event ratio timelines.""" + # data: dataset x exp x region x event + # data_mmm: exp x region x event + if cfg["regions"] and cfg["combine_regions"]: + regions = ["combined"] + elif cfg["regions"]: + regions = cfg["regions"] + else: + regions = ["global"] + coords = { + "dataset": list(group_metadata(metas.values(), "dataset").keys()), + "region": regions, + "exp": list(group_metadata(metas.values(), "exp").keys()), + "event": [e["label"] for e in cfg["events"]], + "time": cfg["times"], + } + dims = list(coords.keys()) + nans = np.full([len(c) for c in coords.values()], np.nan) + data = xr.Dataset({"event_ratio": (dims, nans)}, coords=coords) + for meta in metas.values(): + cube = load_and_prepare(cfg, meta["filename"]) + extracted = extract_regions(cfg, cube) + loc = {"dataset": meta["dataset"], "exp": meta["exp"]} + for region, r_cube in extracted.items(): + loc["region"] = region + log.info("calculating %s", region) + weights = regional_weights(cfg, r_cube) + for event in cfg["events"]: + loc["event"] = event["label"] + ratios = calc_ratio(r_cube, event, weights=weights) + data["event_ratio"].loc[loc] = ratios + fname = get_diagnostic_filename("event_area", cfg) + output[fname] = {"filename": fname, "plottype": "event_area"} + data.to_netcdf(fname) + data_mmm = data.mean("dataset") + # data_mmm = data_mmm.drop_vars("dataset") + fname = get_diagnostic_filename("event_area_mmm", cfg) + output[fname] = {"filename": fname, "plottype": "event_area_mmm"} + data_mmm.to_netcdf(fname) + return data, data_mmm + + +def load_event_ratios(cfg): + """Load calculated event ratios for datasets and MMM from files.""" + names = ["event_area", "event_area_mmm"] + fnames = [get_diagnostic_filename(f, cfg) for f in names] + datas = {} + for fname in fnames: + try: + datas[fname] = xr.open_dataset(fname) + except FileNotFoundError: + log.error("File not found: %s", fname) + datas[fname] = None + return datas[fnames[0]], datas[fnames[1]] + + +def main(cfg): + """Get common time coordinates, execute the diagnostic code. + + Loop over experiments, than datasets. 
+    """
+    set_defaults(cfg)
+    set_timecoords(cfg)
+    metas = cfg["input_data"]
+    output = {}
+    if cfg["reuse"]:
+        data, data_mmm = load_event_ratios(cfg)
+    else:
+        data, data_mmm = calculate_event_ratios(cfg, metas, output)
+    if not cfg["overview"]["skip"]:
+        for reg, reg_data in data_mmm.groupby("region"):
+            plot_overview(cfg, reg_data, group=reg)
+    if not cfg["fullperiod"]["skip"]:
+        plot_full_periods(cfg, data_mmm)
+    # if cfg.get("plot_intervals", False):
+    #     exp_metas = group_metadata(metas.values(), "exp")
+    #     plot_each_interval(cfg, exp_metas)
+    # TODO: when data is reused from files it is not written to metadata yet.
+    ut.save_metadata(cfg, output)
+
+
+if __name__ == "__main__":
+    with run_diagnostic() as cfg:
+        main(cfg)
diff --git a/esmvaltool/diag_scripts/droughts/event_area_timeseries.yml b/esmvaltool/diag_scripts/droughts/event_area_timeseries.yml
new file mode 100644
index 0000000000..41958c515f
--- /dev/null
+++ b/esmvaltool/diag_scripts/droughts/event_area_timeseries.yml
+# Default configuration for event_area_timeseries diag script
+# Each key can be overwritten by the user in the recipe's diagnostic block
+index: spei
+weighted: True
+interval: 240
+reuse: False
+regions: Null
+
+ylim: [0, 1]
+fig_kwargs:
+  figsize: [10, 2]
+plot_overview: True
+overview:
+  skip: False
+  fig_kwargs:
+    figsize: [9, 4]
+fullperiod:
+  skip: False
+  fig_kwargs:
+    figsize: [16, 6]
+# axes_properties:
+#   xticklabels: []
+events:
+  - label: |
+      extreme wet spell
+      $SPEI \geq 2$
+    color: "#00094e"
+    min: 2
+    max: 100
+  - label: |
+      severe wet spell
+      $1.5 \leq SPEI < 2$
+    color: "#2b97a1"
+    min: 1.5
+    max: 2
+  - label: |
+      moderate wet spell
+      $1 \leq SPEI < 1.5$
+    color: "#9bc193"
+    min: 1
+    max: 1.5
+  - label: |
+      near normal
+      $-1 \leq SPEI < 1$
+    color: "#dadab7"
+    min: -1
+    max: 1
+  - label: |
+      moderate drought
+      $-1.5 \leq SPEI < -1$
+    color: "#daba5f"  # "#d98d4e"
+    min: -1.5
+    max: -1
+  - label: |
+      severe drought
+      $-2 \leq SPEI < -1.5$
+    color: "#d96e20"
+    min: -2
+    max: -1.5
+  - label: |
+      extreme drought
+      $SPEI < -2$
+    color: "#c33800"
+    min: -100
+    max: -2
+  - label: |
+      invalid data
+      $SPEI = nan$
+    color: "gray"
+    min: "nan"
+    max: "nan"
diff --git a/esmvaltool/diag_scripts/droughts/pattern_correlation.py b/esmvaltool/diag_scripts/droughts/pattern_correlation.py
new file mode 100644
index 0000000000..bd63ec9c26
--- /dev/null
+++ b/esmvaltool/diag_scripts/droughts/pattern_correlation.py
+"""Overview plots for pattern correlations of multiple variables.
+
+This diagnostic calculates the pattern correlation between pairs of datasets
+and plots the results for all variables and datasets in one overview plot.
+In each figure the correlations are grouped by variable and project. Only
+projects listed in the recipe get individual colors; everything else is added
+with individual markers and labels if `extra_datasets` is True.
+A single reference dataset needs to be specified in the recipe.
+
+The diagnostic requires 2d cubes as input. They can be prepared using the
+preprocessor or by any ancestor diagnostic that provides a metadata.yml.
+The additional `group_by` option allows applying this diagnostic separately
+to each entry of a metadata key, including extra facets. For example, this
+can be used to plot different aggregations of the same variable, like mean
+and trend, in a single run.
+
+For example:
+
+.. code-block:: yaml
+
+    script: droughts/pattern_correlation.py
+    reference: ERA5
+    group_by: diffmap_metric  # first, last, diff, total
+    ancestors: [obs/diffmaps, models/diffmaps]
+
+
+Configuration options in recipe
+-------------------------------
+reference: str, required
+    Dataset name used to correlate all other datasets against.
+extra_datasets: bool, optional
+    Add datasets not belonging to any of the ``projects`` as individual
+    markers. This can be used to add individual reanalyses. By default: True
+fig_kwargs: dict, optional
+    Kwargs passed to the figure creation. By default: {"figsize": (4, 3)}
+group_by: str, optional (default: None)
+    Plot figures for each entry of the given key.
+labels: dict, optional
+    Mapping of variable names to custom xtick labels. Falls back to the
+    variable name if not present.
+project_plot_kwargs: dict, optional (default: {})
+    Kwargs passed to the patches of specific projects in the plot function.
+plot_kwargs: dict, optional (default: {})
+    Kwargs passed to the plot function.
+projects: list, optional (default: ["CMIP6"])
+    List of projects to include as individual colors in the plot.
+relative_change: bool, optional (default: False)
+    Creates an additional plot with global relative changes of each variable
+    in all datasets.
+"""
+
+import logging
+from collections import defaultdict
+
+import iris
+import iris.analysis
+import iris.analysis.cartography
+import matplotlib.pyplot as plt
+from iris.analysis import MEAN
+from iris.analysis.stats import pearsonr
+
+from esmvaltool.diag_scripts import shared
+from esmvaltool.diag_scripts.droughts import utils as ut
+
+logger = logging.getLogger(__file__)
+
+
+def pattern_correlation(cube1, cube2, centered=False, weighted=False):
+    """Calculate pattern correlation between two 2d-cubes.
+
+    Uses pearsonr from iris.analysis.stats to calculate the correlation
+    coefficient along the latitude and longitude coordinates and returns it
+    as a scalar. `weighted` applies only to the centering (weighted mean
+    subtraction) and not to the correlation itself.
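+
+    Example (a minimal sketch with synthetic data; the local helper
+    ``_grid_cube`` is hypothetical and only serves to build test cubes):
+
+    .. code-block:: python
+
+        import numpy as np
+        from iris.coords import DimCoord
+        from iris.cube import Cube
+
+        def _grid_cube(data):
+            lat = DimCoord(np.linspace(-80, 80, data.shape[0]),
+                           standard_name="latitude", units="degrees")
+            lon = DimCoord(np.linspace(0, 350, data.shape[1]),
+                           standard_name="longitude", units="degrees")
+            return Cube(data, dim_coords_and_dims=[(lat, 0), (lon, 1)])
+
+        rng = np.random.default_rng(0)
+        cube_a = _grid_cube(rng.random((9, 18)))
+        cube_b = _grid_cube(rng.random((9, 18)))
+        corr = float(pattern_correlation(cube_a, cube_b, centered=True))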
+ """ + weights = None + if weighted: + weights = iris.analysis.cartography.area_weights(cube1) + if centered: + dims = ["latitude", "longitude"] + cube1 -= cube1.collapsed(dims, MEAN, weights=weights) + cube2 -= cube2.collapsed(dims, MEAN, weights=weights) + result = pearsonr(cube1, cube2, corr_coords=["latitude", "longitude"]) + return result.data + + +def ds_markers(extra_labels): + datasets = set() + for labels in extra_labels.values(): + datasets.update(labels) + markers = { + "ERA5": "o", + "ERA5-Land": "s", + "CRU": "x", + } + more_markers = iter("v^<>1sp*hH+xDd|_") + for dataset in datasets: + if dataset not in markers: + markers[dataset] = next(more_markers) + return markers + + +def process(metas, cfg, key=None): + results = defaultdict(dict) + # reference = shared.select_metadata(metas, dataset=cfg["reference"]) + cfg["variables"] = shared.group_metadata(metas, "short_name").keys() + extra_labels = defaultdict(list) + for var, var_metas in shared.group_metadata(metas, "short_name").items(): + logger.info("Processing %s (%s datasets)", var, len(var_metas)) + print(var_metas) + reference = ut.select_single_metadata( + var_metas, dataset=cfg["reference"], short_name=var + ) + ref_cube = iris.load_cube(reference["filename"]) + results[var] = defaultdict(list) + for ds_meta in var_metas: + if ds_meta["dataset"] in ["MMM", cfg["reference"]]: + continue + ds_meta["project"] = ds_meta.get("project", "unknown") + cube = iris.load_cube(ds_meta["filename"]) + ds_result = float(pattern_correlation(ref_cube, cube)) + if ds_meta["project"] in cfg.get("projects", ["CMIP6"]): + results[var][ds_meta["project"]].append(ds_result) + elif cfg.get("extra_datasets", True): + results[var]["extra"].append(ds_result) + extra_labels[var].append(ds_meta["dataset"]) + title = None + if cfg.get("plot_title", False): + title = f"Pattern Correlation with {cfg['reference']} ({key})" + plot( + cfg, + results, + f"pattern_correlation_{key}", + extra_labels=extra_labels, + ylims=(0, 1), + title=title, + ) + + +def process_relative_change(metas, cfg): + results = defaultdict(dict) + cfg["variables"] = shared.group_metadata(metas, "short_name").keys() + extra_labels = defaultdict(list) + for var, var_metas in shared.group_metadata(metas, "short_name").items(): + logger.info("Processing %s (%s datasets)", var, len(var_metas)) + results[var] = defaultdict(list) + for ds_meta in var_metas: + ds_meta["project"] = ds_meta.get("project", "unknown") + # TODO: this need to be fixed in diag_pet.R: + if var == "evspsblpot" and ds_meta["project"] == "unknown": + ds_meta["project"] = "CMIP6" + cube = iris.load_cube(ds_meta["filename"]) + iris.analysis.maths.abs(cube, in_place=True) + rel = cube.collapsed(["latitude", "longitude"], iris.analysis.MEAN) + ds_result = float(rel.data) + if ds_meta["project"] in cfg.get("projects", ["CMIP6"]): + results[var][ds_meta["project"]].append(ds_result) + elif cfg.get("extra_datasets", True): + results[var]["extra"].append(ds_result) + extra_labels[var].append(ds_meta["dataset"]) + title = None + if cfg.get("plot_title", False): + title = "Relative Change over 10 Years" + plot( + cfg, + results, + "relative_change", + extra_labels=extra_labels, + ylims=(0, 10), + title=title, + ) + + +def plot( + cfg, + results, + fname, + extra_labels=None, + title="Pattern Correlation", + ylims=None, +): + if "order" in cfg: + if sorted(cfg["order"]) != sorted(results.keys()): + logger.warning( + "Order does not match result keys: %s, %s", + cfg["order"], + results.keys(), + ) + else: + results = {key: 
results[key] for key in cfg["order"]} + fig_kwargs = {"figsize": (4, 3)} + fig_kwargs.update(cfg.get("fig_kwargs", {})) + fig, axes = plt.subplots(**fig_kwargs) + plot_kwargs = { + "marker": "_", + "linestyle": "", + "markersize": "5", + "color": "darkred", + } + plot_kwargs.update(cfg.get("plot_kwargs", {})) + var_count = len(results.keys()) + projects = cfg.get("projects", ["CMIP6"]) + + axes.set_title(title) + axes.plot([], **plot_kwargs) + axes.set_xticks(range(var_count)) + labels = results.keys() + if "labels" in cfg: + labels = [cfg["labels"].get(lab, lab) for lab in labels] + axes.set_xticklabels(labels, rotation=90, ha="right") + axes.set_xlim(-0.5, var_count - 0.5) + if ylims is not None: + axes.set_ylim(*ylims) + columns = len(projects) + if cfg.get("extra_datasets", True): + columns += 1 + # single iteration yet + for j, project in enumerate(projects): + # NOTE: dict needs to be complete (each pair of var/proj required) + # TODO: make results a xarray dataset instead? + y_pos = [] + x_pos = [] + shift = -0.2 + 0.4 * j / columns + for i, var in enumerate(results.keys()): + y_pos.extend(results[var][project]) + x_pos.extend([i + shift] * len(results[var][project])) + label = project + axes.plot(x_pos, y_pos, **plot_kwargs, label=label) + # Add extra datasets + if cfg.get("extra_datasets", True): + markers = ds_markers(extra_labels) + scatters = defaultdict(list) + for i, var in enumerate(results.keys()): + # TODO add label here manually? + y_pos = results[var]["extra"] + for j, value in enumerate(y_pos): + scatters[extra_labels[var][j]].append((i + 0.2, value)) + for ds, values in scatters.items(): + label = ds + if label.startswith("CDS"): + label = "CDS-SM" + x_pos, y_pos = zip(*values) + axes.scatter( + x_pos, y_pos, marker=markers[ds], color="gray", label=label + ) + axes.legend() + if cfg.get("plot_properties"): + axes.set(**cfg["plot_properties"]) + # fig.legend(loc="center left", bbox_to_anchor=(1, 0.5)) + # fig.tight_layout() + axes.grid(axis="y") + shared.save_figure(fname, {}, cfg, figure=fig, bbox_inches="tight") + + +def main(cfg): + metas = cfg["input_data"].values() + # print(shared.group_metadata(metas, "short_name")["evspsblpot"]) + if not cfg.get("group_by"): + process(metas, cfg) + if cfg.get("relative_change", False): + process_relative_change(metas, cfg) + else: + grouped = shared.group_metadata(metas, cfg["group_by"]) + groups = cfg.get("groups", list(grouped.keys())) + for key, group in grouped.items(): + if key is None or key not in groups: + continue + if cfg.get("groups", None) is not None: + if key not in cfg["groups"]: + continue + process(group, cfg, key=key) + if key == "percent" and cfg.get("relative_change", False): + process_relative_change(group, cfg) + + +if __name__ == "__main__": + with shared.run_diagnostic() as cfg: + main(cfg) diff --git a/esmvaltool/diag_scripts/droughts/portrait_plot.py b/esmvaltool/diag_scripts/droughts/portrait_plot.py new file mode 100644 index 0000000000..61739acbad --- /dev/null +++ b/esmvaltool/diag_scripts/droughts/portrait_plot.py @@ -0,0 +1,568 @@ +"""Overview plot for performance metrics. + +Description +----------- +This diagnostic provides plot functionalities for performance metrics. +The multi model overview heatmap might be useful for different +tasks and therefore this diagnostic tries to be as flexible as possible. +X and Y axis, grouping parameter and slits for each rectangle can be +configured in the recipe. All *_by parameters can be set to any metadata +key. 
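+
+For example (an illustrative recipe snippet; the values mirror this script's
+defaults and are not a tested configuration):
+
+.. code-block:: yaml
+
+    script: droughts/portrait_plot.py
+    x_by: dataset
+    y_by: short_name
+    group_by: project
+    split_by: split
+    normalize: centered_median
+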
To split by 'reference' this key needs to be set as extra_facet in recipe. + +NOTE: this is not the original portrait_plot, but a modified version for drought +diagnostics. Check if its possible to use the original portrait plot before +adding this one to the ESMValTool. + +TODO: prov (if we wanna use this in public code) + +Author +------ +Lukas Lindenlaub (Universität Bremen, Germany) +Diego Cammarano + +Configuration parameters through recipe: +---------------------------------------- +normalize: str or None, optional + ('mean', 'median', 'centered_mean', 'centered_median', None). + Subtract median/mean if centered. Divide by median/mean if not None. + By default None. +distance_metric: str or None, optional + A method for the distance_metric preprocessor can be set, to apply it to + the input data along all axis before plotting. If set to None, the input + is expected to contain scalar values for each input file. By default, None. +x_by: str, optional + Metadata key for x coordinate. + By default 'alias'. +y_by: str, optional + Metadata key for y coordinate. + By default 'variable_group'. +group_by: str, optional + Metadata key for grouping. + Grouping is always applied in x direction. Can be set to None to skip + grouping into subplots. By default 'project'. +split_by: str, optional + The rectangles can be split into 2-4 triangles. This is used to show + metrics for different references. For this case there is no need to change + this parameter. Multiple variables can be set in the recipe with `split` + assigned as extra_facet to label the different references. Data without + a split assigned will be plotted as main rectangles, this can be changed + by setting default_split parameter. + By default 'split'. +default_split: str, optional + Data labeled with this string, will be used as main rectangles. All other + splits will be plotted as overlays. This can be used to choose the base + reference, while all references are labeled for the legend. + By default None. +plot_legend: bool, optional + If True, a legend is plotted, when multiple splits are given. + By default True. +legend: dict, optional + Customize, if, how and where the legend is plotted. The 'best' position + and size of the legend depends on multiple parameters of the figure + (i.e. lengths of labels, aspect ratio of the plots...). And might require + manual adjustment of `x`, `y` and `size` to fit the figure layout. + Keys (each optional) that will be handled are: + position: str or None, optional + Position of the legend. Can be 'right' or 'left'. Or set to None to + disable plotting the legend. By default 'right'. + x_offset: float, optional + Manually adjust horizontal position to save space or fix overlap. + Number given in Inches. By default 0. + y_offset: float, optional + Manually adjust vertical position to save space or fix overlap. + Number given in Inches. By default 0. + size: float, optional + Size of the legend in Inches. By default 0.3. +plot_kwargs: dict, optional + Dictionary that gets passed as kwargs to `matplotlib.pyplot.imshow()`. + Colormaps will be converted to 11 discrete steps automatically. Default + colormap is RdYlBu_r but can be changed with cmap. + Other common keywords: vmin, vmax + By default {}. +cbar_kwargs: dict, optional + Dictionary that gets passed to `matplotlib.pyplot.colorbar()`. + E.g. label, ticks... + By default {}. +axes_properties: dict, optional + Dictionary that gets passed to `matplotlib.axes.Axes.set()`. + Subplots can be widely customized. 
For a full list of + properties see: + https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.set.html + E.g. xlabel, ylabel, yticklabels, xmargin... + By default {}. +nan_color: str or None, optional + Matplotlib named color or hexcode for NaN values. If set to None (null in + yaml), no triangles are plotted for NaN values. + By default 'white'. +figsize: list(float), optional + [width, height] of the figure in inches. The final figure will be saved with + bbox_inches="tight", which can change the resulting aspect ratio. + By default [5, 3]. +dpi: int, optional + Dots per inch for the figure. By default 300. +regions: list(str), optional + If list of acronyms is provided, data is limited to this + regions (rather than global). +""" + +import itertools +import logging + +import iris +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +from esmvalcore import preprocessor as pp +from matplotlib import patches +from mpl_toolkits.axes_grid1 import ImageGrid + +from esmvaltool.diag_scripts.shared import ( + get_diagnostic_filename, + get_plot_filename, + group_metadata, + run_diagnostic, + select_metadata, +) + +log = logging.getLogger(__name__) + + +def unify_limits(grid): + """Set same limits for all subplots.""" + vmin, vmax = np.inf, -np.inf + images = [ax.get_images()[0] for ax in grid] + for img in images: + vmin = min(vmin, img.get_clim()[0]) + vmax = max(vmax, img.get_clim()[1]) + for img in images: + img.set_clim(vmin, vmax) + + +def plot_matrix(data, row_labels, col_labels, axe, plot_kwargs): + """Create an image for given data.""" + img = axe.imshow(data, **plot_kwargs) + # Show all ticks and label them with the respective list entries. + axe.set_xticks(np.arange(data.shape[1]), labels=col_labels) + axe.set_yticks(np.arange(data.shape[0]), labels=row_labels) + # Rotate the tick labels and set their alignment. + plt.setp( + axe.get_xticklabels(), + rotation=90, + ha="right", + va="center", + rotation_mode="anchor", + ) + # Turn spines off and create white grid. + # ax.spines[:].set_visible(False) + axe.set_xticks(np.arange(data.shape[1] + 1) - 0.5, minor=True) + axe.set_yticks(np.arange(data.shape[0] + 1) - 0.5, minor=True) + axe.grid(which="minor", color="black", linestyle="-", linewidth=1) + axe.tick_params(which="both", bottom=False, left=False) + return img + + +def remove_reference(metas): + """Remove reference for metric from list of metadata. + + NOTE: list() creates a copy with same references to allow removing in place + """ + for meta in list(metas): + if meta.get("reference_for_metric", False): + metas.remove(meta) + + +def add_split_none(metas): + """List of metadata with split=None if no split is given.""" + for meta in metas: + if "split" not in meta: + meta["split"] = None + + +def open_file(cfg, metadata, **selection): + """Try to find a single file for selection and return data. + + If multiple files are found, raise an error. If no file is found, + return np.nan. 
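+
+    For example, ``open_file(cfg, metadata, dataset="ERA5",
+    short_name="spei")`` selects by arbitrary metadata facets (the facet
+    values here are illustrative).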
+    """
+    metas = select_metadata(metadata, **selection)
+    if len(metas) > 1:
+        raise ValueError(f"Multiple files found for {selection}")
+    if len(metas) < 1:
+        log.debug("No metadata found for %s", selection)
+        return np.nan
+    log.debug("Metadata found for %s", selection)
+    if cfg.get("regions", False):
+        # use iris/esmvalcore to extract the configured regions
+        cube = iris.load_cube(metas[0]["filename"])
+        cube = pp.extract_shape(
+            cube, shapefile="ar6", ids={"Acronym": cfg["regions"]}
+        )
+        das = xr.DataArray.from_iris(cube).to_dataset()
+    else:
+        das = xr.open_dataset(metas[0]["filename"])
+    varname = list(das.data_vars.keys())[0]
+    return das[varname].values.item()
+
+
+def load_data(cfg, metas):
+    """Load all nc files from metadata into an xarray dataset.
+
+    The dataset contains all relevant information for the plot. Coord
+    names are metadata keys, ordered as x, y, group, split. The default
+    split is None or, if all splits are named, the first one from the
+    list.
+    """
+    coords = {  # order matters: x, y, group, split
+        cfg["x_by"]: list(group_metadata(metas, cfg["x_by"]).keys()),
+        cfg["y_by"]: list(group_metadata(metas, cfg["y_by"]).keys()),
+        cfg["group_by"]: list(group_metadata(metas, cfg["group_by"]).keys()),
+        cfg["split_by"]: [
+            str(k) for k in group_metadata(metas, cfg["split_by"]).keys()
+        ],
+    }
+    log.debug("coordinates: %s", coords)
+    shape = [len(coord) for coord in coords.values()]
+    var_data = xr.DataArray(np.full(shape, np.nan), dims=list(coords.keys()))
+    data = xr.Dataset({"var": var_data}, coords=coords)
+    # loop over each cell (coord combination) and load data if existing
+    for coord_tuple in itertools.product(*coords.values()):
+        selection = dict(zip(coords.keys(), coord_tuple))
+        log.debug("loading %s", selection)
+        data["var"].loc[selection] = open_file(cfg, metas, **selection)
+    if None in data.coords[cfg["split_by"]].values:
+        cfg.update({"default_split": None})
+    else:
+        cfg.update({"default_split": data.coords[cfg["split_by"]].values[0]})
+    log.debug("using %s as default split", cfg["default_split"])
+    log.debug("Loaded Data:")
+    log.debug(data)
+    return data
+
+
+def split_legend(cfg, grid, data):
+    """Create legend for references, based on split coordinate in the dataset.
+
+    Mpl handles axes positions in relative figure coordinates. To anchor the
+    legend to the origin of the first graph (bottom left) with fixed size,
+    without messing up the layout for changing figure sizes, a few extra
+    steps are required.
+    NOTE: maybe `mpl_toolkits.axes_grid1.axes_divider.AxesDivider` simplifies
+    this a bit by using `append_axes`.
+    """
+    grid[0].get_figure().canvas.draw()  # set axes position in figure
+    size = cfg["legend"].get("size", 0.3)  # rect width in physical size (inch)
+    fig_size = grid[0].get_figure().get_size_inches()  # physical figure size
+    ax_size = (size / fig_size[0], size / fig_size[1])  # legend (fig coords)
+    gaps = [0.3 / fig_size[0], 0.3 / fig_size[1]]  # margins (fig coords)
+    # anchor legend on origin of first plot or colorbar
+    anchor = grid[0].get_position().bounds  # relative figure coordinates
+    if cfg["legend"].get("position", "right") == "right":
+        cbar_x = grid.cbar_axes[0].get_position().bounds[0]
+        gaps[0] *= 0.8  # compensate colorbar padding
+        anchor = (
+            cbar_x + gaps[0] + cfg["legend"]["x_offset"],
+            anchor[1] - gaps[1] - ax_size[1] + cfg["legend"]["y_offset"],
+        )
+    else:
+        anchor = (
+            anchor[0] - gaps[0] - ax_size[0] + cfg["legend"]["x_offset"],
+            anchor[1] - gaps[1] - ax_size[1] + cfg["legend"]["y_offset"],
+        )
+    # create legend as empty imshow like axes in figure coordinates
+    axes = {"main": grid[0].get_figure().add_axes([*anchor, *ax_size])}
+    axes["main"].imshow(np.zeros((1, 1)))  # same axes properties as main plot
+    axes["main"].set_xticks([])
+    axes["main"].set_yticks([])
+    axes["twiny"], axes["twinx"] = [axes["main"].twiny(), axes["main"].twinx()]
+    axes["twinx"].set_yticks([])
+    axes["twiny"].set_xticks([])
+    label_at = [  # order matches get_triangle_nodes (halves and quarters)
+        axes["main"].set_ylabel,  # left
+        axes["twinx"].set_ylabel,  # right
+        axes["main"].set_xlabel,  # bottom
+        axes["twiny"].set_xlabel,  # top
+    ]
+    for i, label in enumerate(data.coords[cfg["split_by"]].values):
+        axes["main"].add_patch(
+            patches.Polygon(
+                get_triangle_nodes(
+                    i, len(data.coords[cfg["split_by"]].values)
+                ),
+                closed=True,
+                facecolor=["#bbb", "#ccc", "#ddd", "#eee"][i],
+                edgecolor="black",
+                linewidth=0.5,
+                fill=True,
+            )
+        )
+        label_at[i](label)
+
+
+def overlay_reference(cfg, axe, data, triangle):
+    """Create triangular overlays for given data and axes."""
+    # use same colors as in main plot
+    cmap = axe.get_images()[0].get_cmap()
+    norm = axe.get_images()[0].norm
+    if cfg["nan_color"] is not None:
+        cmap.set_bad(cfg["nan_color"])
+    for i, j in itertools.product(*map(range, data.shape)):
+        if np.isnan(data[i, j]) and cfg["nan_color"] is None:
+            continue
+        color = cmap(norm(data[i, j]))
+        edges = [(e[0] + j, e[1] + i) for e in triangle]
+        patch = patches.Polygon(
+            edges,
+            closed=True,
+            facecolor=color,
+            edgecolor="black",
+            linewidth=0.5,
+            fill=True,
+        )
+        axe.add_patch(patch)
+
+
+def plot_group(cfg, axe, data, title=None):
+    """Create matrix for one subplot in ax using plt.imshow.
+
+    By default split None is used; if all splits are named, the first one
+    is used. Other splits will be added by overlaying triangles.
+    """
+    split = data.sel({cfg["split_by"]: cfg["default_split"]})
+    log.debug("plotting group %s", title)
+    log.debug(split)
+    plot_matrix(
+        split.values.T,  # 2d numpy array
+        split.coords[cfg["y_by"]].values,  # y_labels
+        split.coords[cfg["x_by"]].values,  # x_labels
+        axe,
+        cfg["plot_kwargs"],
+    )
+    if title is not None:
+        axe.set_title(title)
+    axe.set(**cfg["axes_properties"])
+
+
+def get_triangle_nodes(position, total_count=2):
+    """Return list of nodes with relative x, y coordinates.
+
+    The nodes of the triangle are given as a list of three tuples. Each tuple
+    contains relative coordinates (-0.5 to +0.5). For a total of <= 2 a top
+    left (position=0) or bottom right (position=1) half is returned.
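+    For example, ``get_triangle_nodes(0)`` returns the top left half
+    ``[(0.5, -0.5), (-0.5, -0.5), (-0.5, 0.5)]``.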
+    For higher counts (3 or 4) one quarter is returned for each position.
+    NOTE: Order matters. Ensure axis labels for the legend match when changing.
+    """
+    if total_count < 3:
+        halves = [
+            [(0.5, -0.5), (-0.5, -0.5), (-0.5, 0.5)],  # top left
+            [(0.5, -0.5), (0.5, 0.5), (-0.5, 0.5)],  # bottom right
+        ]
+        return halves[position]
+    quarters = [
+        [(-0.5, -0.5), (0, 0), (-0.5, 0.5)],  # left
+        [(0.5, -0.5), (0, 0), (0.5, 0.5)],  # right
+        [(-0.5, 0.5), (0, 0), (0.5, 0.5)],  # bottom
+        [(-0.5, -0.5), (0, 0), (0.5, -0.5)],  # top
+    ]
+    return quarters[position]
+
+
+def plot_overlays(cfg, grid, data):
+    """Call overlay_reference for each split in data and each group in grid."""
+    split_count = data.shape[3]
+    group_count = data.shape[2]
+    for i in range(group_count):
+        if split_count < 2:
+            log.debug("No additional splits for overlay.")
+            break
+        if split_count > 4:
+            log.warning("Too many splits for overlay, only 3 will be plotted.")
+        group_data = data.isel({cfg["group_by"]: i})
+        group_data = group_data.dropna(cfg["x_by"], how="all")
+        for sss in range(split_count):
+            split = group_data.isel({cfg["split_by"]: sss})
+            split_label = split.coords[cfg["split_by"]].values.item()
+            if split_label == cfg["default_split"]:
+                log.debug("Skipping default split for overlay.")
+                continue
+            nodes = get_triangle_nodes(sss, split_count)
+            overlay_reference(cfg, grid[i], split.values.T, nodes)
+
+
+def plot(cfg, data):
+    """Create figure with subplots for each group.
+
+    Sets the same color range for all subplots and overlays additional
+    references based on the content of data (xr.DataArray).
+    """
+    fig = plt.figure(1, cfg.get("figsize", (5.5, 3.5)))
+    group_count = len(data.coords[cfg["group_by"]])
+    grid = ImageGrid(
+        fig,
+        111,  # similar to subplot(111)
+        cbar_mode="single",
+        cbar_location="right",
+        cbar_pad=0.1,
+        cbar_size=0.2,
+        nrows_ncols=(1, group_count),
+        axes_pad=0.1,
+    )
+    # remap colorbar to 10 discrete steps
+    cmap = mpl.cm.get_cmap(cfg.get("cmap", "RdYlBu_r"), 10)
+    cfg["plot_kwargs"]["cmap"] = cmap
+    for i in range(group_count):
+        log.debug("Plotting group %s", i)
+        group = data.isel({cfg["group_by"]: i})
+        group = group.dropna(cfg["x_by"], how="all")
+        title = None
+        if group_count > 1:
+            title = group.coords[cfg["group_by"]].values.item()
+        plot_group(cfg, grid[i], group, title=title)
+    # use same colorrange and colorbar for all subplots:
+    unify_limits(grid)
+    # set cb of first image as single cb for the figure
+    grid.cbar_axes[0].colorbar(grid[0].get_images()[0], **cfg["cbar_kwargs"])
+    if data.shape[3] > 1:
+        plot_overlays(cfg, grid, data)
+    if cfg["plot_legend"] and data.shape[3] > 1:
+        split_legend(cfg, grid, data)
+    basename = "portrait_plot"
+    fname = get_plot_filename(basename, cfg)
+    plt.savefig(fname, bbox_inches="tight", dpi=cfg["dpi"])
+    log.info("Figure saved: %s", fname)
+
+
+def normalize(array, method, dims):
+    """Divide and shift values along dims depending on method."""
+    log.debug("Normalizing data with method %s", method)
+    shift = 0
+    norm = 1
+    if "mean" in method:
+        norm = array.mean(dim=dims)
+    elif "median" in method:
+        norm = array.median(dim=dims)
+    if "centered" in method:
+        shift = norm
+    normalized = (array - shift) / norm
+    return normalized
+
+
+def sort_data(cfg, dataset):
+    """Sort the dataset by custom or alphabetical order."""
+    # custom order example:
+    # import pandas as pd
+    # order = ['value3', 'value1', 'value2']  # replace by custom order
+    # ds[cfg['y_by']] = pd.Categorical(
+    #     ds[cfg['y_by']], categories=order, ordered=True)
+    # ds = ds.sortby('y_by')
+    # sort alphabetically (case insensitive)
+    dataset = dataset.sortby(
+        [
+            dataset[cfg["x_by"]].str.lower(),
+            dataset[cfg["y_by"]].str.lower(),
+            dataset[cfg["group_by"]].str.lower(),
+            dataset[cfg["split_by"]].str.lower(),
+        ]
+    )
+    # apply custom orders if given:
+    if cfg.get("y_order"):
+        dataset = dataset.reindex({cfg["y_by"]: cfg["y_order"]})
+    return dataset
+
+
+def apply_distance_metric(cfg, metas):
+    """Optionally apply a preprocessor distance metric.
+
+    The reference_for_metric facet is required on the reference datasets.
+    """
+    if not cfg["distance_metric"]:
+        return metas  # nothing to do, keep the input metadata unchanged
+    new_metas = []
+    for y_metas in group_metadata(metas, cfg["y_by"]).values():
+        references = select_metadata(y_metas, reference_for_metric=True)
+        for ref_meta in references:
+            ref_cube = iris.load_cube(ref_meta["filename"])
+            for meta in y_metas:  # usually variable
+                if meta.get("reference_for_metric", False):
+                    continue  # skip distance to itself or other references
+                cube = iris.load_cube(meta["filename"])
+                # TODO: fixing units should be done in preproc or the diagnostics
+                if cube.units != ref_cube.units:
+                    cube = pp.convert_units(cube, ref_cube.units)
+                distance = pp.distance_metric(
+                    [cube], reference=ref_cube, metric=cfg["distance_metric"]
+                )
+                split = ref_meta.get(cfg["split_by"], cfg["default_split"])
+                basename = ""
+                basename += f"_{meta[cfg['group_by']]}"
+                basename += f"_{split}"
+                basename += f"_{meta[cfg['y_by']]}"
+                basename += f"_{meta[cfg['x_by']]}_{cfg['distance_metric']}"
+                fname = get_diagnostic_filename(basename, cfg)
+                iris.save(distance, fname)
+                log.info("Distance metric saved: %s", fname)
+                # TODO: adjust all relevant meta data
+                new_meta = meta.copy()  # duplicate for multiple references
+                new_meta[cfg["split_by"]] = split
+                new_meta["filename"] = fname
+                new_metas.append(new_meta)
+    return new_metas
+
+
+def set_defaults(cfg):
+    """Set default values for most important config parameters."""
+    cfg.setdefault("distance_metric", None)
+    cfg.setdefault("normalize", "centered_median")
+    cfg.setdefault("x_by", "dataset")
+    cfg.setdefault("y_by", "short_name")
+    cfg.setdefault("group_by", "project")
+    cfg.setdefault("split_by", "split")  # extra facet
+    cfg.setdefault("default_split", None)
+    cfg.setdefault("cbar_kwargs", {})
+    cfg.setdefault("axes_properties", {})
+    cfg.setdefault("nan_color", "white")
+    cfg.setdefault("figsize", (7.5, 3.5))
+    cfg.setdefault("dpi", 300)
+    cfg.setdefault("plot_legend", True)
+    cfg.setdefault("plot_kwargs", {})
+    cfg["plot_kwargs"].setdefault("cmap", "RdYlBu_r")
+    cfg["plot_kwargs"].setdefault("vmin", -0.5)
+    cfg["plot_kwargs"].setdefault("vmax", 0.5)
+    cfg.setdefault("legend", {})
+    cfg["legend"].setdefault("x_offset", 0)
+    cfg["legend"].setdefault("y_offset", 0)
+    cfg["legend"].setdefault("size", 0.3)
+
+
+def main(cfg):
+    """Run the diagnostic."""
+    metas = list(cfg["input_data"].values())
+    set_defaults(cfg)
+    log.debug("distance metric: %s", cfg["distance_metric"])
+    metas = apply_distance_metric(cfg, metas)
+    remove_reference(metas)
+    add_split_none(metas)
+    dataset = load_data(cfg, metas)
+    dataset = sort_data(cfg, dataset)
+    if cfg["normalize"] is not None:
+        dataset["var"] = normalize(
+            dataset["var"],
+            cfg["normalize"],
cfg["group_by"]] + ) + plot(cfg, dataset["var"]) + + +if __name__ == "__main__": + with run_diagnostic() as config: + main(config) diff --git a/esmvaltool/diag_scripts/droughts/regional_hexagons.py b/esmvaltool/diag_scripts/droughts/regional_hexagons.py new file mode 100644 index 0000000000..49c58f122c --- /dev/null +++ b/esmvaltool/diag_scripts/droughts/regional_hexagons.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python +"""Hexagonal overview plot for IPCC AR6 WG1 reference regions. + +Configuration options in recipe +------------------------------- +group_by: str, optional (default: "dataset") + The `group_by` config parameter can be used to create individual figures + for input data which differs in values of the given key. +split_by: str, optional (default: "exp") + Metadata key to split the data into different tiles of the hexagon. + This is ignored for `split_by_statistic: True`. + Only keys with 6 or less different values are supported + (1,2,3,6 can be distributed symmetrically). +split_by_statistic: bool, optional (default: False) + Split the hexagons into different tiles for each statistic, + rather than different metadata. +statistics: list, optional (default: ['mean']) + Any of the operators valid for `esmvalcore.preprocessor.area_statistics` + can be used: 'mean', 'median', 'min', 'max', 'std_dev', 'sum', 'variance' + or 'rms'. The regions are collapsed using the given operator. + If not `split_by_statistic: True`, a figure is created for each operator. +plot_mmm: bool, optional (default: True) + wether to plot multi-model mean + TODO: support this and plot_models +cmap: string, optional (default: 'YlOrRd') + colormap to use +vmin: float, optional (default: None) + minimum value for colormap +vmax: float, optional (default: None) + maximum value for colormap +cbar: bool, optional (default: True) + wether to plot colorbar +labels: dict, optional + dictionary with labels for each split. Dict keys must match the values + corresponding to the split_by key. Set it to False to disable legend. + Default: generated from statistics or split. + TODO: implement dicts (rn only list works) +strip_plot: bool, optional (default: False) + wether to plot the colorbar to a seperate file +cb_label: string, optional (default: 'Decadal change of SPEI') + colorbar label +select_metadata: dict, optional + limit the metadata to use for the plot. Keys must match metadata keys. + Default: {'short_name': 'spei', 'diffmap_metric': 'diff'} +shapefile: string, optional (default: None) + For IPCC WGI reference regions use + `ar6_regions/IPCC-WGI-reference-regions-v4.shp`. + TODO: move to preprocessor +exclude_regions: list, optional (default: []) + regions that are excluded from the plot. + TODO: move to preprocessor as well? +filename: string, optional (default: {group}_{split}_{operator}.png) + Filename template for plot files. `group`, `split` and `operator` + and all metadata keys can be used as placeholders. + TODO: Replace suffix by filename (Not implemented yet). +show_values: bool, optional (default: False) + Add corresponding value in a new line to each regions label. For multiple + splits only the value of the first split is shown. 
+""" + +import logging + +import iris +import matplotlib.pyplot as plt +import numpy as np +from matplotlib import cm as mplcm +from matplotlib import colors as mplcolors +from matplotlib.patches import Polygon + +import esmvaltool.diag_scripts.droughtindex.utils as ut +import esmvaltool.diag_scripts.shared as e +from esmvaltool.diag_scripts.shared import ( + # ProvenanceLogger, + group_metadata, + select_metadata, +) + +logger = logging.getLogger(__file__) + + +def plot_colorbar( + cfg: dict, + plotfile: str, + plot_kwargs: dict, + orientation="vertical", + mappable=None, +) -> None: + """Creates a colorbar in its own figure. Usefull for multi panel plots.""" + fig, ax = plt.subplots(figsize=(1, 4), layout="constrained") + if mappable is None: + cmap = plot_kwargs.get("cmap", "RdYlBu") + norm = mplcolors.Normalize( + vmin=plot_kwargs.get("vmin"), + vmax=plot_kwargs.get("vmax"), + ) + mappable = mplcm.ScalarMappable(norm=norm, cmap=cmap) + fig.colorbar( + mappable, + cax=ax, + orientation=orientation, + label=plot_kwargs["cbar_label"], + ) + plotfile = plotfile.removesuffix(".png") + fig.savefig(plotfile + "_cb.png", bbox_inches="tight") + + +def hexmap( + cfg, + regions, + values, + labels=None, + suffix="", + r=0.8, + bg=False, + filename=None, + draw_nans=True, +): + """Plot hexagons for IPPC WG1 reference regions for data pairs + + Parameters + ---------- + cfg + config with indexname and optional plot parameters: + cmap: string, vmin: float, vmax: float, cbar: bool, + regions + list of IDs (strings) for the reference regions + values + list (each split) of list (each region) of values (floats) + labels, optional + names of the splits to create legend. Same length as values + texts, optional + array (same length as regions) of strings which are added to each cell, + by default None + suffix, optional + string to include in filename. TODO: replace by filename format str. + r, optional + scaling parameter of a hexagon, by default 1 + bg, optional + draw white background instead of transparent, by default False + draw_nans, optional + draw gray polygons for nan values, by default True. + filename, optional + filename to save the plot. If not provided the filename is generated + automatically using shared.get_plot_filename(). By default None + TODO: format with metadata. 
+
+    Raises
+    ------
+    ValueError
+        For mismatching input array lengths
+    """
+    if len(regions) != len(values[0]):
+        raise ValueError("regions and values must have the same length")
+    if labels and len(labels) != len(values):
+        raise ValueError("values and labels must have the same length")
+    values = np.array(values)  # np array makes it easier to deal with inf/nan
+    figsize = (12, 6) if cfg["cbar"] and not cfg["strip_plot"] else (10, 6)
+    fig, axx = plt.subplots(figsize=figsize, dpi=300, frameon=False)
+    axx.tick_params(
+        bottom=False,
+        left=False,
+        labelbottom=False,
+        labelleft=False,
+    )
+    axx.set_xlim(-0.5, 19.5)
+    axx.set_ylim(0, 12)
+    cmap = plt.get_cmap(cfg.get("cmap", "YlOrRd"))
+    cmap.set_bad(cfg.get("cmap_nan", "lightgray"), 1.0)
+    cmap.set_over(cfg.get("cmap_inf", "dimgray"), 1.0)
+    if not bg:
+        plt.axis("off")
+    # calculate hexagon positions based on figure and scale
+    rx = np.sqrt(3) / 2 * r  # 0.8660254037844386
+    corners = np.array(
+        [
+            [0, r],
+            [rx, r / 2],
+            [rx, -r / 2],
+            [0, -r],
+            [-rx, -r / 2],
+            [-rx, r / 2],
+        ],
+    )
+    cells = ut.get_hex_positions()  # dict of coordinates for hexagons
+    cells = {
+        a: [c[0] * rx, (8 - c[1]) * (3 / 2 * r)] for a, c in cells.items()
+    }
+    # NOTE: cfg["vmin"]/cfg["vmax"] may exist with value None, so fall back
+    # to the data range explicitly instead of relying on dict defaults
+    finite = values[np.isfinite(values)]
+    vmin = cfg.get("vmin") if cfg.get("vmin") is not None else finite.min()
+    vmax = cfg.get("vmax") if cfg.get("vmax") is not None else finite.max()
+    norm = mplcolors.Normalize(vmin=vmin, vmax=vmax)
+    logger.debug("colorbar range: %s to %s", vmin, vmax)
+    if "levels" in cfg:
+        lvls = cfg["levels"]
+        cmap_colors = [cmap(norm(lvl)) for lvl in lvls]
+        cmap = mplcolors.ListedColormap(cmap_colors)
+        norm = mplcolors.BoundaryNorm(lvls, cmap.N)
+        cmap.set_bad(cfg.get("cmap_nan", "lightgray"), 1.0)
+        cmap.set_over(cfg.get("cmap_inf", "lightgray"), 1.0)
+    abbrs = ut.get_region_abbrs()
+    # draw hexagon for each region
+    for iii, name in enumerate(regions):
+        if name not in abbrs:
+            logger.warning("No hex cells for %s. Skipping.", name)
+            continue
+        abbr = abbrs[name]
+        exclude = cfg.get("exclude_regions", [])
+        if abbr in exclude or name in exclude:
+            logger.info("Region %s excluded. Skipping hexagon.", abbr)
+            continue
+        if abbr not in cells:
+            logger.warning("Region %s not found. Skipping hexagon.", abbr)
+            continue
+        if len(cfg["regions"]) > 1 and abbr not in cfg["regions"]:
+            logger.info("Region %s not in configured regions. Masking.", abbr)
+            values[0][iii] = np.nan
+        c = cells[abbr]  # center
+        hex_corners = np.array([c, c, c, c, c, c]) + corners
+        hc = list(hex_corners)
+        hc.append(hc[0])  # last corner = first corner for closed polygons
+        # create polygon vertices depending on the number of values per hex
+        if len(values) > 3:  # 6 pieces
+            verts = [(hc[i], hc[i + 1], c) for i in range(6)]
+        elif len(values) > 2:
+            verts = [
+                (hc[2 * i], hc[2 * i + 1], hc[2 * i + 2], c) for i in range(3)
+            ]
+        elif len(values) > 1:
+            verts = np.array([hc[0:4], hc[3:]])
+        else:
+            verts = [hc]
+        color = "black"
+        for vvv in range(len(values)):
+            val = values[vvv][iii]
+            color = cmap(norm(val))
+            if not draw_nans and np.isnan(val):
+                continue
+            poly = Polygon(verts[vvv], ec="white", fc=color, lw=1)
+            axx.add_artist(poly)
+        base = Polygon(hex_corners, ec="white", fc=None, lw=2, fill=False)
+        axx.add_artist(base)
+        tval = values[0][iii]  # first split for text label
+        text = abbr
+        if cfg["show_values"] and not np.isnan(tval):
+            text = f"{abbr}\n{tval:.2f}"
+        text_style = {"ha": "center", "va": "center", "fontsize": 10}
+        axx.text(c[0], c[1], text, **text_style, color=ut.font_color(color))
+
+    if filename is None:
+        filename = ut.get_plot_filename(cfg, "hexmap_regions" + suffix)
+
+    mappable = mplcm.ScalarMappable(norm=norm, cmap=cmap)
+    if cfg.get("cbar", True) and not cfg.get("strip_plot", False):
+        plt.colorbar(mappable, ax=axx)
+    if cfg.get("strip_plot", False):
+        cb_kwargs = {"cbar_label": cfg.get("cb_label", "")}
+        plot_colorbar(cfg, filename, cb_kwargs, mappable=mappable)
+    if labels:
+        pos = (1, 2)
+        c = pos
+        anchors = [  # corner, ha, va
+            [(rx, rx), "bottom", "left"],
+            [(1.5 * rx, 0), "center", "left"],
+            [(rx, -rx), "top", "left"],
+            [(-rx, -rx), "top", "right"],
+            [(-1.5 * rx, 0), "center", "right"],
+            [(-rx, rx), "bottom", "right"],
+        ]
+        # TODO: repeated from above, extract into a helper function
+        hex_corners = np.array([c, c, c, c, c, c]) + corners
+        hc = list(hex_corners)
+        hc.append(hc[0])  # last corner = first corner for closed polygons
+        # create polygon vertices depending on the number of values per hex
+        if len(values) > 3:  # 6 pieces
+            verts = [(hc[i], hc[i + 1], c) for i in range(6)]
+        elif len(values) > 2:
+            verts = [
+                (hc[2 * i], hc[2 * i + 1], hc[2 * i + 2], c) for i in range(3)
+            ]
+        elif len(values) > 1:
+            verts = np.array([hc[0:4], hc[3:]])
+        else:
+            verts = [hc]
+        for vvv in range(len(values)):
+            color = ["#fff", "#aaa", "#777", "#555", "#333", "#000"][vvv]
+            poly = Polygon(verts[vvv], ec="white", fc=color, lw=1)
+            axx.add_artist(poly)
+            anc = anchors[vvv]
+            lpos = (anc[0][0] + pos[0], anc[0][1] + pos[1])
+            lab = labels[vvv]
+            axx.text(lpos[0], lpos[1], lab, fontsize=10, va=anc[1], ha=anc[2])
+    fig.savefig(filename, bbox_inches="tight")
+
+
+def ensure_single_meta(meta, txt):
+    """Raise an error if there is not exactly one entry in the meta list."""
+    if len(meta) == 0:
+        raise ValueError(f"No files for {txt}.")
+    if len(meta) > 1:
+        raise ValueError(f"Too many files for {txt}.")
+    return meta[0]
+
+
+def load_and_plot_splits(cfg, splits, group, operator):
+    """Create hexmap with tiles belonging to different metadata / files."""
+    if len(splits) > 6:
+        raise ValueError(f"Too many inputs for {group}. Max: 6.")
+    labels = cfg.get("labels", list(splits.keys()))
+    values, regions, meta = [], [], {}
+    # extract regional average from each cube
+    for split, meta in splits.items():
+        meta = ensure_single_meta(meta, f"{group}/{split}")
+        cube = iris.load_cube(meta["filename"])
+        collapsed = ut.regional_stats(cfg, cube, operator)
+        values.append(collapsed.data)
+        regions = collapsed.coord("shape_id").points
+    basename = cfg.get("basename", f"hexmap_regions_{group}")
+    fmeta = meta.copy()
+    fmeta["group"] = group
+    filename = ut.get_plot_filename(cfg, basename, fmeta)
+    hexmap(cfg, regions, values, labels=labels, filename=filename)
+
+
+def load_and_plot_stats(cfg, metas, group, split, statistics):
+    """Create hexmap with tiles for different operators for the same data."""
+    meta = ensure_single_meta(metas, f"{group}")
+    cube = iris.load_cube(meta["filename"])
+    values, regions = [], []
+    for operator in statistics:
+        collapsed = ut.regional_stats(cfg, cube, operator)
+        values.append(collapsed.data)
+        regions = collapsed.coord("shape_id").points
+    hexmap(
+        cfg,
+        regions,
+        values,
+        labels=statistics,
+        suffix=f"_{group}_{split}_statistics",
+    )
+
+
+def set_defaults(cfg):
+    """Ensure all config parameters are set."""
+    cfg.setdefault("group_by", "dataset")
+    cfg.setdefault("statistics", ["mean"])
+    cfg.setdefault("cmap", "YlOrRd")
+    cfg.setdefault("vmin", None)
+    cfg.setdefault("vmax", None)
+    cfg.setdefault("cbar", True)
+    cfg.setdefault("labels", None)
+    cfg.setdefault("show_values", False)
+    cfg.setdefault("cb_label", "Decadal change of SPEI")
+    cfg.setdefault("strip_plot", False)
+    cfg.setdefault("split_by", "exp")
+    cfg.setdefault("split_by_statistic", False)
+    cfg.setdefault("plot_mmm", True)
+    cfg.setdefault("exclude_regions", [])
+    cfg.setdefault("regions", [])
+    cfg.setdefault("filename", "{group}_{split}_{operator}.png")
+    cfg.setdefault(
+        "select_metadata",
+        {"short_name": "spei", "diffmap_metric": "diff"},
+    )
+
+
+def main(cfg):
+    """Select and group the input metadata, create one figure per group."""
+    set_defaults(cfg)
+    # select metadata
+    metas = cfg["input_data"].values()
+    metas = select_metadata(metas, **cfg.get("select_metadata", {}))
+    groups = group_metadata(metas, cfg["group_by"])
+    statistics = cfg["statistics"]
+    for group, metas in groups.items():
+        # plot splits (segments in hex) for each group (figures)
+        splits = group_metadata(metas, cfg["split_by"])
+        if cfg.get("split_by_statistic"):
+            for split, split_metas in splits.items():
+                load_and_plot_stats(cfg, split_metas, group, split, statistics)
+        else:
+            for operator in statistics:
+                load_and_plot_splits(cfg, splits, group, operator)
+
+
+if __name__ == "__main__":
+    with e.run_diagnostic() as config:
+        main(config)
diff --git a/esmvaltool/diag_scripts/droughts/seasonal_cycle.py b/esmvaltool/diag_scripts/droughts/seasonal_cycle.py
new file mode 100644
index 0000000000..7a18f8b049
--- /dev/null
+++ b/esmvaltool/diag_scripts/droughts/seasonal_cycle.py
+"""Diagnostic script to plot multi-panel seasonal cycles for different regions.
+
+NOTE: It is just a script for now, but could easily be adapted to a diagnostic.
+
+regions: List(List(str)):
+    Nested list of regions. One panel for each first level entry.
+    Each element can be a list of acronyms to combine (mean) multiple regions.
+legend_r: bool
+    If True, the legend is placed right of the panels. Otherwise below.
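+
+For example, a two panel layout where the second panel averages two regions
+(illustrative values for the module-level ``REGIONS`` constant below)::
+
+    REGIONS = [["MED"], ["WCE", "CNA"]]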
+""" + +from pathlib import Path + +import iris +import matplotlib +import yaml +from esmvalcore import preprocessor as pp +from matplotlib import pyplot as plt + +SESSION_HIST = Path( + "/work/bd0854/b309169/output/recipe_lindenlaub25_historical_20250320_162808" +) +REGIONS = [ + ["WAF"], + ["NES"], + ["EAS"], + ["SAS"], + ["WCE"], + ["CNA"], + ["EEU"], + ["SES"], + ["WSB"], + ["MED"], + ["SAU"], +] +COMBINED = [ + "WCE", + "CNA", + "SAS", + "EAS", + "EEU", + "WAF", + "SES", + "WSB", + "MED", + "NES", + "SAU", +] +REGIONS.append(COMBINED) +VAR = "tasmin" +# VAR = "pr" +TASMA = True # calculate tasmin+max/2 instead of tasmin +# tas plot +TSLICE = None +FILL = False +# TSLICE = [0, 120] # 0 +# TSLICE = [300, 420] # 0 + + +LEGEND_R = True # default bottom + +FNAME = f"~/seasonal_cycle_{VAR}_w.png" +if TSLICE is not None: + FNAME = FNAME.replace(".png", f"_{TSLICE[0]}-{TSLICE[1]}.png") +SHARE_Y = False +YLIMS = None # limit per row or None for auto + +cfg = { + "regions": REGIONS, + "var": VAR, + "plot_stdv": FILL, + "interval": TSLICE, + "tas_from_min_max": TASMA, + "cmip6_styles": yaml.safe_load(Path("cmip6.yml").read_text()), + "share_y": SHARE_Y, + "ylims": YLIMS, +} + +if cfg["var"] == "pr": + cfg["ylims"] = [(0, 12), (0, 6), (0, 6)] # limit per row or None for auto + cfg["share_y"] = True +elif cfg["var"] == "tasmin": + cfg["share_y"] = True + cfg["ylims"] = [ + (260, 305), + (260, 305), + (260, 305), + ] # limit per row or None for auto + +hist_metas = yaml.safe_load( + (SESSION_HIST / f"preproc/pet_historical/{VAR}/metadata.yml").read_text() +) +obs_metas = yaml.safe_load( + (SESSION_HIST / f"preproc/pet_obs/{VAR}/metadata.yml").read_text() +) +extra_metas = yaml.safe_load( + (SESSION_HIST / f"preproc/validate_obs/{VAR}/metadata.yml").read_text() +) +obs_metas.update(extra_metas) + +metas = obs_metas.copy() +metas.update(hist_metas) + + +styles = yaml.safe_load(Path("cmip6.yml").read_text()) + +# REGIONS = [["MED"], ["WCE"], []] +# hist_metas = yaml.safe_load((SESSION_HIST / "preproc/pet_historical/pr/metadata.yml").read_text()) +# obs_metas = yaml.safe_load((SESSION_HIST / "preproc/pet_obs/pr/metadata.yml").read_text()) + +# pr plot + + +def create_figure_layout(cfg): + fig = plt.figure() + axs = [] + nreg = len(cfg["regions"]) + if nreg > 4: + ncols = 4 + nrows = (nreg // ncols) + (1 if nreg % ncols > 0 else 0) + else: + ncols = nreg + nrows = 1 + if cfg.get("legend_r", False): + fig.set_size_inches(2.5 * (ncols + 1.5), 2.5 * nrows) + gs = matplotlib.gridspec.GridSpec( + nrows, + ncols + 1, + width_ratios=[3] * ncols + [1.9], + wspace=0.07, + hspace=0.07, + ) + leg_ax = fig.add_subplot(gs[:, -1]) + else: + fig.set_size_inches(2.5 * ncols, 2.5 * nrows + 1) + gs = matplotlib.gridspec.GridSpec( + nrows + 1, + ncols, + height_ratios=[3] * nrows + [1.5], + wspace=0.07, + hspace=0.07, + ) + leg_ax = fig.add_subplot(gs[-1, 0:]) + col_ax = None + for i in range(nrows): + for j in range(ncols): + if i * ncols + j >= nreg: + break + ax = fig.add_subplot(gs[i, j]) + if j == 0: + # TODO: use metadata instead + if cfg["var"] == "pr": + ax.set_ylabel("Precipitation (mm/day)") + elif cfg["var"] in ["tasmin", "tasmax", "tas"]: + ax.set_ylabel("Temperature (K)") + else: + ax.set_ylabel(cfg["var"]) + col_ax = ax + if cfg["ylims"] is not None: + ax.ylim = cfg["ylims"][i] + elif cfg["share_y"]: + ax.sharey(col_ax) + if cfg["share_y"] and j == 0: + ax.tick_params(axis="y", labelleft=True, labelright=False) + elif cfg["share_y"] and j == ncols - 1: + ax.tick_params(axis="y", labelleft=False, 
labelright=True) + else: + ax.tick_params(axis="y", labelleft=False, labelright=False) + ax.tick_params( + axis="both", which="both", direction="in", top=True, right=True + ) + ax.yaxis.grid(True, which="major", linestyle=":", alpha=0.4) + ax.xaxis.grid(True, which="major", linestyle=":", alpha=0.4) + axs.append(ax) + ax.set_xticks(range(1, 13)) + if i == nrows - 1: + ax.set_xlabel("Month") + ax.set_xticklabels( + [ + "Jan", + "", + "Mar", + "", + "May", + "", + "Jul", + "", + "Sep", + "", + "Nov", + "", + ] + ) + else: + ax.set_xticklabels([]) + return fig, axs, gs, nrows, ncols, leg_ax + + +def plot_cycle(cube, ax, meta, **kwargs): + ax.plot( + cube.coord("month_number").points, + cube.data, + label=meta["dataset"], + **kwargs, + ) + + +def plot_stdv(cube, std_dev, ax, **kwargs): + ax.fill_between( + std_dev.coord("month_number").points, + cube.data - std_dev.data, + cube.data + std_dev.data, + color=kwargs.get("color", "gray"), + alpha=0.3, + # label="± 1 std. dev." if "± 1 std. dev." not in ax.get_legend_handles_labels()[1] else None + ) + + +def guess_bounds(cube): + if not cube.coord("latitude").has_bounds(): + cube.coord("latitude").guess_bounds() + if not cube.coord("longitude").has_bounds(): + cube.coord("longitude").guess_bounds() + return cube + + +def regional_cycle(cube, operator="mean", regions=None): + cube = guess_bounds(cube) + if regions is None: + regions = ["MED"] + if len(regions) > 0: + extracted = pp.extract_shape( + cube, shapefile="ar6", ids={"Acronym": regions} + ) + else: + extracted = cube + reg_mean = pp.area_statistics(extracted, operator="mean") + cycle = pp.climate_statistics( + reg_mean, operator=operator, period="monthly" + ) + return cycle + + +def load_cube(fname): + cube = iris.load_cube(fname) + if cfg["var"] == "tasmin" and cfg["tas_from_min_max"]: + max_cube = iris.load_cube(fname.replace("tasmin", "tasmax")) + cube = (cube + max_cube) / 2 + if cfg["interval"] is not None: + cube = cube[cfg["interval"][0] : cfg["interval"][1]] + return cube + + +def process_region(ax, hist_metas, obs_metas, regions, **kwargs): + # plot cmip6 models + for fname, meta in hist_metas.items(): + cube = load_cube(fname) + cycle = regional_cycle(cube, regions=regions) + if meta["dataset"] not in styles: + print(f"WARNING: No style for {meta['dataset']} found!") + color = styles.get(meta["dataset"], {}).get("color", "black") + linestyle = styles.get(meta["dataset"], {}).get("dash", "-") + plot_cycle( + cycle, ax, meta, linestyle=linestyle, color=color, alpha=0.5 + ) + title = regions[0] if len(regions) == 1 else "Combined" + ax.text( + 0.06, + 0.92, + title, + transform=ax.transAxes, + va="top", + ha="left", + bbox=dict(facecolor="white", alpha=0.6, edgecolor="none"), + ) + # plot era5 + for fname, meta in obs_metas.items(): + cube = load_cube(fname) + cycle = regional_cycle(cube, regions=regions) + color = "red" + if meta["dataset"].upper() == "CRU": + color = "black" + if cfg["plot_stdv"]: + std_dev = regional_cycle(cube, operator="std_dev") + plot_stdv(cycle, std_dev, ax, color=color) + plot_cycle(cycle, ax, meta, color=color, linewidth=1.5) + + +def add_legend(cfg, leg_ax, axs): + leg_ax.axis("off") + hands, labs = axs[0].get_legend_handles_labels() + # leg_ax.legend(handles=hands, labels=labs, bbox_to_anchor=(1, 1), loc='lower right', ncols=6) + if cfg.get("legend_r", False): + bbox = (1.3, 0.5) + ncols = 1 + loc = "center right" + else: + bbox = (0.5, -0.25) + ncols = 5 + loc = "lower center" + leg_ax.legend( + handles=hands, + labels=labs, + ncols=ncols, + loc=loc, + 
fancybox=False, + bbox_to_anchor=bbox, + ) # Lower position + + +def main(cfg, metas): + # create and plot + fig, axs, gs, nrows, ncols, leg_ax = create_figure_layout(cfg) + for i, regs in enumerate(cfg["regions"]): + print("regs:", regs) + process_region(axs[i], hist_metas, obs_metas, regs) + + add_legend(cfg, leg_ax, axs) + # save + fig.tight_layout() + plt.subplots_adjust(left=0.06, right=0.96, top=0.96, bottom=0.06) + if cfg.get("tas_from_min_max", False): + cfg["fname"] = cfg["fname"].replace("tasmin", "tasmid") + fig.savefig(cfg["fname"], dpi=300) + print("DONE") + + +if __name__ == "__main__": + print("starting") + main(cfg, metas) diff --git a/esmvaltool/diag_scripts/droughts/styles.py b/esmvaltool/diag_scripts/droughts/styles.py new file mode 100644 index 0000000000..6218c78065 --- /dev/null +++ b/esmvaltool/diag_scripts/droughts/styles.py @@ -0,0 +1,111 @@ +# precipitation11 = mpl_cm.get_cmap("brewer_BrBG_11") +# fix colors from AR6 +# https://github.com/IPCC-WG1/colormaps + + +def rgb(r, g, b): + return [r / 256, g / 256, b / 256] + + +historical = rgb(10, 10, 10) +ssp119 = rgb(0, 173, 207) +ssp126 = rgb(23, 60, 102) +ssp245 = rgb(247, 148, 32) +ssp370 = rgb(231, 29, 37) +ssp585 = rgb(149, 27, 30) + + +experiment_colors = { + "historical": historical, + "ssp119": ssp119, + "ssp126": ssp126, + "ssp245": ssp245, + "ssp370": ssp370, + "ssp585": ssp585, +} + + +prec_div_5 = [ + rgb(84, 48, 5), + rgb(200, 148, 79), + rgb(248, 248, 247), + rgb(85, 167, 160), + rgb(0, 60, 48), +] + +prec_div_6 = [ + rgb(84, 48, 5), + rgb(191, 129, 44), + rgb(229, 209, 180), + rgb(183, 216, 213), + rgb(53, 151, 143), + rgb(0, 60, 48), +] + +prec_div_7 = [ + rgb(84, 48, 5), + rgb(173, 115, 38), + rgb(216, 182, 135), + rgb(248, 248, 247), + rgb(140, 194, 190), + rgb(44, 135, 127), + rgb(0, 60, 48), +] + +prec_div_8 = [ + rgb(84, 48, 5), + rgb(160, 105, 33), + rgb(207, 163, 103), + rgb(235, 220, 200), + rgb(202, 225, 223), + rgb(109, 179, 173), + rgb(37, 125, 115), + rgb(0, 60, 48), +] + +prec_div_9 = [ + rgb(84, 48, 5), + rgb(150, 98, 30), + rgb(200, 148, 79), + rgb(224, 199, 164), + rgb(248, 248, 247), + rgb(167, 208, 204), + rgb(85, 167, 160), + rgb(33, 116, 107), + rgb(0, 60, 48), +] + +prec_div_10 = [ + rgb(84, 48, 5), + rgb(143, 93, 27), + rgb(195, 137, 60), + rgb(216, 182, 135), + rgb(238, 226, 211), + rgb(212, 230, 229), + rgb(140, 194, 190), + rgb(67, 158, 150), + rgb(29, 110, 100), + rgb(0, 60, 48), +] + +prec_div_11 = [ + rgb(84, 48, 5), + rgb(137, 88, 25), + rgb(191, 129, 44), + rgb(210, 169, 113), + rgb(229, 209, 180), + rgb(248, 248, 247), + rgb(183, 216, 213), + rgb(118, 183, 178), + rgb(53, 151, 143), + rgb(26, 105, 95), + rgb(0, 60, 48), +] + + +# prec_11 = mpl_cm.colors.ListedColormap(prec_11, name="pr_11") +# prec_div = mpl_cm.colors.LinearSegmentedColormap.from_list("pr", prec_11) +# prec_seq + +# plt.register_cmap('prec_div', prec_div) +# plt.register_cmap('prec_11', prec_11) diff --git a/esmvaltool/diag_scripts/droughts/timeseries_scenarios.py b/esmvaltool/diag_scripts/droughts/timeseries_scenarios.py new file mode 100644 index 0000000000..134875bec2 --- /dev/null +++ b/esmvaltool/diag_scripts/droughts/timeseries_scenarios.py @@ -0,0 +1,326 @@ +"""Plot timeseries of historical period and different ssps for each variable. + +Observations will be shown as reference when given. Shaded area shows MM stddv. +Works with combined or individual inputs for historical/ssp experiments. 
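+
+Example recipe entry (a sketch based on the scenario recipe in this change
+set; option values are illustrative, not exhaustive):
+
+    timeseries:
+      scripts:
+        scenarios:
+          script: droughts/timeseries_scenarios.py
+          subplots: true
+          smooth: true
+          figsize: [10.5, 1.5]
+          legend:
+            ssp585: "SSP5-8.5"
+            historical: "Historical"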
+
+Configuration options in recipe
+-------------------------------
+plot_mmm: bool, optional (default: True)
+smooth: bool, optional (default: True)
+    Calculate yearly averages for each variable before the multi-model
+    operations and plotting.
+combined_split_years: int, optional (default: 65)
+    If experiments are already combined, this is the number of full years
+    that is used to split the data into two separate experiments. Historical
+    data is only plotted once.
+plot_properties: dict, optional
+    Additional properties to set on the plot. Passed to ax.set().
+figsize: tuple, optional (default: (9, 2))
+    Figure size (per variable) in inches.
+reuse_mm: bool, optional (default: False)
+    Load multi-model statistics saved to the work directory by a previous
+    run instead of recalculating them.
+subplots: bool, optional (default: False)
+    Plot all time series as subplots in one figure with shared x-axis.
+legend: dict, optional
+    Names to rename the default legend labels. Keys are the original labels.
+    Set to None (null in yaml) to disable the legend. By default the ssp
+    experiment ids are mapped to readable names.
+ylabels: dict, optional (default: {})
+    Dictionary with short_names as keys and names to replace them as values.
+"""
+
+import datetime as dt
+import logging
+import os
+import warnings
+from copy import deepcopy
+from pathlib import Path
+
+import iris
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+from esmvalcore import preprocessor as pp
+from iris import plot as iplt
+from iris.analysis import MEAN, cartography
+from iris.coord_categorisation import add_year
+from iris.cube import Cube
+from matplotlib.axes import Axes
+
+from esmvaltool.diag_scripts.droughts import styles
+from esmvaltool.diag_scripts.droughts import utils as ut
+from esmvaltool.diag_scripts.shared import (
+    # ProvenanceLogger,
+    get_plot_filename,
+    group_metadata,
+    run_diagnostic,
+    select_metadata,
+)
+
+# import nc_time_axis  # noqa allow cftime axis to be plotted by mpl
+# nc_time_axis works but seems to show wrong days on axis when using cftime
+logger = logging.getLogger(Path(__file__).stem)
+warnings.filterwarnings(
+    "ignore", module="iris", message="Using DEFAULT_SPHERICAL_EARTH_RADIUS"
+)
+warnings.filterwarnings(
+    "ignore",
+    message="Degrees of freedom <= 0 for slice",
+    category=RuntimeWarning,
+)
+warnings.filterwarnings(
+    "ignore", module="iris", message="invalid value encountered in divide"
+)
+
+
+def global_mean(cfg: dict, cube: Cube) -> Cube:
+    """Calculate the area weighted global (or regional) mean."""
+    ut.guess_lat_lon_bounds(cube)
+    if "regions" in cfg:
+        cube = pp.extract_shape(
+            cube,
+            shapefile="ar6",
+            ids={"Acronym": cfg["regions"]},
+        )
+    area_weights = cartography.area_weights(cube)
+    return cube.collapsed(
+        ["latitude", "longitude"],
+        MEAN,
+        weights=area_weights,
+    )
+
+
+def yearly_average(cube: Cube) -> Cube:
+    """Calculate yearly average."""
+    add_year(cube, "time")
+    return cube.aggregated_by("year", MEAN)
+
+
+def plot_experiment(cfg, mean, std_dev, experiment, ax) -> None:
+    """Plot the mean with a shaded standard deviation band for one experiment."""
+    time = mean.coord("time")
+    exp_color = getattr(styles, experiment)
+    iplt.fill_between(
+        time,
+        mean - std_dev,
+        mean + std_dev,
+        color=exp_color,
+        alpha=0.2,
+        axes=ax,
+    )
+    iplt.plot(time, mean, color=exp_color, label=experiment, axes=ax)
+    y_label = f"{mean.var_name} [{mean.units}]"
+    if mean.long_name:
+        y_label = f"{mean.long_name}\n[{mean.units}]"
+    y_label = cfg.get("ylabels", {}).get(mean.var_name, y_label)
+    y_label = cfg.get("ylabels", {}).get(mean.long_name, y_label)
+    ax.set_ylabel(y_label)
+
+
+def plot_each_model(cubes, metas, cfg, experiment, smooth=False) -> None:
+    """Plot the time series of each individual model into one figure."""
+    fig, ax = plt.subplots(figsize=cfg["figsize"], dpi=150)
+    ax.grid(axis="y", color="0.95")
+    time = cubes[0].coord("time")
+    for cube, meta in zip(cubes, metas):
+        iplt.plot(time, cube, label=meta["dataset"])
+    if not cfg.get("strip_plots", False):
+        ax.legend()
+    basename = f"timeseries_scenarios_{metas[0]['short_name']}_{experiment}"
+    fig.savefig(get_plot_filename(basename, cfg), bbox_inches="tight")
+
+
+def plot_models(cfg, metas, ax, smooth=False) -> None:
+    """Calculate (or load) and plot multi-model statistics per experiment."""
+    historical_plotted = False
+    for experiment, models in group_metadata(metas, "exp").items():
+        if experiment == "historical" and historical_plotted:
+            continue
+
+        basename = f"{experiment}_{metas[0]['short_name']}"
+        fname = os.path.join(cfg["work_dir"], basename)
+        recalc = not cfg.get("reuse_mm", False)
+        if cfg.get("reuse_mm", False):
+            try:
+                std_dev = iris.load_cube(fname + "_stddev.nc")
+                mean = iris.load_cube(fname + "_mean.nc")
+            except FileNotFoundError:
+                recalc = True
+        if recalc or cfg.get("plot_models", False):
+            cubes = [
+                global_mean(cfg, iris.load_cube(meta["filename"]))
+                for meta in models
+            ]
+            if smooth:
+                cubes = [yearly_average(cube) for cube in cubes]
+        if cfg.get("plot_models", False):
+            # TODO: convert units for single models?
+            plot_each_model(cubes, models, cfg, experiment, smooth=smooth)
+        if recalc:
+            mm = pp.multi_model_statistics(
+                cubes,
+                "overlap",
+                ["mean", "std_dev"],
+            )
+            std_dev = mm["std_dev"]
+            mean = mm["mean"]
+        if recalc and cfg.get("save_mm", True):
+            iris.save(mm["mean"], fname + "_mean.nc")
+            iris.save(mm["std_dev"], fname + "_stddev.nc")
+
+        if experiment.startswith("historical-"):
+            experiment = experiment.split("-")[1]
+            # split combined experiments after `combined_split_years` years
+            split_years = cfg.get("combined_split_years", 65)
+            steps = split_years if smooth else split_years * 12
+            plot_experiment(
+                cfg,
+                mean[(steps - 1) :],
+                std_dev[(steps - 1) :],
+                experiment,
+                ax,
+            )
+            if not historical_plotted:
+                plot_experiment(
+                    cfg,
+                    mean[:steps],
+                    std_dev[:steps],
+                    "historical",
+                    ax,
+                )
+                historical_plotted = True
+            continue
+        if experiment == "historical":
+            historical_plotted = True
+        plot_experiment(cfg, mean, std_dev, experiment, ax)
+
+
+def plot_obs(cfg, metas, ax, smooth=False) -> None:
+    """Plot observational datasets as dashed reference lines."""
+    for meta in metas:
+        cube = iris.load_cube(meta["filename"])
+        if smooth:
+            cube = yearly_average(cube)
+        mean = global_mean(cfg, cube)
+        time = mean.coord("time")
+        iplt.plot(time, mean, linestyle="--", label=meta["dataset"], axes=ax)
+
+
+def process_variable(
+    cfg, metas, short_name, fig=None, ax: Axes = None
+) -> None:
+    """Plot all experiments and observations for one variable."""
+    project = cfg.get("project", "CMIP6")
+    model_metas = select_metadata(metas, project=project)
+    obs_metas = [meta for meta in metas if meta["project"] != project]
+    if not cfg.get("subplots", False):
+        fig, ax = plt.subplots(figsize=cfg["figsize"], dpi=300)
+    plot_models(cfg, model_metas, ax, smooth=cfg.get("smooth", False))
+    plot_obs(cfg, obs_metas, ax, smooth=cfg.get("smooth", False))
+    basename = f"timeseries_scenarios_{short_name}"
+    if not cfg.get("subplots", False):
+        ax.set_xlabel("Time")
+        ax.legend()
+    else:
+        ax.set_xticklabels([])
+        ax.set_xticks([])
+    ax.grid(
+        axis="both",
+        color="0.6",
+        which="major",
+        linestyle="--",
+        linewidth=0.5,
+    )
+    ax.set_xlim([dt.datetime(1950, 1, 1), dt.datetime(2100, 1, 1)])
+    ax.xaxis.set_major_locator(mdates.YearLocator(10))
+    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
+    for label in ax.get_xticklabels(which="major"):
+        label.set(rotation=0, horizontalalignment="center")
+    ax.xaxis.set_minor_locator(mdates.YearLocator())
+    if "plot_properties" in cfg:
+        ax.set(**cfg["plot_properties"])
+    if not cfg.get("subplots", False):
+        
fig.savefig(get_plot_filename(basename, cfg), bbox_inches="tight") + + +def main(cfg) -> None: + """Run diagnostic.""" + cfg = deepcopy(cfg) + groups = group_metadata(cfg["input_data"].values(), "short_name") + fig, axs = None, [None] * len(groups) + if cfg["subplots"]: + figsize = cfg.get("figsize", (9, 2)) + height = len(groups) * (figsize[1] + 0.5) + fig, axs = plt.subplots( + len(groups), + 1, + figsize=(figsize[0], height), + dpi=300, + ) + for i, (short_name, metas) in enumerate(groups.items()): + process_variable(cfg, metas, short_name, fig=fig, ax=axs[i]) + if cfg["subplots"]: + basename = "timeseries_scenarios" + for ax in axs: + ax.tick_params( + axis="x", + which="both", + bottom=False, + top=False, + labelbottom=False, + ) + ax.tick_params( + axis="y", + which="both", + left=True, + right=True, + labelleft=False, + labelright=True, + ) + for spine in ["top", "bottom"]: + ax.spines[spine].set_visible(False) + axs[-1].tick_params( + axis="x", + which="both", + bottom=True, + top=False, + labelbottom=True, + ) + axs[-1].spines["bottom"].set_visible(True) + axs[0].spines["top"].set_visible(True) + lines, labels = axs[-1].get_legend_handles_labels() + if cfg["legend"] is not None: + leg_dict = dict(zip(labels, lines)) + labels = list(cfg["legend"].values()) + handles = [leg_dict[lab] for lab in cfg["legend"].keys()] + labels.append("Multi-model std") + rect = plt.Rectangle( + (0, 0), + 1, + 1, + fc="0.8", + alpha=0.2, + edgecolor="black", + linewidth=0.5, + ) + handles.append(rect) + axs[-1].legend(handles, labels) + fig.subplots_adjust(hspace=0.02) + fig.tight_layout() + fig.savefig(get_plot_filename(basename, cfg), bbox_inches="tight") + + +def set_defaults(cfg) -> None: + cfg.setdefault("plot_mmm", True) + cfg.setdefault("smooth", True) + cfg.setdefault("combined_split_years", 65) + cfg.setdefault("plot_properties", {}) + cfg.setdefault("figsize", (9, 2)) + cfg.setdefault("reuse_mm", False) + cfg.setdefault("subplots", False) + cfg.setdefault( + "legend", + { + "ssp126": "SSP1-2.6", + "ssp245": "SSP2-4.5", + "ssp585": "SSP5-8.5", + "historical": "Historical", + }, + ) + cfg.setdefault("ylabels", {}) + + +if __name__ == "__main__": + with run_diagnostic() as cfg: + set_defaults(cfg) + main(cfg) diff --git a/esmvaltool/diag_scripts/droughts/utils.py b/esmvaltool/diag_scripts/droughts/utils.py index 01e5414993..03b2612695 100644 --- a/esmvaltool/diag_scripts/droughts/utils.py +++ b/esmvaltool/diag_scripts/droughts/utils.py @@ -106,7 +106,7 @@ def get_plot_fname( for key, value in replace.items(): basename = basename.replace(key, value) fpath = Path(cfg["plot_dir"]) / basename - return str(fpath.with_suffix(cfg["output_file_type"])) + return str(fpath.with_suffix("." + cfg["output_file_type"])) def add_ancestor_input(cfg: dict) -> None: @@ -354,13 +354,13 @@ def daily_to_monthly( cube.units = units -def _get_data_hlp(axis, data, ilat, ilon): +def _get_data_hlp(axis, data, ilat, ilon) -> np.ndarray: """Get data_help dependend on axis.""" if axis == 0: data_help = (data[:, ilat, ilon])[:, 0] elif axis == 1: data_help = (data[ilat, :, ilon])[:, 0] - elif axis == 2: + elif axis == 2: # noqa: PLR2004 data_help = data[ilat, ilon, :] else: data_help = None @@ -553,7 +553,7 @@ def mmm( mdtol: float = 0, dropcoords: list | None = None, *, - dropmethods=False, + dropmethods: bool = False, ) -> tuple: """Calculate mean and stdev along a cube list over all cubes. 
@@ -698,6 +698,53 @@ def select_meta_from_combi(meta: list, combi: dict, groups: dict) -> tuple:
     return this_meta, this_cfg
 
 
+def _compare_dicts(dict1, dict2, sort) -> bool:
+    """Compare two dicts recursively using ``_compare_values``."""
+    if dict1.keys() != dict2.keys():
+        return False
+    return all(
+        _compare_values(dict1[key], dict2.get(key), sort) for key in dict1
+    )
+
+
+def _compare_values(val1, val2, sort) -> bool:
+    """Compare two values of any type (dicts and lists recursively)."""
+    if isinstance(val1, dict) and isinstance(val2, dict):
+        return _compare_dicts(val1, val2, sort)
+    if isinstance(val1, list) and isinstance(val2, list):
+        if len(val1) != len(val2):
+            return False
+        if sort:
+            val1 = sorted(val1)
+            val2 = sorted(val2)
+        return all(_compare_values(v1, v2, sort) for v1, v2 in zip(val1, val2))
+    try:
+        return val1 == val2
+    except ValueError:
+        # e.g. ambiguous element-wise comparison of arrays
+        return False
+
+
+def get_common_meta(metas: list, *, sort: bool = False) -> dict:
+    """Return a dictionary of the metadata that is common to all inputs.
+
+    Parameters
+    ----------
+    metas : list
+        List of metadata dictionaries.
+    sort : bool, optional
+        Sort lists before comparison. If true, lists with the same elements
+        in a different order are considered equal. By default False.
+    """
+    common = {}
+    for key in metas[0]:
+        if all(
+            _compare_values(metas[0][key], m.get(key), sort) for m in metas
+        ):
+            common[key] = metas[0][key]
+    return common
+
+
 def list_meta_keys(meta: list, group: dict) -> list:
     """Return a list of all keys found for a group in the meta data."""
     return list(group_metadata(meta, group).keys())
@@ -931,6 +975,6 @@ def font_color(background: str | tuple | float) -> str:
     background : str, tuple, or float
         color as string (grayscale value, name, hex) or tuple (rgb, rgba)
     """
-    if sum(mpl.colors.to_rgb(background)) > 1.5:
+    if sum(mpl.colors.to_rgb(background)) > 1.5:  # noqa: PLR2004
        return "black"
    return "white"
diff --git a/esmvaltool/recipes/droughts/recipe_lindenlaub25_historical.yml b/esmvaltool/recipes/droughts/recipe_lindenlaub25_historical.yml
new file mode 100644
index 0000000000..ec746cad35
--- /dev/null
+++ b/esmvaltool/recipes/droughts/recipe_lindenlaub25_historical.yml
@@ -0,0 +1,364 @@
+---
+documentation:
+  title: "Validation of drought related CMIP6 variables with ERA5"
+  description: |
+    This recipe calculates PET for ERA5 and a subset of CMIP6 models and
+    compares the results as well as each individual input variable and the
+    derived soil moisture. For the comparison, averages and change rates are
+    plotted as maps, and the pattern correlation between ERA5 and the CMIP6
+    multi-model mean is calculated. The normalized centered root mean square
+    error vs ERA5 is calculated for each individual variable and CMIP6 model
+    and displayed as a portrait plot.
+
+    The recipe is split into blocks that can be run individually by passing
+    the `--diagnostics` argument (e.g. `pet_obs`, `validate_models/diffmaps`
+    or `validate_models/pattern_correlation`) to the esmvaltool run command:
+    `esmvaltool run recipes/droughts/recipe_lindenlaub25_historical.yml`
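+
+    For example, to run only the observational PET block (`pet_obs`):
+    `esmvaltool run recipes/droughts/recipe_lindenlaub25_historical.yml
+    --diagnostics pet_obs`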
+  authors:
+    - lindenlaub_lukas
+  maintainers:
+    - lindenlaub_lukas
+  projects:
+    - eval4cmip
+
+GRID: &grid
+  # target_grid: 3x3
+  target_grid: 1x1
+
+OBS_PERIOD: &obs_period
+  start_year: 1980
+  end_year: 2014
+
+CMIP_DEFAULT: &cmip_default
+  project: CMIP6
+  mip: Amon
+  ensemble: r1i1p1f1
+  grid: gn
+
+
+# Subset of models with soil moisture:
+CMIP6_DATA_LMON: &cmip6_data_lmon
+  - {<<: *cmip_default, mip: Lmon, dataset: ACCESS-CM2, institute: CSIRO-ARCCSS }
+  - {<<: *cmip_default, mip: Lmon, dataset: BCC-CSM2-MR, institute: BCC }
+  - {<<: *cmip_default, mip: Lmon, dataset: CanESM5, institute: CCCma }
+  - {<<: *cmip_default, mip: Lmon, dataset: CMCC-ESM2, institute: CMCC } # retry other ds name
+  - {<<: *cmip_default, mip: Lmon, dataset: CNRM-CM6-1, institute: CNRM-CERFACS, ensemble: r1i1p1f2, grid: gr} # pet works 3x3
+  - {<<: *cmip_default, mip: Lmon, dataset: EC-Earth3-Veg-LR, institute: EC-Earth-Consortium, grid: gr}
+  - {<<: *cmip_default, mip: Lmon, dataset: FGOALS-g3, institute: CAS } # latitude as auxiliary (not monotonic)
+  # - {<<: *cmip_default, mip: Lmon, dataset: FIO-ESM-2-0, institute: FIO-QLNM } # lat not monotonic cmorchecker
+  # - {<<: *cmip_default, mip: Lmon, dataset: GFDL-ESM4, institute: NOAA-GFDL, grid: gr1} # sdepth1 does not exist cmorchecker
+  - {<<: *cmip_default, mip: Lmon, dataset: GISS-E2-1-G, institute: NASA-GISS, ensemble: r1i1p1f2}
+  - {<<: *cmip_default, mip: Lmon, dataset: IPSL-CM6A-LR, institute: IPSL, grid: gr}
+  - {<<: *cmip_default, mip: Lmon, dataset: KACE-1-0-G, institute: NIMS-KMA, grid: gr}
+  - {<<: *cmip_default, mip: Lmon, dataset: MIROC6, institute: MIROC, grid: gn} # works as single source
+  - {<<: *cmip_default, mip: Lmon, dataset: MPI-ESM1-2-LR, institute: MPI-M, grid: gn}
+  - {<<: *cmip_default, mip: Lmon, dataset: MRI-ESM2-0, institute: MRI } # also calendar issues.. but works solo
+  - {<<: *cmip_default, mip: Lmon, dataset: UKESM1-0-LL, institute: MOHC, ensemble: r1i1p1f2 } # pet works 3x3
+
+
+CMIP6_DATA: &cmip6_data
+  - {<<: *cmip_default, dataset: ACCESS-CM2, institute: CSIRO-ARCCSS}
+  - {<<: *cmip_default, dataset: AWI-CM-1-1-MR}
+  - {<<: *cmip_default, dataset: BCC-CSM2-MR, institute: BCC}
+  - {<<: *cmip_default, dataset: CanESM5, institute: CCCma}
+  # - {<<: *cmip_default, dataset: CAS-ESM2-0, institute: CAS} why not in our list of models?
+  - {<<: *cmip_default, dataset: CMCC-ESM2} # retry other ds name
+  - {<<: *cmip_default, dataset: CNRM-CM6-1, ensemble: r1i1p1f2, grid: gr} # pet works 3x3
+  - {<<: *cmip_default, dataset: EC-Earth3-Veg-LR, institute: EC-Earth-Consortium, grid: gr}
+  - {<<: *cmip_default, dataset: FGOALS-g3, institute: CAS} # latitude as auxiliary (not monotonic)
+  - {<<: *cmip_default, dataset: FIO-ESM-2-0, institute: FIO-QLNM}
+  - {<<: *cmip_default, dataset: GFDL-ESM4, institute: NOAA-GFDL, grid: gr1}
+  - {<<: *cmip_default, dataset: GISS-E2-1-G, institute: NASA-GISS, ensemble: r1i1p1f2}
+  - {<<: *cmip_default, dataset: INM-CM5-0, institute: INM, grid: gr1} # no mrsos? works for pr/tas pet 3x3
+  - {<<: *cmip_default, dataset: IPSL-CM6A-LR, institute: IPSL, grid: gr}
+  - {<<: *cmip_default, dataset: KACE-1-0-G, grid: gr}
+  - {<<: *cmip_default, dataset: MIROC6, institute: MIROC, grid: gn} # works as single source
+  - {<<: *cmip_default, dataset: MPI-ESM1-2-LR, institute: MPI-M, grid: gn}
+  - {<<: *cmip_default, dataset: MRI-ESM2-0, institute: MRI}
+  - {<<: *cmip_default, dataset: UKESM1-0-LL, ensemble: r1i1p1f2}
+
+ERA5: &era5
+  dataset: ERA5
+  project: native6
+  type: reanaly
+  version: v1
+  tier: 3
+  <<: *obs_period
+  reference_for_metric: true
+  split: ERA5
+
+CRU: &cru
+  dataset: CRU
+  project: OBS6
+  type: reanaly
+  version: TS4.07
+  tier: 2
+  <<: *obs_period
+  split: REF2
+  reference_for_metric: true
+
+GPCP-SG: &gpcp
+  dataset: GPCP-SG
+  project: OBS
+  type: atmos
+  version: 2.3
+  tier: 2
+  <<: *obs_period
+  reference_for_metric: true
+
+CDS: &cds
+  dataset: CDS-SATELLITE-SOIL-MOISTURE
+  project: OBS
+  type: sat
+  split: REF2
+  tier: 3
+  version: COMBINED
+  start_year: 1980
+  end_year: 2014
+  reference_for_metric: true
+
+# ERA5 and CRU data are available for most variables. PET is derived only for
+# ERA5, and soil moisture is derived from ERA5 and CDS-SATELLITE-SOIL-MOISTURE.
+# For these variables the datasets need to be set manually and/or added.
+# Also note that the two variables are found in different mips.
+OBS_DATA: &obs_data
+  - <<: *era5
+    mip: Amon
+  - <<: *cru
+    mip: Amon
+
+
+VAR_DEFAULT: &var_default
+  grid: gn
+  preprocessor: default
+  mip: Amon
+  project: CMIP6
+  exp: historical
+  <<: *obs_period
+
+
+# RECIPE STARTS HERE
+preprocessors:
+  default: &default
+    regrid:
+      <<: *grid
+      scheme: nearest
+    regrid_time:
+      calendar: standard
+    mask_landsea:
+      mask_out: sea
+    mask_glaciated:
+      mask_out: glaciated
+
+  perday:
+    <<: *default
+    convert_units:
+      units: mm day-1
+
+
+diagnostics:
+  pet_historical: &pet_historical
+    variables:
+      tasmin: &pet_default
+        <<: *var_default
+        exp: ["historical"]
+        <<: *obs_period
+        additional_datasets: *cmip6_data
+      tasmax: *pet_default
+      sfcWind: *pet_default
+      rsds: *pet_default
+      ps: *pet_default
+      pr:
+        <<: *pet_default
+        preprocessor: perday
+    scripts: &scripts_pet
+      pet_pm:
+        script: droughts/pet.R
+        pet_type: "Penman"
+        method: ICID
+
+  spei_historical: &spei_historical
+    scripts:
+      spei:
+        script: droughts/spei.R
+        spei_type: "SPEI"
+        ancestors:
+          - pet_historical/pet_pm
+          - pet_historical/pr
+        smooth_month: 6
+        distribution: "log-Logistic"
+        refstart_year: 1950
+        refend_year: 2014
+        refstart_month: 1
+        refend_month: 12
+
+  pet_obs: &pet_obs
+    variables:
+      tasmin: &pet_default_obs
+        <<: *obs_period
+        preprocessor: default
+        mip: Amon
+        additional_datasets:
+          - <<: *era5
+      tasmax: *pet_default_obs
+      sfcWind: *pet_default_obs
+      rsds: *pet_default_obs
+      ps: *pet_default_obs
+      pr:
+        <<: *pet_default_obs
+        preprocessor: perday
+    scripts:
+      pet_pm:
+        script: droughts/pet.R
+        pet_type: "Penman"
+        method: ICID
+
+  validate_obs:
+    variables:
+      sm:
+        additional_datasets:
+          - <<: *era5
+            mip: Lmon
+            start_year: 1980
+            end_year: 2014
+          - <<: *cds
+            mip: Lmon
+      tasmin: &add_cru
+        mip: Amon
+        additional_datasets:
+          - <<: *cru
+      tasmax: *add_cru
+      evspsblpot:
+        preprocessor: perday
+        additional_datasets:
+          - <<: *cru
+            mip: Emon
+      pr:
+        <<: *add_cru
+        preprocessor: perday
+    scripts:
+      diffmaps:
+        script: droughts/diffmap.py
+        metrics: ["total", "diff"]
+        plot_models: True
+        save_models: True
+        plot_mmm: False
+        save_mmm: False
+        ancestors:
+          - pet_obs/pr
+          - pet_obs/tasmin
+          - pet_obs/tasmax
+          - pet_obs/sfcWind
+          - pet_obs/rsds
+          - pet_obs/ps
+          - validate_obs/pr
+          - validate_obs/sm
+          - validate_obs/tasmin
+          - validate_obs/tasmax
+          - pet_obs/pet_pm
+          - validate_obs/evspsblpot
+
+  validate_models: &validate_models
+    # additional_datasets: *cmip6_data
+    variables:
+      tasmin: &var_default_historical
+        <<: *var_default
+        <<: *obs_period
+        exp: ["historical"]
+        additional_datasets: *cmip6_data
+      tasmax: *var_default_historical
+      sfcWind: *var_default_historical
+      ps: *var_default_historical
+      rsds: *var_default_historical
+      sm:
+        <<: *var_default
+        mip: Lmon
+        start_year: 1980
+        end_year: 2014
+        derive: true
+        exp: ["historical"]
+        additional_datasets: *cmip6_data_lmon
+      pr:
+        <<: *var_default_historical
+        preprocessor: perday
+    scripts:
+      diffmaps:
+        script: droughts/diffmap.py
+        plot_models: False
+        save_models: True
+        plot_mmm: True
+        save_mmm: True
+        metrics: ["total", "diff"]
+        ancestors:
+          - pet_historical/pr
+          - pet_historical/tasmin
+          - pet_historical/tasmax
+          - pet_historical/sfcWind
+          - pet_historical/rsds
+          - pet_historical/ps
+          - pet_historical/pet_pm
+          - validate_models/sm
+
+      pattern_correlation:
+        reference: ERA5
+        relative_change: true
+        group_by: diffmap_metric
+        script: droughts/pattern_correlation.py
+        labels:
+          pr: $P$
+          evspsblpot: $ET_0$
+          tasmax: $T_{\text{max}}$
+          tasmin: $T_{\text{min}}$
+          sfcWind: $V_{\text{10m}}$
+          clt: $C_{\text{total}}$
+          rsds: $RS_{\text{down}}$
+          ps: $p_{\text{surf}}$
+          sm: $SM_{\text{surf}}$
+        ancestors:
+          - validate_models/diffmaps
+          - validate_obs/diffmaps
+      perfmetric:
+        # TODO: check if the general portrait plot diagnostic can be used here
+        script: droughts/portrait_plot.py
+        distance_metric: rmse
+        nan_color: null
+        y_labels:
+          pr: $PR$
+          evspsblpot: $ET_0$
+          tasmax: $T_{\text{max}}$
+          tasmin: $T_{\text{min}}$
+          sfcWind: $V_{\text{10m}}$
+          ps: $p_{\text{surf}}$
+          sm: $SM_{\text{surf}}$
+          rsds: $RS_{\text{down}}$
+        y_order:
+          - pr
+          - evspsblpot
+          - tasmax
+          - tasmin
+          - sfcWind
+          - rsds
+          - ps
+          - sm
+        ancestors:
+          - pet_obs/pet_pm
+          - pet_obs/pr
+          - pet_obs/tasmin
+          - pet_obs/tasmax
+          - pet_obs/ps
+          - pet_obs/rsds
+          - pet_obs/sfcWind
+          - pet_historical/pet_pm
+          - pet_historical/pr
+          - pet_historical/tasmin
+          - pet_historical/tasmax
+          - pet_historical/ps
+          - pet_historical/rsds
+          - pet_historical/sfcWind
+          - validate_obs/pr
+          - validate_obs/tasmin
+          - validate_obs/tasmax
+          - validate_obs/evspsblpot
+          - validate_obs/sm
+          - validate_models/sm
diff --git a/esmvaltool/recipes/droughts/recipe_lindenlaub25_scenarios.yml b/esmvaltool/recipes/droughts/recipe_lindenlaub25_scenarios.yml
new file mode 100644
index 0000000000..59c87fc51e
--- /dev/null
+++ b/esmvaltool/recipes/droughts/recipe_lindenlaub25_scenarios.yml
@@ -0,0 +1,463 @@
+---
+documentation:
+  title: "Analysis of droughts in CMIP6 future projections."
+  description: |
+    This recipe calculates PET and SPEI values for CMIP6 future projections.
+    18 models for the SSP126, SSP245 and SSP585 scenarios are used.
+    The reference period for index calibration (1950-2014) lies within the
+    historical experiment.
+    The indices are analysed using several diagnostics:
+    - `*_ssp*` for maps of averages and change rates of variables per scenario.
+    - `pet_*, spei_*` for PET and index calculation.
+    - `*_trend` for trend maps of PET and SPEI.
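+
+    As in the historical recipe, individual blocks can be run via the
+    `--diagnostics` argument, e.g. only the scenario maps:
+    `esmvaltool run recipes/droughts/recipe_lindenlaub25_scenarios.yml
+    --diagnostics ssp_maps`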
+  authors:
+    - lindenlaub_lukas
+  maintainers:
+    - lindenlaub_lukas
+  projects:
+    - eval4cmip
+  realms: [atmos, land]
+  themes: [phys]
+
+GRID: &grid
+  # target_grid: 0.25x0.25
+  target_grid: 1x1  # might require a lot of memory (limit parallel tasks)
+
+
+# longest period for CMIP6 only analysis
+FULL_PERIOD: &full_period
+  start_year: 1950
+  end_year: 2100
+
+# future period for projected CMIP6 variables
+SSP_PERIOD: &ssp_period
+  start_year: 2015
+  end_year: 2100
+
+
+DIFFMAPS_DEFAULT: &diffmaps_default
+  script: droughts/diffmap.py
+  plot_models: False
+  plot_mmm: True
+  clip_land: True
+  strip_plots: True
+
+
+CMIP_DEFAULT: &cmip_default
+  project: CMIP6
+  mip: Amon
+  ensemble: r1i1p1f1
+  grid: gn
+
+
+CMIP6_DATA_LMON: &cmip6_data_lmon  # no awi or inm
+  - {<<: *cmip_default, mip: Lmon, dataset: ACCESS-CM2, institute: CSIRO-ARCCSS }
+  - {<<: *cmip_default, mip: Lmon, dataset: BCC-CSM2-MR, institute: BCC }
+  - {<<: *cmip_default, mip: Lmon, dataset: CanESM5, institute: CCCma }
+  - {<<: *cmip_default, mip: Lmon, dataset: CMCC-ESM2, institute: CMCC } # retry other ds name
+  - {<<: *cmip_default, mip: Lmon, dataset: CNRM-CM6-1, institute: CNRM-CERFACS, ensemble: r1i1p1f2, grid: gr} # pet works 3x3
+  - {<<: *cmip_default, mip: Lmon, dataset: EC-Earth3-Veg-LR, institute: EC-Earth-Consortium, grid: gr}
+  - {<<: *cmip_default, mip: Lmon, dataset: FGOALS-g3, institute: CAS } # latitude as auxiliary (not monotonic)
+  - {<<: *cmip_default, mip: Lmon, dataset: GISS-E2-1-G, institute: NASA-GISS, ensemble: r1i1p1f2}
+  - {<<: *cmip_default, mip: Lmon, dataset: IPSL-CM6A-LR, institute: IPSL, grid: gr}
+  - {<<: *cmip_default, mip: Lmon, dataset: KACE-1-0-G, institute: NIMS-KMA, grid: gr}
+  - {<<: *cmip_default, mip: Lmon, dataset: MIROC6, institute: MIROC, grid: gn} # works as single source
+  - {<<: *cmip_default, mip: Lmon, dataset: MPI-ESM1-2-LR, institute: MPI-M, grid: gn}
+  - {<<: *cmip_default, mip: Lmon, dataset: MRI-ESM2-0, institute: MRI } # also calendar issues.. but works solo
+  - {<<: *cmip_default, mip: Lmon, dataset: UKESM1-0-LL, institute: MOHC, ensemble: r1i1p1f2 } # pet works 3x3
+
+
+CMIP6_DATA: &cmip6_data
+  - {<<: *cmip_default, dataset: ACCESS-CM2, institute: CSIRO-ARCCSS }
+  - {<<: *cmip_default, dataset: AWI-CM-1-1-MR, institute: AWI }
+  - {<<: *cmip_default, dataset: BCC-CSM2-MR, institute: BCC }
+  - {<<: *cmip_default, dataset: CanESM5, institute: CCCma }
+  - {<<: *cmip_default, dataset: CMCC-ESM2, institute: CMCC } # retry other ds name
+  - {<<: *cmip_default, dataset: CNRM-CM6-1, institute: CNRM-CERFACS, ensemble: r1i1p1f2, grid: gr} # pet works 3x3
+  - {<<: *cmip_default, dataset: EC-Earth3-Veg-LR, institute: EC-Earth-Consortium, grid: gr}
+  - {<<: *cmip_default, dataset: FGOALS-g3, institute: CAS } # latitude as auxiliary (not monotonic)
+  - {<<: *cmip_default, dataset: FIO-ESM-2-0, institute: FIO-QLNM }
+  - {<<: *cmip_default, dataset: GFDL-ESM4, institute: NOAA-GFDL, grid: gr1}
+  - {<<: *cmip_default, dataset: GISS-E2-1-G, institute: NASA-GISS, ensemble: r1i1p1f2}
+  - {<<: *cmip_default, dataset: INM-CM5-0, institute: INM, grid: gr1} # no mrsos? 
works for pr/tas + - {<<: *cmip_default, dataset: IPSL-CM6A-LR, institute: IPSL, grid: gr} + - {<<: *cmip_default, dataset: KACE-1-0-G, institute: NIMS-KMA, grid: gr} + - {<<: *cmip_default, dataset: MIROC6, institute: MIROC, grid: gn} # works as single source + - {<<: *cmip_default, dataset: MPI-ESM1-2-LR, institute: MPI-M, grid: gn} + - {<<: *cmip_default, dataset: MRI-ESM2-0, institute: MRI } # also calendar issues.. but works solo + - {<<: *cmip_default, dataset: UKESM1-0-LL, institute: MOHC, ensemble: r1i1p1f2 } # pet works 3x3 + + + +VAR_DEFAULT: &var_default + grid: gn + # ensemble: r1i1p1f1 + mip: Amon + project: CMIP6 + additional_datasets: *cmip6_data + +VAR_SM: &var_sm + grid: gn + mip: Lmon + project: CMIP6 + additional_datasets: *cmip6_data_lmon + +preprocessors: + default: &default_preproc + regrid: + <<: *grid + scheme: nearest + regrid_time: + calendar: standard + mask_landsea: + mask_out: sea + mask_glaciated: + mask_out: glaciated + perday: + <<: *default_preproc + convert_units: + units: mm day-1 + + +diagnostics: + ssp_maps: + variables: + tasmin: &ssp585_variable + <<: *var_default + exp: ssp585 + <<: *ssp_period + tasmax: *ssp585_variable + pr: + <<: *ssp585_variable + preprocessor: perday + # sm: *var_sm + scripts: + diffmaps: &pr_plot + <<: *diffmaps_default + + sm_ssp585: &sm_ssp585 + variables: + sm: &sm_default + <<: *var_default + exp: ssp585 + <<: *ssp_period + mip: Lmon + derive: true + additional_datasets: *cmip6_data_lmon + scripts: + diffmaps: &sm_plot + <<: *diffmaps_default + # plot_kwargs_diff: + # vmin: -0.04 + # vmax: 0.04 + # cmap: "RdYlBu" + sm_ssp245: + variables: + sm: + <<: *sm_default + exp: ssp245 + scripts: + diffmaps: + <<: *diffmaps_default + sm_ssp126: + variables: + sm: + <<: *sm_default + exp: ssp126 + scripts: + diffmaps: + <<: *diffmaps_default + + # --- FULL PERIOD SPEI --- # + # TODO: test with python repo + pet_ssp585: &pet_ssp585 + variables: + tasmin: &pet_585 + <<: *var_default + exp: ["historical", "ssp585"] + <<: *full_period + additional_datasets: *cmip6_data + tasmax: *pet_585 + sfcWind: *pet_585 + # clt: *pet_585 + rsds: *pet_585 + ps: *pet_585 + pr: + <<: *pet_585 + preprocessor: perday + scripts: + pet_pm: + script: droughts/pet.R + pet_type: "Penman" # "Penman_clt" + pet_ssp245: &pet_ssp245 + variables: + tasmin: &pet_245 + <<: *var_default + exp: ["historical", "ssp245"] + <<: *full_period + additional_datasets: *cmip6_data + tasmax: *pet_245 + sfcWind: *pet_245 + # clt: *pet_245 + rsds: *pet_245 + ps: *pet_245 + pr: + <<: *pet_245 + preprocessor: perday + scripts: + pet_pm: + script: droughts/pet.R + pet_type: "Penman" # "Penman_clt" + pet_ssp126: &pet_ssp126 + variables: + tasmin: &pet_126 + <<: *var_default + exp: ["historical", "ssp126"] + <<: *full_period + additional_datasets: *cmip6_data + tasmax: *pet_126 + sfcWind: *pet_126 + # clt: *pet_126 + rsds: *pet_126 + ps: *pet_126 + pr: + <<: *pet_126 + preprocessor: perday + scripts: + pet_pm: + script: droughts/pet.R + pet_type: "Penman" # "Penman_clt" + + spei_ssp585: &spei_ssp585 + scripts: + spei: &script_spei_585 + script: droughts/spei.R + smooth_month: 6 + distribution: "log-Logistic" + refstart_year: 1950 + refend_year: 2014 + refstart_month: 1 + refend_month: 12 + ancestors: [pet_ssp585/pet_pm, pet_ssp585/pr] + spei_ssp245: &spei_ssp245 + scripts: + spei: + <<: *script_spei_585 + ancestors: [pet_ssp245/pet_pm, pet_ssp245/pr] + spei_ssp126: &spei_ssp126 + scripts: + spei: + <<: *script_spei_585 + ancestors: [pet_ssp126/pet_pm, pet_ssp126/pr] + + # --- EXTRA WB 
--- #
+  # wb_ssp126: &wb_ssp126
+  #   scripts:
+  #     wb:
+  #       script: droughts/diag_wb.py
+  #       ancestors: [pet_ssp126/pet_pm, pet_ssp126/pr]
+  # wb_ssp245: &wb_ssp245
+  #   scripts:
+  #     wb:
+  #       script: droughts/diag_wb.py
+  #       ancestors: [pet_ssp245/pet_pm, pet_ssp245/pr]
+  # wb_ssp585: &wb_ssp585
+  #   scripts:
+  #     wb:
+  #       script: droughts/diag_wb.py
+  #       ancestors: [pet_ssp585/pet_pm, pet_ssp585/pr]
+  # --- SPEI Plots --- #
+  diffmap:
+    scripts:
+      ssp: &diff_ssp_default
+        # NOTE: this does not work for different scenarios, as the diagnostic
+        # only loops over group_by and not over scenarios. The loop that
+        # calculates multi-model means needs to be aware of exp (hardcoded)
+        # or allow combinations of multiple group_by keys (or group based on
+        # the basename?). Until then, use the dedicated diffmap/spei_ssp126
+        # etc. runs below.
+        script: droughts/diffmap.py
+        ancestors:
+          - pet_ssp126/pr
+          - pet_ssp245/pr
+          - pet_ssp585/pr
+          - pet_ssp126/pet_pm
+          - pet_ssp245/pet_pm
+          - pet_ssp585/pet_pm
+          - spei_ssp126/spei
+          - spei_ssp245/spei
+          - spei_ssp585/spei
+        save_models: True
+        save_mmm: True
+        plot_models: False
+        plot_mmm: True
+        <<: *ssp_period
+        plot_kwargs_diff:
+          cmap: YlOrRd
+          vmax: 6e-5
+          vmin: 0
+      ssp585: &diff_pet_default
+        script: droughts/diffmap.py
+        ancestors:
+          - "pet_ssp585/pet_pm"
+          - "spei_ssp585/spei"
+        save_models: True
+        save_mmm: True
+        plot_models: False
+        plot_mmm: True
+        <<: *ssp_period
+        plot_kwargs_diff:
+          cmap: YlOrRd
+          vmax: 6e-5
+          vmin: 0
+      pet_ssp245:
+        <<: *diff_pet_default
+        ancestors: ["pet_ssp245/pet_pm"]
+      pet_ssp126:
+        <<: *diff_pet_default
+        ancestors: ["pet_ssp126/pet_pm"]
+      spei_ssp585: &diff_spei_default
+        script: droughts/diffmap.py
+        ancestors: ["spei_ssp585/spei"]
+        <<: *ssp_period
+        save_models: True
+        save_mmm: True
+        plot_models: False
+        plot_mmm: True
+        plot_kwargs_diff:
+          cmap: RdYlBu
+          vmax: 4
+          vmin: -4
+      spei_ssp245:
+        <<: *diff_spei_default
+        ancestors: ["spei_ssp245/spei"]
+      spei_ssp126:
+        <<: *diff_spei_default
+        ancestors: ["spei_ssp126/spei"]
+      # wb_ssp126: &diff_wb_default
+      #   script: droughts/diffmap.py
+      #   ancestors: ["wb_ssp126/wb"]
+      #   <<: *ssp_period
+      #   save_models: False
+      #   save_mmm: True
+      #   plot_models: False
+      #   plot_mmm: True
+      # wb_ssp245:
+      #   <<: *diff_wb_default
+      #   ancestors: ["wb_ssp245/wb"]
+      # wb_ssp585:
+      #   <<: *diff_wb_default
+      #   ancestors: ["wb_ssp585/wb"]
+  event_area:
+    scripts:
+      ssps: &event_area_global
+        subplots: True
+        figsize: [9, 1.3]
+        yticks: [0, 0.2, 0.4, 0.6, 0.8, 1]
+        ylabels:
+          historical-ssp126: SSP1-2.6
+          historical-ssp245: SSP2-4.5
+          historical-ssp585: SSP5-8.5
+        intervals:
+          - [240, 481]
+          # - [780, 1021]
+          - [1560, null]
+        reference_dataset: MIROC6  # to pick timeseries from
+        ancestors: [spei_ssp585/spei, spei_ssp245/spei, spei_ssp126/spei]
+        script: droughts/event_area_timeseries.py
+        interval: 240
+        latest_legend: true
+        plot_models: False
+        index: SPEI
+        plot_kwargs:
+          baseline: "zero"
+        shapefile: ar6_regions/IPCC-WGI-reference-regions-v4.shp
+        # regions: ['WCE']
+      ssps_harvest:
+        <<: *event_area_global
+        regions: []
+  regions: &regions
+    scripts:
+      spei_ssp585:
+        ancestors: [diffmap/spei_ssp585]
+        select_metadata:
+          experiment: ssp585
+          short_name: spei
+          dataset: MMM
+          diffmap_metric: diff
+        script: droughts/regional_hexagons.py
+        shapefile: ar6_regions/IPCC-WGI-reference-regions-v4.shp
+        exclude_regions: []
+        statistics: [mean]
+        split_by: exp
+        labels: false
+        vmin: -0.4
+        vmax: 0.4
+        cmap: "RdYlBu"
+        strip_plot: true
+      trend_spei_scenarios:
+        ancestors:
+          - diffmap/spei_ssp126
+          - diffmap/spei_ssp245
+          - diffmap/spei_ssp585
+        script: droughts/regional_hexagons.py
+        # TODO: use preproc instead of shapefile
+        shapefile: ar6_regions/IPCC-WGI-reference-regions-v4.shp
+        exclude_regions: []
+        statistics: ['mean', 'std_dev']
+        split_by: exp
+        vmin: -0.4
+        vmax: 0.4
+        cmap: "RdYlBu"
+  statistics:
+    scripts:
+      histogram:
+        script: droughts/distribution.py
+        clip_land: true
+        # comparison_period: 20
+        ancestors:
+          - spei_ssp126/spei
+          - spei_ssp245/spei
+          - spei_ssp585/spei
+        strip_plots: true
+        plot_mmm: true
+        plot_models: false
+        plot_regions: true
+        plot_global: false
+        sort_regions: true
+        split_by: exp
+        plot_kwargs:
+          alpha: 0.75
+        plot_properties:
+          ylim: (0, 0.45)
+
+
+  timeseries:
+    scripts:
+      scenarios:
+        script: droughts/timeseries_scenarios.py
+        ancestors:
+          - pet_ssp126/pr
+          - pet_ssp245/pr
+          - pet_ssp585/pr
+          - pet_ssp126/pet_pm
+          - pet_ssp245/pet_pm
+          - pet_ssp585/pet_pm
+          - spei_ssp126/spei
+          - spei_ssp245/spei
+          - spei_ssp585/spei
+        plot_models: False
+        strip_plots: True
+        save_mm: True
+        reuse_mm: False
+        subplots: True
+        smooth: True
+        figsize: [10.5, 1.5]
+        legend:
+          ssp126: "SSP1-2.6"
+          ssp245: "SSP2-4.5"
+          ssp585: "SSP5-8.5"
+          historical: "Historical"
+        ylabels:
+          pr: $PR$ [mm/day]
+          evspsblpot: $ET_0$ [mm/day]
+          spei: $SPEI$