From 403ea2870c91c8e86cb2ef53b28d751e5e46705f Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Feb 2026 18:08:38 +0100 Subject: [PATCH 1/9] Start on fixing fix_file --- esmvalcore/cmor/fix.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 4f66ffda25..5c6fb7e240 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -9,15 +9,16 @@ import logging from collections import defaultdict +from pathlib import Path from typing import TYPE_CHECKING, Any from iris.cube import CubeList from esmvalcore.cmor._fixes.fix import Fix +from esmvalcore.io.local import LocalFile if TYPE_CHECKING: from collections.abc import Sequence - from pathlib import Path import ncdata import xarray as xr @@ -84,6 +85,19 @@ def fix_file( # noqa: PLR0913 Fixed data or a path to them. """ + # TODO: the code in `esmvalcore.preprocessor.preprocess` called from + # `esmvalcore.dataset.Dataset.load` currently relies on this function + # returning an esmvalcore.io.local.LocalFile (or an iris.cube.Cube or a + # list of those). Maybe this function could be updated so it returns a + # CubeList instead of a xr.Dataset or ncdata.NcData object? + # All fix_file methods currently seem to return a Path, so this is not a + # problem just yet. + if not isinstance(file, Path): + # Skip this function for anything that is not a path to a file. + # TODO: it would be nice to make this work for any + # `esmvalcore.io.DataElement`. + return file + # Update extra_facets with variable information given as regular arguments # to this function extra_facets.update( @@ -96,6 +110,7 @@ def fix_file( # noqa: PLR0913 }, ) + result = Path(file) for fix in Fix.get_fixes( project=project, dataset=dataset, @@ -105,12 +120,20 @@ def fix_file( # noqa: PLR0913 session=session, frequency=frequency, ): - file = fix.fix_file( - file, + result = fix.fix_file( + result, output_dir, add_unique_suffix=add_unique_suffix, ) - return file + + if isinstance(file, LocalFile): + result = LocalFile(result) + result.facets = file.facets + result.ignore_warnings = file.ignore_warnings + result.to_iris() + file.attributes = result.attributes + + return result def fix_metadata( From 8610779f05a623e2a4b17c3900364afd0f02f05e Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Feb 2026 21:11:51 +0100 Subject: [PATCH 2/9] Improve compatibility with esmvalcore.preprocessor --- esmvalcore/cmor/fix.py | 46 +++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 5c6fb7e240..5f093a436a 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -20,8 +20,6 @@ if TYPE_CHECKING: from collections.abc import Sequence - import ncdata - import xarray as xr from iris.cube import Cube from esmvalcore.config import Session @@ -40,7 +38,7 @@ def fix_file( # noqa: PLR0913 session: Session | None = None, frequency: str | None = None, **extra_facets: Any, -) -> str | Path | xr.Dataset | ncdata.NcData: +) -> Path | CubeList: """Fix files before loading them into a :class:`~iris.cube.CubeList`. This is mainly intended to fix errors that prevent loading the data with @@ -52,7 +50,7 @@ def fix_file( # noqa: PLR0913 ------- A path should only be returned if it points to the original (unchanged) file (i.e., a fix was not necessary). If a fix is necessary, this function - should return a :class:`~ncdata.NcData` or :class:`~xarray.Dataset` object. + should return a :class:`~iris.cube.CubeList`. Under no circumstances a copy of the input data should be created (this is very inefficient). @@ -81,21 +79,13 @@ def fix_file( # noqa: PLR0913 Returns ------- - str | pathlib.Path | xr.Dataset | ncdata.NcData: + : Fixed data or a path to them. """ - # TODO: the code in `esmvalcore.preprocessor.preprocess` called from - # `esmvalcore.dataset.Dataset.load` currently relies on this function - # returning an esmvalcore.io.local.LocalFile (or an iris.cube.Cube or a - # list of those). Maybe this function could be updated so it returns a - # CubeList instead of a xr.Dataset or ncdata.NcData object? - # All fix_file methods currently seem to return a Path, so this is not a - # problem just yet. if not isinstance(file, Path): - # Skip this function for anything that is not a path to a file. - # TODO: it would be nice to make this work for any - # `esmvalcore.io.DataElement`. + # Skip this function for `esmvalcore.io.DataElement` that is not a path + # to a file. return file # Update extra_facets with variable information given as regular arguments @@ -110,7 +100,7 @@ def fix_file( # noqa: PLR0913 }, ) - result = Path(file) + result: Path | CubeList = Path(file) for fix in Fix.get_fixes( project=project, dataset=dataset, @@ -127,11 +117,25 @@ def fix_file( # noqa: PLR0913 ) if isinstance(file, LocalFile): - result = LocalFile(result) - result.facets = file.facets - result.ignore_warnings = file.ignore_warnings - result.to_iris() - file.attributes = result.attributes + # This happens when this function is called from + # `esmvalcore.dataset.Dataset.load`. + if isinstance(result, Path): + if result == file: + # No fixes have been applied, return the original file. + result = file + else: + # The file has been fixed and the result is a path to the fixed + # file. The result needs to be loaded to read the global + # attributes for recording provenance. + fixed_file = LocalFile(result) + fixed_file.facets = file.facets + fixed_file.ignore_warnings = file.ignore_warnings + result = fixed_file.to_iris() + + if isinstance(result, CubeList): + # Set the attributes for recording provenance here because + # to_iris will not be called on the original file. + file.attributes = result[0].attributes.globals.copy() return result From 41d59cd4f9596c2f8698a65f6ef5fcc27336b4ee Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 19 Feb 2026 21:30:50 +0100 Subject: [PATCH 3/9] Use correct type for function argument --- tests/unit/cmor/test_fix.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/cmor/test_fix.py b/tests/unit/cmor/test_fix.py index 7b5fc3d0ba..c54bbba7b7 100644 --- a/tests/unit/cmor/test_fix.py +++ b/tests/unit/cmor/test_fix.py @@ -14,7 +14,7 @@ class TestFixFile: @pytest.fixture(autouse=True) def setUp(self): """Prepare for testing.""" - self.filename = "filename" + self.filename = Path("filename") self.mock_fix = Mock() self.mock_fix.fix_file.return_value = "new_filename" self.expected_get_fixes_call = { @@ -40,7 +40,7 @@ def test_fix(self): return_value=[self.mock_fix], ) as mock_get_fixes: file_returned = fix_file( - file="filename", + file=Path("filename"), short_name="short_name", project="project", dataset="model", @@ -62,7 +62,7 @@ def test_nofix(self): return_value=[], ) as mock_get_fixes: file_returned = fix_file( - file="filename", + file=Path("filename"), short_name="short_name", project="project", dataset="model", From 68ce98aa9f351e6dcfe7abbdaff5a601f9f77df1 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 12:39:10 +0100 Subject: [PATCH 4/9] Further updates to type hints --- esmvalcore/cmor/_fixes/fix.py | 8 +++----- esmvalcore/cmor/fix.py | 13 +++++-------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 2fbd9856fb..cb61fabae6 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -35,8 +35,6 @@ if TYPE_CHECKING: from collections.abc import Sequence - import ncdata - import xarray as xr from iris.coords import Coord from iris.cube import Cube @@ -84,10 +82,10 @@ def __init__( def fix_file( self, - file: str | Path | xr.Dataset | ncdata.NcData, + file: Path, output_dir: Path, # noqa: ARG002 add_unique_suffix: bool = False, # noqa: ARG002 - ) -> str | Path | xr.Dataset | ncdata.NcData: + ) -> Path | Sequence[Cube]: """Fix files before loading them into a :class:`~iris.cube.CubeList`. This is mainly intended to fix errors that prevent loading the data @@ -116,7 +114,7 @@ def fix_file( Returns ------- - str | pathlib.Path | xr.Dataset | ncdata.NcData: + : Fixed data or a path to them. """ diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 5f093a436a..3f6bc4f60d 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -9,19 +9,16 @@ import logging from collections import defaultdict +from collections.abc import Sequence from pathlib import Path from typing import TYPE_CHECKING, Any -from iris.cube import CubeList +from iris.cube import Cube, CubeList from esmvalcore.cmor._fixes.fix import Fix from esmvalcore.io.local import LocalFile if TYPE_CHECKING: - from collections.abc import Sequence - - from iris.cube import Cube - from esmvalcore.config import Session logger = logging.getLogger(__name__) @@ -38,7 +35,7 @@ def fix_file( # noqa: PLR0913 session: Session | None = None, frequency: str | None = None, **extra_facets: Any, -) -> Path | CubeList: +) -> Path | Sequence[Cube]: """Fix files before loading them into a :class:`~iris.cube.CubeList`. This is mainly intended to fix errors that prevent loading the data with @@ -100,7 +97,7 @@ def fix_file( # noqa: PLR0913 }, ) - result: Path | CubeList = Path(file) + result: Path | Sequence[Cube] = Path(file) for fix in Fix.get_fixes( project=project, dataset=dataset, @@ -132,7 +129,7 @@ def fix_file( # noqa: PLR0913 fixed_file.ignore_warnings = file.ignore_warnings result = fixed_file.to_iris() - if isinstance(result, CubeList): + if isinstance(result, Sequence) and isinstance(result[0], Cube): # Set the attributes for recording provenance here because # to_iris will not be called on the original file. file.attributes = result[0].attributes.globals.copy() From 8334fd09796060527d6d32357210a5efc66b4299 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 12:43:11 +0100 Subject: [PATCH 5/9] Use correct derived coordinate loading in iris --- esmvalcore/config/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/config/__init__.py b/esmvalcore/config/__init__.py index 333c3e30e1..d3614283b4 100644 --- a/esmvalcore/config/__init__.py +++ b/esmvalcore/config/__init__.py @@ -29,6 +29,7 @@ for attr, value in { "save_split_attrs": True, "date_microseconds": True, + "derived_bounds": True, }.items(): with contextlib.suppress(AttributeError): setattr(iris.FUTURE, attr, value) From 951ea2906081abf89204c19757307cb55ac16d57 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 12:45:42 +0100 Subject: [PATCH 6/9] Remove workaround for iris bug --- esmvalcore/preprocessor/_concatenate.py | 2 - esmvalcore/preprocessor/_regrid.py | 7 +--- esmvalcore/preprocessor/_shared.py | 36 ---------------- tests/unit/preprocessor/test_shared.py | 56 ------------------------- 4 files changed, 1 insertion(+), 100 deletions(-) diff --git a/esmvalcore/preprocessor/_concatenate.py b/esmvalcore/preprocessor/_concatenate.py index 6d027ed137..626c4d341f 100644 --- a/esmvalcore/preprocessor/_concatenate.py +++ b/esmvalcore/preprocessor/_concatenate.py @@ -13,7 +13,6 @@ from esmvalcore.cmor.check import CheckLevels from esmvalcore.io.esgf.facets import FACETS from esmvalcore.iris_helpers import merge_cube_attributes -from esmvalcore.preprocessor._shared import _rechunk_aux_factory_dependencies if TYPE_CHECKING: from collections.abc import Iterable, Sequence @@ -282,7 +281,6 @@ def concatenate( cubes = _sort_cubes_by_time(cubes) _fix_calendars(cubes) cubes = _remove_time_overlaps(cubes) - cubes = [_rechunk_aux_factory_dependencies(cube) for cube in cubes] result = _concatenate_cubes(cubes, check_level=check_level) if len(result) == 1: diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index 461e75002a..db455d2ccd 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -35,7 +35,6 @@ ) from esmvalcore.iris_helpers import has_irregular_grid, has_unstructured_grid from esmvalcore.preprocessor._shared import ( - _rechunk_aux_factory_dependencies, get_array_module, get_dims_along_axes, preserve_float_dtype, @@ -1324,18 +1323,14 @@ def extract_levels( # Add extra coordinates coord_names = [coord.name() for coord in cube.coords()] - if coordinate in coord_names: - cube = _rechunk_aux_factory_dependencies(cube, coordinate) - else: + if coordinate not in coord_names: # Try to calculate air_pressure from altitude coordinate or # vice versa using US standard atmosphere for conversion. if coordinate == "air_pressure" and "altitude" in coord_names: # Calculate pressure level coordinate from altitude. - cube = _rechunk_aux_factory_dependencies(cube, "altitude") add_plev_from_altitude(cube) if coordinate == "altitude" and "air_pressure" in coord_names: # Calculate altitude coordinate from pressure levels. - cube = _rechunk_aux_factory_dependencies(cube, "air_pressure") add_altitude_from_plev(cube) src_levels = cube.coord(coordinate) diff --git a/esmvalcore/preprocessor/_shared.py b/esmvalcore/preprocessor/_shared.py index ecf58028e2..073d1f9e44 100644 --- a/esmvalcore/preprocessor/_shared.py +++ b/esmvalcore/preprocessor/_shared.py @@ -664,39 +664,3 @@ def apply_mask( array_module = get_array_module(mask, array) return array_module.ma.masked_where(mask, array) - - -def _rechunk_aux_factory_dependencies( - cube: iris.cube.Cube, - coord_name: str | None = None, -) -> iris.cube.Cube: - """Rechunk coordinate aux factory dependencies. - - This ensures that the resulting coordinate has reasonably sized - chunks that are aligned with the cube data for optimal computational - performance. - """ - # Workaround for https://github.com/SciTools/iris/issues/5457 - if coord_name is None: - factories = cube.aux_factories - else: - try: - factories = [cube.aux_factory(coord_name)] - except iris.exceptions.CoordinateNotFoundError: - return cube - - cube = cube.copy() - cube_chunks = cube.lazy_data().chunks - for factory in factories: - for orig_coord in factory.dependencies.values(): - coord_dims = cube.coord_dims(orig_coord) - if coord_dims: - coord = orig_coord.copy() - chunks = tuple(cube_chunks[i] for i in coord_dims) - coord.points = coord.lazy_points().rechunk(chunks) - if coord.has_bounds(): - coord.bounds = coord.lazy_bounds().rechunk( - (*chunks, None), - ) - cube.replace_coord(coord) - return cube diff --git a/tests/unit/preprocessor/test_shared.py b/tests/unit/preprocessor/test_shared.py index e69d610247..ef37e05ea9 100644 --- a/tests/unit/preprocessor/test_shared.py +++ b/tests/unit/preprocessor/test_shared.py @@ -9,7 +9,6 @@ import numpy as np import pytest from cf_units import Unit -from iris.aux_factory import HybridPressureFactory from iris.coords import AuxCoord, DimCoord from iris.cube import Cube @@ -17,7 +16,6 @@ from esmvalcore.preprocessor._shared import ( _compute_area_weights, _group_products, - _rechunk_aux_factory_dependencies, aggregator_accept_weights, apply_mask, get_array_module, @@ -471,60 +469,6 @@ def test_apply_mask(mask, array, dim_map, expected): assert_array_equal(result, expected) -def test_rechunk_aux_factory_dependencies(): - delta = AuxCoord( - points=np.array([0.0, 1.0, 2.0], dtype=np.float64), - bounds=np.array( - [[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]], - dtype=np.float64, - ), - long_name="level_pressure", - units="Pa", - ) - sigma = AuxCoord( - np.array([1.0, 0.9, 0.8], dtype=np.float64), - long_name="sigma", - units="1", - ) - surface_air_pressure = AuxCoord( - np.arange(4).astype(np.float64).reshape(2, 2), - long_name="surface_air_pressure", - units="Pa", - ) - factory = HybridPressureFactory( - delta=delta, - sigma=sigma, - surface_air_pressure=surface_air_pressure, - ) - - cube = Cube( - da.asarray( - np.arange(3 * 2 * 2).astype(np.float32).reshape(3, 2, 2), - chunks=(1, 2, 2), - ), - ) - cube.add_aux_coord(delta, 0) - cube.add_aux_coord(sigma, 0) - cube.add_aux_coord(surface_air_pressure, [1, 2]) - cube.add_aux_factory(factory) - - result = _rechunk_aux_factory_dependencies(cube, "air_pressure") - - # Check that the 'air_pressure' coordinate of the resulting cube has been - # rechunked: - assert result.coord("air_pressure").core_points().chunks == ( - (1, 1, 1), - (2,), - (2,), - ) - # Check that the original cube has not been modified: - assert cube.coord("air_pressure").core_points().chunks == ( - (3,), - (2,), - (2,), - ) - - def get_0d_time(): """Get 0D time coordinate.""" return AuxCoord( From e5e2e9d199ba83c67b6688cecd753bba5899897d Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 12:46:09 +0100 Subject: [PATCH 7/9] Avoid copying input data in CESM2 fixes --- esmvalcore/cmor/_fixes/cmip6/cesm2.py | 109 +++++++++----------- esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py | 52 +++++++--- 2 files changed, 86 insertions(+), 75 deletions(-) diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2.py index 7279d0aa54..6019a837cd 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2.py @@ -2,13 +2,13 @@ from __future__ import annotations -from shutil import copyfile from typing import TYPE_CHECKING import iris import iris.coords +import ncdata +import ncdata.netcdf4 import numpy as np -from netCDF4 import Dataset from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor._fixes.fix import Fix @@ -19,40 +19,36 @@ add_scalar_typesea_coord, fix_ocean_depth_coord, ) +from esmvalcore.iris_helpers import dataset_to_iris if TYPE_CHECKING: + from collections.abc import Sequence from pathlib import Path + from iris.cube import Cube + class Cl(Fix): """Fixes for ``cl``.""" - def _fix_formula_terms( - self, - file: str | Path, - output_dir: str | Path, - add_unique_suffix: bool = False, - ) -> Path: + @staticmethod + def _fix_formula_terms(dataset: ncdata.NcData) -> None: """Fix ``formula_terms`` attribute.""" - new_path = self.get_fixed_filepath( - output_dir, - file, - add_unique_suffix=add_unique_suffix, + lev = dataset.variables["lev"] + lev.set_attrval("formula_terms", "p0: p0 a: a b: b ps: ps") + lev.set_attrval( + "standard_name", + "atmosphere_hybrid_sigma_pressure_coordinate", ) - copyfile(file, new_path) - with Dataset(new_path, mode="a") as dataset: - dataset.variables["lev"].formula_terms = "p0: p0 a: a b: b ps: ps" - dataset.variables[ - "lev" - ].standard_name = "atmosphere_hybrid_sigma_pressure_coordinate" - return new_path + lev.set_attrval("units", "1") + dataset.variables["lev_bnds"].attributes.pop("units") def fix_file( self, - file: str | Path, - output_dir: str | Path, - add_unique_suffix: bool = False, - ) -> Path: + file: Path, + output_dir: Path, # noqa: ARG002 + add_unique_suffix: bool = False, # noqa: ARG002 + ) -> Path | Sequence[Cube]: """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -79,45 +75,38 @@ def fix_file( Path to the fixed file. """ - new_path = self._fix_formula_terms( + dataset = ncdata.netcdf4.from_nc4( file, - output_dir, - add_unique_suffix=add_unique_suffix, + # Use iris-style chunks to avoid mismatching chunks between data + # and derived coordinates, as the latter are automatically rechunked + # by iris. + dim_chunks={ + "time": "auto", + "lev": None, + "lat": None, + "lon": None, + "nbnd": None, + }, ) - with Dataset(new_path, mode="a") as dataset: - dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][ - ::-1, - :, - ] - dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][ - ::-1, - :, - ] - return new_path - - def fix_metadata(self, cubes): - """Fix ``atmosphere_hybrid_sigma_pressure_coordinate``. - - See discussion in #882 for more details on that. - - Parameters - ---------- - cubes : iris.cube.CubeList - Input cubes. - - Returns - ------- - iris.cube.CubeList - - """ - cube = self.get_cube_from_list(cubes) - lev_coord = cube.coord(var_name="lev") - a_coord = cube.coord(var_name="a") - b_coord = cube.coord(var_name="b") - lev_coord.points = a_coord.core_points() + b_coord.core_points() - lev_coord.bounds = a_coord.core_bounds() + b_coord.core_bounds() - lev_coord.units = "1" - return cubes + self._fix_formula_terms(dataset) + + # Correct order of bounds data + a_bnds = dataset.variables["a_bnds"] + a_bnds.data = a_bnds.data[::-1, :] + b_bnds = dataset.variables["b_bnds"] + b_bnds.data = b_bnds.data[::-1, :] + + # Correct lev and lev_bnds data + lev = dataset.variables["lev"] + lev.data = dataset.variables["a"].data + dataset.variables["b"].data + lev_bnds = dataset.variables["lev_bnds"] + lev_bnds.data = ( + dataset.variables["a_bnds"].data + dataset.variables["b_bnds"].data + ) + # Remove 'title' attribute that duplicates long name + for var_name in dataset.variables: + dataset.variables[var_name].attributes.pop("title", None) + return [self.get_cube_from_list(dataset_to_iris(dataset, file))] Cli = Cl diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py index 35626f14ce..aea71adb05 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py @@ -1,8 +1,13 @@ """Fixes for CESM2-WACCM model.""" -from netCDF4 import Dataset +from __future__ import annotations + +from typing import TYPE_CHECKING + +import ncdata.netcdf4 from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.iris_helpers import dataset_to_iris from .cesm2 import Cl as BaseCl from .cesm2 import Fgco2 as BaseFgco2 @@ -12,11 +17,22 @@ from .cesm2 import Tasmax as BaseTasmax from .cesm2 import Tasmin as BaseTasmin +if TYPE_CHECKING: + from collections.abc import Sequence + from pathlib import Path + + from iris.cube import Cube + class Cl(BaseCl): """Fixes for cl.""" - def fix_file(self, file, output_dir, add_unique_suffix=False): + def fix_file( + self, + file: Path, + output_dir: Path, # noqa: ARG002 + add_unique_suffix: bool = False, # noqa: ARG002 + ) -> Path | Sequence[Cube]: """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -43,21 +59,27 @@ def fix_file(self, file, output_dir, add_unique_suffix=False): Path to the fixed file. """ - new_path = self._fix_formula_terms( + dataset = ncdata.netcdf4.from_nc4( file, - output_dir, - add_unique_suffix=add_unique_suffix, + # Use iris-style chunks to avoid mismatching chunks between data + # and derived coordinates, as the latter are automatically rechunked + # by iris. + dim_chunks={ + "time": "auto", + "lev": None, + "lat": None, + "lon": None, + "nbnd": None, + }, ) - with Dataset(new_path, mode="a") as dataset: - dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][ - :, - ::-1, - ] - dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][ - :, - ::-1, - ] - return new_path + self._fix_formula_terms(dataset) + + # Correct order of bounds data + a_bnds = dataset.variables["a_bnds"] + a_bnds.data = a_bnds.data[:, ::-1] + b_bnds = dataset.variables["b_bnds"] + b_bnds.data = b_bnds.data[:, ::-1] + return [self.get_cube_from_list(dataset_to_iris(dataset, file))] Cli = Cl From d56664d7ff6465c20a7aa809e5fb2a2ce71ccf8a Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 14:11:20 +0100 Subject: [PATCH 8/9] Update dependencies --- environment.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 76a30d9f91..0a4cc7c198 100644 --- a/environment.yml +++ b/environment.yml @@ -21,7 +21,7 @@ dependencies: - humanfriendly - intake-esgf >=2025.10.22 - intake-esm - - iris + - iris >=3.13 - iris-esmf-regrid >=0.11.0 - iris-grib >=0.20.0 # github.com/ESMValGroup/ESMValCore/issues/2535 - isodate >=0.7.0 # incompatible with very old 0.6.1 diff --git a/pyproject.toml b/pyproject.toml index 92b7e92757..0341da85fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ dependencies = [ "requests", "rich", "scipy>=1.6", - "scitools-iris", + "scitools-iris>=3.13", "shapely>=2.0.0", "stratify>=0.3", "xarray", From 6e5eb70ef88929c26c78b615d3afd4903354cf7c Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 23 Feb 2026 14:13:45 +0100 Subject: [PATCH 9/9] Update tests for CESM2 --- .../cmor/_fixes/cmip6/test_cesm2.py | 120 +++--------------- .../cmor/_fixes/cmip6/test_cesm2_waccm.py | 35 +++-- 2 files changed, 36 insertions(+), 119 deletions(-) diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py index e9786826ca..e93049d410 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py @@ -1,7 +1,9 @@ """Tests for the fixes of CESM2.""" -import os -import unittest.mock +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING import iris import iris.cube @@ -27,7 +29,10 @@ from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor._fixes.fix import GenericFix from esmvalcore.cmor.fix import Fix -from esmvalcore.cmor.table import get_var_info +from esmvalcore.cmor.table import VariableInfo, get_var_info + +if TYPE_CHECKING: + from pathlib import Path def test_get_cl_fix(): @@ -66,11 +71,7 @@ def test_get_cl_fix(): ) -@unittest.mock.patch( - "esmvalcore.cmor._fixes.cmip6.cesm2.Fix.get_fixed_filepath", - autospec=True, -) -def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): +def test_cl_fix_file(tmp_path: Path, test_data_path: Path) -> None: """Test ``fix_file`` for ``cl``.""" nc_path = test_data_path / "cesm2_cl.nc" cubes = iris.load(str(nc_path)) @@ -89,24 +90,17 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): assert not raw_cube.coords("air_pressure") # Apply fix - mock_get_filepath.return_value = os.path.join( - tmp_path, - "fixed_cesm2_cl.nc", - ) - fix = Cl(None) - fixed_file = fix.fix_file(nc_path, tmp_path) - mock_get_filepath.assert_called_once_with( - tmp_path, - nc_path, - add_unique_suffix=False, - ) - fixed_cubes = iris.load(fixed_file) - assert len(fixed_cubes) == 2 - var_names = [cube.var_name for cube in fixed_cubes] - assert "cl" in var_names - assert "ps" in var_names - fixed_cl_cube = fixed_cubes.extract_cube( - "cloud_area_fraction_in_atmosphere_layer", + vardef = get_var_info("CMIP6", "Amon", "cl") + assert isinstance(vardef, VariableInfo) + fix = Cl(vardef) + fixed_cubes = fix.fix_file(nc_path, tmp_path) + assert isinstance(fixed_cubes, Sequence) + assert len(fixed_cubes) == 1 + fixed_cl_cube = fixed_cubes[0] + assert fixed_cl_cube.var_name == "cl" + assert ( + fixed_cl_cube.standard_name + == "cloud_area_fraction_in_atmosphere_layer" ) fixed_air_pressure_coord = fixed_cl_cube.coord("air_pressure") assert fixed_air_pressure_coord.points is not None @@ -121,80 +115,6 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): ) -@pytest.fixture -def cl_cubes(): - """``cl`` cube.""" - time_coord = iris.coords.DimCoord( - [0.0, 1.0], - var_name="time", - standard_name="time", - units="days since 1850-01-01 00:00:00", - ) - a_coord = iris.coords.AuxCoord( - [0.1, 0.2, 0.1], - bounds=[[0.0, 0.15], [0.15, 0.25], [0.25, 0.0]], - var_name="a", - units="1", - ) - b_coord = iris.coords.AuxCoord( - [0.9, 0.3, 0.1], - bounds=[[1.0, 0.8], [0.8, 0.25], [0.25, 0.0]], - var_name="b", - units="1", - ) - lev_coord = iris.coords.DimCoord( - [999.0, 99.0, 9.0], - var_name="lev", - standard_name="atmosphere_hybrid_sigma_pressure_coordinate", - units="hPa", - attributes={"positive": "up"}, - ) - lat_coord = iris.coords.DimCoord( - [0.0, 1.0], - var_name="lat", - standard_name="latitude", - units="degrees", - ) - lon_coord = iris.coords.DimCoord( - [0.0, 1.0], - var_name="lon", - standard_name="longitude", - units="degrees", - ) - coord_specs = [ - (time_coord, 0), - (lev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] - cube = iris.cube.Cube( - np.arange(2 * 3 * 2 * 2).reshape(2, 3, 2, 2), - var_name="cl", - standard_name="cloud_area_fraction_in_atmosphere_layer", - units="%", - dim_coords_and_dims=coord_specs, - aux_coords_and_dims=[(a_coord, 1), (b_coord, 1)], - ) - return iris.cube.CubeList([cube]) - - -def test_cl_fix_metadata(cl_cubes): - """Test ``fix_metadata`` for ``cl``.""" - vardef = get_var_info("CMIP6", "Amon", "cl") - fix = Cl(vardef) - out_cubes = fix.fix_metadata(cl_cubes) - out_cube = out_cubes.extract_cube( - "cloud_area_fraction_in_atmosphere_layer", - ) - lev_coord = out_cube.coord(var_name="lev") - assert lev_coord.units == "1" - np.testing.assert_allclose(lev_coord.points, [1.0, 0.5, 0.2]) - np.testing.assert_allclose( - lev_coord.bounds, - [[1.0, 0.95], [0.95, 0.5], [0.5, 0.0]], - ) - - def test_get_cli_fix(): """Test getting of fix.""" fix = Fix.get_fixes("CMIP6", "CESM2", "Amon", "cli") diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py index 0b1c4054af..096e8da088 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py @@ -1,7 +1,9 @@ """Tests for the fixes of CESM2-WACCM.""" -import os -import unittest.mock +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING import iris import numpy as np @@ -28,6 +30,10 @@ from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor._fixes.fix import GenericFix from esmvalcore.cmor.fix import Fix +from esmvalcore.cmor.table import VariableInfo, get_var_info + +if TYPE_CHECKING: + from pathlib import Path def test_get_cl_fix(): @@ -41,25 +47,16 @@ def test_cl_fix(): assert issubclass(Cl, BaseCl) -@unittest.mock.patch( - "esmvalcore.cmor._fixes.cmip6.cesm2.Fix.get_fixed_filepath", - autospec=True, -) -def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): +def test_cl_fix_file(tmp_path: Path, test_data_path: Path) -> None: """Test ``fix_file`` for ``cl``.""" nc_path = test_data_path / "cesm2_waccm_cl.nc" - mock_get_filepath.return_value = os.path.join( - tmp_path, - "fixed_cesm2_waccm_cl.nc", - ) - fix = Cl(None) - fixed_file = fix.fix_file(nc_path, tmp_path) - mock_get_filepath.assert_called_once_with( - tmp_path, - nc_path, - add_unique_suffix=False, - ) - fixed_cube = iris.load_cube(fixed_file) + vardef = get_var_info("CMIP6", "Amon", "cl") + assert isinstance(vardef, VariableInfo) + fix = Cl(vardef) + fixed_cubes = fix.fix_file(nc_path, tmp_path) + assert isinstance(fixed_cubes, Sequence) + assert len(fixed_cubes) == 1 + fixed_cube = fixed_cubes[0] lev_coord = fixed_cube.coord(var_name="lev") a_coord = fixed_cube.coord(var_name="a") b_coord = fixed_cube.coord(var_name="b")