From dc6369c0de1b5a609a5d655c2a9ed69fed9d50cb Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 21 Feb 2024 15:03:05 +0000 Subject: [PATCH 01/13] fix usage of map_blocks --- lib/iris/_lazy_data.py | 4 ++-- lib/iris/analysis/__init__.py | 10 ++++------ lib/iris/analysis/_area_weighted.py | 10 ++++------ lib/iris/analysis/_regrid.py | 8 ++++---- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 36c0825ad8..b7e9486ed2 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -450,7 +450,7 @@ def lazy_elementwise(lazy_array, elementwise_op): return da.map_blocks(elementwise_op, lazy_array, dtype=dtype) -def map_complete_blocks(src, func, dims, out_sizes): +def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): """Apply a function to complete blocks. Complete means that the data is not chunked along the chosen dimensions. @@ -488,4 +488,4 @@ def map_complete_blocks(src, func, dims, out_sizes): for dim, size in zip(dims, out_sizes): out_chunks[dim] = size - return data.map_blocks(func, chunks=out_chunks, dtype=src.dtype) + return data.map_blocks(func, *args, chunks=out_chunks, dtype=src.dtype, **kwargs) diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 773e804a14..df069550b8 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1378,18 +1378,16 @@ def _percentile(data, percent, fast_percentile_method=False, **kwargs): percent = [percent] percent = np.array(percent) - # Perform the percentile calculation. - _partial_percentile = functools.partial( + result = iris._lazy_data.map_complete_blocks( + data, _calc_percentile, + (-1,), + percent.shape, percent=percent, fast_percentile_method=fast_percentile_method, **kwargs, ) - result = iris._lazy_data.map_complete_blocks( - data, _partial_percentile, (-1,), percent.shape - ) - # Check whether to reduce to a scalar result, as per the behaviour # of other aggregators. if result.shape == (1,): diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 263f83838c..8ee8509fcf 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -392,9 +392,11 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( tgt_shape = (len(grid_y.points), len(grid_x.points)) - # Calculate new data array for regridded cube. - regrid = functools.partial( + new_data = map_complete_blocks( + src_cube, _regrid_along_dims, + (src_y_dim, src_x_dim), + meshgrid_x.shape, x_dim=src_x_dim, y_dim=src_y_dim, weights=weights, @@ -402,10 +404,6 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( mdtol=mdtol, ) - new_data = map_complete_blocks( - src_cube, regrid, (src_y_dim, src_x_dim), meshgrid_x.shape - ) - # Wrap up the data as a Cube. _regrid_callback = functools.partial( diff --git a/lib/iris/analysis/_regrid.py b/lib/iris/analysis/_regrid.py index b85265e5d9..70b28df1c7 100644 --- a/lib/iris/analysis/_regrid.py +++ b/lib/iris/analysis/_regrid.py @@ -932,9 +932,11 @@ def __call__(self, src): x_dim = src.coord_dims(src_x_coord)[0] y_dim = src.coord_dims(src_y_coord)[0] - # Define regrid function - regrid = functools.partial( + data = map_complete_blocks( + src, self._regrid, + (y_dim, x_dim), + sample_grid_x.shape, x_dim=x_dim, y_dim=y_dim, src_x_coord=src_x_coord, @@ -945,8 +947,6 @@ def __call__(self, src): extrapolation_mode=self._extrapolation_mode, ) - data = map_complete_blocks(src, regrid, (y_dim, x_dim), sample_grid_x.shape) - # Wrap up the data as a Cube. _regrid_callback = functools.partial( self._regrid, From 2d9867e4c5a0bc52ce357811a00e49448ead77dc Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 21 Feb 2024 15:45:02 +0000 Subject: [PATCH 02/13] fix map_blocks for non-lazy data --- lib/iris/_lazy_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index b7e9486ed2..89d25fdfc6 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -471,9 +471,9 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): data = src elif not hasattr(src, "has_lazy_data"): # Not a lazy array and not a cube. So treat as ordinary numpy array. - return func(src) + return func(src, *args, **kwargs) elif not src.has_lazy_data(): - return func(src.data) + return func(src.data, *args, **kwargs) else: data = src.lazy_data() From 65cc17d6b17975d0928e72e02c1574fbfb53b222 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 23 Feb 2024 13:30:25 +0000 Subject: [PATCH 03/13] add benchmark --- benchmarks/benchmarks/regridding.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py index b311c94717..e304386ab9 100644 --- a/benchmarks/benchmarks/regridding.py +++ b/benchmarks/benchmarks/regridding.py @@ -14,6 +14,8 @@ from iris.analysis import AreaWeighted, PointInCell from iris.coords import AuxCoord +from . import TrackAddedMemoryAllocation, on_demand_benchmark + class HorizontalChunkedRegridding: def setup(self) -> None: @@ -51,6 +53,13 @@ def time_regrid_area_w_new_grid(self) -> None: # Realise data out.data + # Vulnerable to noise, so disabled by default. + @on_demand_benchmark + @TrackAddedMemoryAllocation.decorator + def track_addedmem_full_regrid(self): + for _ in range(8): + result = self.cube.regrid(self.template_cube, self.scheme_area_w) + class CurvilinearRegridding: def setup(self) -> None: From 3f549c9f25e72e9cddbbe668717f57f72dca2aa3 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 23 Feb 2024 14:20:43 +0000 Subject: [PATCH 04/13] unskip benchmark --- benchmarks/benchmarks/regridding.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py index e304386ab9..a77bf616e6 100644 --- a/benchmarks/benchmarks/regridding.py +++ b/benchmarks/benchmarks/regridding.py @@ -53,8 +53,6 @@ def time_regrid_area_w_new_grid(self) -> None: # Realise data out.data - # Vulnerable to noise, so disabled by default. - @on_demand_benchmark @TrackAddedMemoryAllocation.decorator def track_addedmem_full_regrid(self): for _ in range(8): From 8742018d1116a8e2a032761764fe7dd682ef9460 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 26 Feb 2024 23:29:20 +0000 Subject: [PATCH 05/13] add benchmark --- benchmarks/benchmarks/regridding.py | 6 +++++- benchmarks/bm_runner.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py index a77bf616e6..44f24ae144 100644 --- a/benchmarks/benchmarks/regridding.py +++ b/benchmarks/benchmarks/regridding.py @@ -9,6 +9,7 @@ from iris import tests # isort:skip import numpy as np +import tracemalloc import iris from iris.analysis import AreaWeighted, PointInCell @@ -53,10 +54,13 @@ def time_regrid_area_w_new_grid(self) -> None: # Realise data out.data - @TrackAddedMemoryAllocation.decorator def track_addedmem_full_regrid(self): + begin_snapshot = tracemalloc.take_snapshot() for _ in range(8): result = self.cube.regrid(self.template_cube, self.scheme_area_w) + end_snapshot = tracemalloc.take_snapshot() + diff_stats = end_snapshot.compare_to(begin_snapshot, 'lineno') + return diff_stats[0].size_diff class CurvilinearRegridding: diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py index 10dc5f469a..6c89c941e3 100644 --- a/benchmarks/bm_runner.py +++ b/benchmarks/bm_runner.py @@ -467,6 +467,7 @@ def csperf(args: argparse.Namespace, run_type: Literal["cperf", "sperf"]) -> Non # Activate on demand benchmarks (C/SPerf are deactivated for # 'standard' runs). environ["ON_DEMAND_BENCHMARKS"] = "True" + environ["PYTHONTRACEMALLOC"] = 25 commit_range = "upstream/main^!" asv_command = ASV_HARNESS.format(posargs=commit_range) + f" --bench={run_type}" From 3571eeb4c51660fdb31090a738abf1626ac900a0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 23:30:33 +0000 Subject: [PATCH 06/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmarks/benchmarks/regridding.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py index 44f24ae144..cf6822a397 100644 --- a/benchmarks/benchmarks/regridding.py +++ b/benchmarks/benchmarks/regridding.py @@ -8,9 +8,10 @@ # importing anything else from iris import tests # isort:skip -import numpy as np import tracemalloc +import numpy as np + import iris from iris.analysis import AreaWeighted, PointInCell from iris.coords import AuxCoord @@ -59,7 +60,7 @@ def track_addedmem_full_regrid(self): for _ in range(8): result = self.cube.regrid(self.template_cube, self.scheme_area_w) end_snapshot = tracemalloc.take_snapshot() - diff_stats = end_snapshot.compare_to(begin_snapshot, 'lineno') + diff_stats = end_snapshot.compare_to(begin_snapshot, "lineno") return diff_stats[0].size_diff From b0017bbec327a14000a8de6096fc87e32bb257d7 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 27 Feb 2024 14:28:00 +0000 Subject: [PATCH 07/13] remove benchmarks --- benchmarks/benchmarks/regridding.py | 10 ---------- benchmarks/bm_runner.py | 1 - 2 files changed, 11 deletions(-) diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py index cf6822a397..29287c17f2 100644 --- a/benchmarks/benchmarks/regridding.py +++ b/benchmarks/benchmarks/regridding.py @@ -8,8 +8,6 @@ # importing anything else from iris import tests # isort:skip -import tracemalloc - import numpy as np import iris @@ -55,14 +53,6 @@ def time_regrid_area_w_new_grid(self) -> None: # Realise data out.data - def track_addedmem_full_regrid(self): - begin_snapshot = tracemalloc.take_snapshot() - for _ in range(8): - result = self.cube.regrid(self.template_cube, self.scheme_area_w) - end_snapshot = tracemalloc.take_snapshot() - diff_stats = end_snapshot.compare_to(begin_snapshot, "lineno") - return diff_stats[0].size_diff - class CurvilinearRegridding: def setup(self) -> None: diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py index 6c89c941e3..10dc5f469a 100644 --- a/benchmarks/bm_runner.py +++ b/benchmarks/bm_runner.py @@ -467,7 +467,6 @@ def csperf(args: argparse.Namespace, run_type: Literal["cperf", "sperf"]) -> Non # Activate on demand benchmarks (C/SPerf are deactivated for # 'standard' runs). environ["ON_DEMAND_BENCHMARKS"] = "True" - environ["PYTHONTRACEMALLOC"] = 25 commit_range = "upstream/main^!" asv_command = ASV_HARNESS.format(posargs=commit_range) + f" --bench={run_type}" From 9a7ec2c4513f101fe47140f726e5b36bded33b63 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 28 Feb 2024 15:29:37 +0000 Subject: [PATCH 08/13] remove unnecessary import --- benchmarks/benchmarks/regridding.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py index 29287c17f2..b311c94717 100644 --- a/benchmarks/benchmarks/regridding.py +++ b/benchmarks/benchmarks/regridding.py @@ -14,8 +14,6 @@ from iris.analysis import AreaWeighted, PointInCell from iris.coords import AuxCoord -from . import TrackAddedMemoryAllocation, on_demand_benchmark - class HorizontalChunkedRegridding: def setup(self) -> None: From 0457df7bfc2d62aba4076b84f369a80904b66d75 Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Fri, 1 Mar 2024 15:36:31 +0000 Subject: [PATCH 09/13] What's New entry. --- docs/src/whatsnew/latest.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 76745f8667..9150568316 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -51,6 +51,10 @@ This document explains the changes made to Iris for this release #. `@bouweandela`_ made :func:`iris.util.rolling_window` work with lazy arrays. (:pull:`5775`) +#. `@stephenworsley`_ fixed a potential memory leak for Iris uses of + :func:`dask.array.map_blocks`; known specifically to be a problem in the + :class:`iris.analysis.AreaWeighted` regridder. (:pull:`5767`) + 🔥 Deprecations =============== From 70c5435156fc072819722f2f3781d87b88f36dfe Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Fri, 1 Mar 2024 16:01:43 +0000 Subject: [PATCH 10/13] map_complete_blocks docstring. --- lib/iris/_lazy_data.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 63cd3e7cbe..80a4d8a001 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -454,6 +454,7 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): """Apply a function to complete blocks. Complete means that the data is not chunked along the chosen dimensions. + Uses :func:`dask.array.map_blocks` to implement the mapping. Parameters ---------- @@ -465,6 +466,14 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): Dimensions that cannot be chunked. out_sizes : tuple of int Output size of dimensions that cannot be chunked. + *args : tuple + Additional arguments to pass to `func`. + **kwargs : dict + Additional keyword arguments to pass to `func`. + + See Also + -------- + :func:`dask.array.map_blocks` : The function used for the mapping. """ if is_lazy_data(src): From 6fb3c6c00ba68bb20c1e43e8ec0e2dec9022e98b Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Fri, 1 Mar 2024 16:18:29 +0000 Subject: [PATCH 11/13] map_complete_blocks returns. --- lib/iris/_lazy_data.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 80a4d8a001..5837af3f0e 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -471,30 +471,42 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): **kwargs : dict Additional keyword arguments to pass to `func`. + Returns + ------- + Array-like + See Also -------- :func:`dask.array.map_blocks` : The function used for the mapping. """ + data = None + result = None + if is_lazy_data(src): data = src elif not hasattr(src, "has_lazy_data"): # Not a lazy array and not a cube. So treat as ordinary numpy array. - return func(src, *args, **kwargs) + result = func(src, *args, **kwargs) elif not src.has_lazy_data(): - return func(src.data, *args, **kwargs) + result = func(src.data, *args, **kwargs) else: data = src.lazy_data() - # Ensure dims are not chunked - in_chunks = list(data.chunks) - for dim in dims: - in_chunks[dim] = src.shape[dim] - data = data.rechunk(in_chunks) + if result is not None and data is not None: + # Ensure dims are not chunked + in_chunks = list(data.chunks) + for dim in dims: + in_chunks[dim] = src.shape[dim] + data = data.rechunk(in_chunks) + + # Determine output chunks + out_chunks = list(data.chunks) + for dim, size in zip(dims, out_sizes): + out_chunks[dim] = size - # Determine output chunks - out_chunks = list(data.chunks) - for dim, size in zip(dims, out_sizes): - out_chunks[dim] = size + result = data.map_blocks( + func, *args, chunks=out_chunks, dtype=src.dtype, **kwargs + ) - return data.map_blocks(func, *args, chunks=out_chunks, dtype=src.dtype, **kwargs) + return result From 10db85a4184486dce88ac2910a0698330ae4675d Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Fri, 1 Mar 2024 16:24:23 +0000 Subject: [PATCH 12/13] Typo. --- lib/iris/_lazy_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 5837af3f0e..139dd3fe33 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -493,7 +493,7 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): else: data = src.lazy_data() - if result is not None and data is not None: + if result is None and data is None: # Ensure dims are not chunked in_chunks = list(data.chunks) for dim in dims: From d91c7c6d0dd032b78234b1c8cb257a9088bbdf20 Mon Sep 17 00:00:00 2001 From: Martin Yeo Date: Fri, 1 Mar 2024 16:31:49 +0000 Subject: [PATCH 13/13] Typo. --- lib/iris/_lazy_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 139dd3fe33..40984248d1 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -493,7 +493,7 @@ def map_complete_blocks(src, func, dims, out_sizes, *args, **kwargs): else: data = src.lazy_data() - if result is None and data is None: + if result is None and data is not None: # Ensure dims are not chunked in_chunks = list(data.chunks) for dim in dims: