From 198f6caf2b5b9862e1aed4894fc6dc61f017cb3c Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 29 Sep 2025 22:08:49 +0100 Subject: [PATCH 01/12] improve memory for map_complete_blocks --- lib/iris/_lazy_data.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 992789ed90..172c52f978 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -640,17 +640,39 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): else: data = src.lazy_data() + shape = list(src.shape) + if result is None and data is not None: # Ensure dims are not chunked in_chunks = list(data.chunks) for dim in dims: - in_chunks[dim] = src.shape[dim] - data = data.rechunk(in_chunks) + in_chunks[dim] = (src.shape[dim],) # Determine output chunks - out_chunks = list(data.chunks) + out_chunks = in_chunks.copy() for dim, size in zip(dims, out_sizes): - out_chunks[dim] = size + out_chunks[dim] = (size,) + shape[dim] = size + + max_outchunks = [max(chunk) for chunk in out_chunks] + df = [False]*len(max_outchunks) + for dim in dims: + df[dim] = True + df = tuple(df) + opt_outchunks = _optimum_chunksize(max_outchunks, shape, dtype=dtype, dims_fixed=df) + for i, (chunk, max_out, opt_out) in enumerate(zip(out_chunks,max_outchunks,opt_outchunks)): + if opt_out < max_out: + new_chunks = [] + for c in chunk: + new_chunks.extend((c//opt_out)*[opt_out]) + if chunk_end := c % opt_out > 0: + new_chunks.append(chunk_end) + in_chunks[i] = tuple(new_chunks) + out_chunks[i] = tuple(new_chunks) + + + + data = data.rechunk(in_chunks) # Assume operation preserves mask. meta = da.utils.meta_from_array(data).astype(dtype) From acd68d62181010b2d206685a6ed5aeb3c6bc68d9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Sep 2025 21:12:01 +0000 Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/_lazy_data.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 172c52f978..b12c8699ff 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -655,23 +655,25 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): shape[dim] = size max_outchunks = [max(chunk) for chunk in out_chunks] - df = [False]*len(max_outchunks) + df = [False] * len(max_outchunks) for dim in dims: df[dim] = True df = tuple(df) - opt_outchunks = _optimum_chunksize(max_outchunks, shape, dtype=dtype, dims_fixed=df) - for i, (chunk, max_out, opt_out) in enumerate(zip(out_chunks,max_outchunks,opt_outchunks)): + opt_outchunks = _optimum_chunksize( + max_outchunks, shape, dtype=dtype, dims_fixed=df + ) + for i, (chunk, max_out, opt_out) in enumerate( + zip(out_chunks, max_outchunks, opt_outchunks) + ): if opt_out < max_out: new_chunks = [] for c in chunk: - new_chunks.extend((c//opt_out)*[opt_out]) + new_chunks.extend((c // opt_out) * [opt_out]) if chunk_end := c % opt_out > 0: new_chunks.append(chunk_end) in_chunks[i] = tuple(new_chunks) out_chunks[i] = tuple(new_chunks) - - data = data.rechunk(in_chunks) # Assume operation preserves mask. From bf4548ad774f5e22fd822a978dd597e5cfd7d160 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 7 Oct 2025 09:38:16 +0100 Subject: [PATCH 03/12] fix test failures --- lib/iris/_lazy_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 172c52f978..89d5eb9689 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -659,6 +659,7 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): for dim in dims: df[dim] = True df = tuple(df) + dtype = np.dtype(dtype) opt_outchunks = _optimum_chunksize(max_outchunks, shape, dtype=dtype, dims_fixed=df) for i, (chunk, max_out, opt_out) in enumerate(zip(out_chunks,max_outchunks,opt_outchunks)): if opt_out < max_out: From a1fd8a11f23d932360af69ed877845ded8dde7e7 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 16 Oct 2025 21:53:11 +0100 Subject: [PATCH 04/12] add test --- lib/iris/_lazy_data.py | 11 ++++++++- .../lazy_data/test_map_complete_blocks.py | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index daf9bf4a15..c3f9f347fb 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -626,6 +626,14 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): -------- :func:`dask.array.map_blocks` : The function used for the mapping. + Notes + ----- + .. note: + + If the output chunks would larger than the maximum chunksize set + in the dask config, the input is rechunked, where possible to + optimise the output chunksize. + """ data = None result = None @@ -654,6 +662,7 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): out_chunks[dim] = (size,) shape[dim] = size + # Ensure the chunksize of the output is a reasonable size. max_outchunks = [max(chunk) for chunk in out_chunks] df = [False] * len(max_outchunks) for dim in dims: @@ -670,7 +679,7 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): new_chunks = [] for c in chunk: new_chunks.extend((c // opt_out) * [opt_out]) - if chunk_end := c % opt_out > 0: + if chunk_end := c % opt_out: new_chunks.append(chunk_end) in_chunks[i] = tuple(new_chunks) out_chunks[i] = tuple(new_chunks) diff --git a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py index be25ab6c09..f35b6b8880 100644 --- a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py +++ b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py @@ -6,10 +6,12 @@ from unittest.mock import Mock, PropertyMock +import dask.config import dask.array as da import numpy as np from iris._lazy_data import is_lazy_data, map_complete_blocks +from iris.cube import Cube from iris.tests._shared_utils import assert_array_equal @@ -134,3 +136,25 @@ def test_multidimensional_input(self): ) assert is_lazy_data(result) assert_array_equal(result.compute(), array + 1) + + def test_rechunking(self): + # Choose a dask array with an irregularly chunked dimension to be rechunked. + lazy_array = da.ones((5, 9, 10, 10), chunks=(2, 5, 10, 5)) + cube, _ = create_mock_cube(lazy_array) + + result = map_complete_blocks( + cube, self.func, dims=(2, 3), out_sizes=(30, 40), dtype=lazy_array.dtype + ) + assert is_lazy_data(result) + # Reduce the optimum dask chunksize. + with dask.config.set({"array.chunk-size": "32KiB"}): + + result = map_complete_blocks( + cube, self.func, dims=(2, 3), out_sizes=(30, 40), dtype=lazy_array.dtype + ) + assert is_lazy_data(result) + expected_chunksize = (1, 2, 30, 40) + assert result.chunksize == expected_chunksize + # Note that one chunk is irregularly rechunked and the other isn't. + expected_chunk = (2, 2, 1, 2, 2) + assert result.chunks[1] == expected_chunk From fdbfaabde5d8e2bc9ff7e50d436b9308338f5d13 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 16 Oct 2025 22:04:23 +0100 Subject: [PATCH 05/12] add whatsnew --- docs/src/whatsnew/latest.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 2292aab082..86150f5de0 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -78,6 +78,10 @@ This document explains the changes made to Iris for this release is hoped that a future ``libnetcdf`` release will recover the original performance. See `netcdf-c#3183`_ for more details. (:pull:`6747`) +#. `@stephenworsley`_ reduced the memory load for regridding and other operations + using :func:`~iris._lazy_data.map_complete_blocks` when the output chunks would + exceed the optimum chunksize set in dask. (:pull:`6730`) + 🔥 Deprecations =============== From 7467fc221d77a121a8b6526c419e590a631de2fb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 Oct 2025 21:06:24 +0000 Subject: [PATCH 06/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py index f35b6b8880..e04f718af2 100644 --- a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py +++ b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py @@ -6,8 +6,8 @@ from unittest.mock import Mock, PropertyMock -import dask.config import dask.array as da +import dask.config import numpy as np from iris._lazy_data import is_lazy_data, map_complete_blocks @@ -148,7 +148,6 @@ def test_rechunking(self): assert is_lazy_data(result) # Reduce the optimum dask chunksize. with dask.config.set({"array.chunk-size": "32KiB"}): - result = map_complete_blocks( cube, self.func, dims=(2, 3), out_sizes=(30, 40), dtype=lazy_array.dtype ) From 8fab0005e79c6706dcd9aaab0dd7d140170ca077 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 20 Oct 2025 10:48:16 +0100 Subject: [PATCH 07/12] remove import --- lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py index f35b6b8880..7ff8a708cc 100644 --- a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py +++ b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py @@ -11,7 +11,6 @@ import numpy as np from iris._lazy_data import is_lazy_data, map_complete_blocks -from iris.cube import Cube from iris.tests._shared_utils import assert_array_equal From 6d9702a197f434f7fdd12881f25a8581dd67ac70 Mon Sep 17 00:00:00 2001 From: stephenworsley <49274989+stephenworsley@users.noreply.github.com> Date: Thu, 23 Oct 2025 15:53:48 +0100 Subject: [PATCH 08/12] Update lib/iris/_lazy_data.py Co-authored-by: Patrick Peglar --- lib/iris/_lazy_data.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index c3f9f347fb..c89fca10a4 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -664,10 +664,7 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): # Ensure the chunksize of the output is a reasonable size. max_outchunks = [max(chunk) for chunk in out_chunks] - df = [False] * len(max_outchunks) - for dim in dims: - df[dim] = True - df = tuple(df) + df = tuple(i in dims for i in range(len(shape))) dtype = np.dtype(dtype) opt_outchunks = _optimum_chunksize( max_outchunks, shape, dtype=dtype, dims_fixed=df From 12b670ea1a15140b5badfef5fb8ff25a3ae37194 Mon Sep 17 00:00:00 2001 From: stephenworsley <49274989+stephenworsley@users.noreply.github.com> Date: Thu, 23 Oct 2025 15:53:56 +0100 Subject: [PATCH 09/12] Update lib/iris/_lazy_data.py Co-authored-by: Patrick Peglar --- lib/iris/_lazy_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index c89fca10a4..c74da78d6b 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -630,7 +630,7 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs): ----- .. note: - If the output chunks would larger than the maximum chunksize set + If the output chunks would be larger than the maximum chunksize set in the dask config, the input is rechunked, where possible to optimise the output chunksize. From 2a00a8c8fb937eaaf5774e1b171df51c84ee3189 Mon Sep 17 00:00:00 2001 From: stephenworsley <49274989+stephenworsley@users.noreply.github.com> Date: Thu, 23 Oct 2025 15:54:13 +0100 Subject: [PATCH 10/12] Update lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py Co-authored-by: Patrick Peglar --- lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py index d5f7511c41..e143d2dba7 100644 --- a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py +++ b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py @@ -154,5 +154,5 @@ def test_rechunking(self): expected_chunksize = (1, 2, 30, 40) assert result.chunksize == expected_chunksize # Note that one chunk is irregularly rechunked and the other isn't. - expected_chunk = (2, 2, 1, 2, 2) - assert result.chunks[1] == expected_chunk + assert result.chunks[0] == (1, 1, 1, 1, 1) + assert result.chunks[1] == (2, 2, 1, 2, 2) # split from the original chunks of (5, 4) From 054dff0f4119c80f229c6cc7ac43b8c610f545ca Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 23 Oct 2025 14:56:05 +0000 Subject: [PATCH 11/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py index e143d2dba7..6da3d49709 100644 --- a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py +++ b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py @@ -155,4 +155,10 @@ def test_rechunking(self): assert result.chunksize == expected_chunksize # Note that one chunk is irregularly rechunked and the other isn't. assert result.chunks[0] == (1, 1, 1, 1, 1) - assert result.chunks[1] == (2, 2, 1, 2, 2) # split from the original chunks of (5, 4) + assert result.chunks[1] == ( + 2, + 2, + 1, + 2, + 2, + ) # split from the original chunks of (5, 4) From a19ba7ac2031cb01cc8f57da2451b00f002d45cf Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 23 Oct 2025 16:19:01 +0100 Subject: [PATCH 12/12] address review comments --- .../lazy_data/test_map_complete_blocks.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py index 6da3d49709..6dc8286996 100644 --- a/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py +++ b/lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py @@ -138,27 +138,18 @@ def test_multidimensional_input(self): def test_rechunking(self): # Choose a dask array with an irregularly chunked dimension to be rechunked. - lazy_array = da.ones((5, 9, 10, 10), chunks=(2, 5, 10, 5)) + lazy_array = da.ones((5, 10, 9, 10), chunks=(2, 10, 5, 5)) cube, _ = create_mock_cube(lazy_array) - result = map_complete_blocks( - cube, self.func, dims=(2, 3), out_sizes=(30, 40), dtype=lazy_array.dtype - ) - assert is_lazy_data(result) # Reduce the optimum dask chunksize. with dask.config.set({"array.chunk-size": "32KiB"}): result = map_complete_blocks( - cube, self.func, dims=(2, 3), out_sizes=(30, 40), dtype=lazy_array.dtype + cube, self.func, dims=(1, 3), out_sizes=(30, 40), dtype=lazy_array.dtype ) assert is_lazy_data(result) - expected_chunksize = (1, 2, 30, 40) + expected_chunksize = (1, 30, 2, 40) assert result.chunksize == expected_chunksize # Note that one chunk is irregularly rechunked and the other isn't. assert result.chunks[0] == (1, 1, 1, 1, 1) - assert result.chunks[1] == ( - 2, - 2, - 1, - 2, - 2, - ) # split from the original chunks of (5, 4) + # split from the original chunks of (5, 4) + assert result.chunks[2] == (2, 2, 1, 2, 2)