docs/src/whatsnew/latest.rst (4 changes: 4 additions & 0 deletions)
@@ -96,6 +96,10 @@ This document explains the changes made to Iris for this release
    constraints are given. This was previously only implemented where one such
    constraint was given. (:issue:`6228`, :pull:`6754`)
 
+#. `@stephenworsley`_ reduced the memory load for regridding and other operations
+   using :func:`~iris._lazy_data.map_complete_blocks` when the output chunks would
+   exceed the optimum chunksize set in dask. (:pull:`6730`)
+
 
 🔥 Deprecations
 ===============
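For context on "the optimum chunksize set in dask": the limit is taken from Dask's `array.chunk-size` configuration option, which the test below exercises by lowering it. A minimal sketch of inspecting and overriding that option (the operation under the context manager is illustrative, not part of this PR):

```python
import dask.config

# The config option that bounds the "optimum" chunksize (default "128MiB").
print(dask.config.get("array.chunk-size"))

# Operations run under this context see a smaller limit, so
# map_complete_blocks will split its output into smaller chunks.
with dask.config.set({"array.chunk-size": "64MiB"}):
    ...  # e.g. a regridding operation on a lazy cube
```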
lib/iris/_lazy_data.py (39 changes: 35 additions & 4 deletions)
@@ -626,6 +626,14 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs):
     --------
     :func:`dask.array.map_blocks` : The function used for the mapping.
 
+    Notes
+    -----
+    .. note::
+
+        If the output chunks would be larger than the maximum chunksize set
+        in the dask config, the input is rechunked, where possible, to
+        optimise the output chunksize.
+
     """
     data = None
     result = None
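Before the behavioural change in the next hunk, here is a minimal usage sketch of `map_complete_blocks`, assuming (as the branching above suggests) that `src` may be a lazy array as well as a cube; the doubling function and sizes are made up for illustration:

```python
import dask.array as da
import numpy as np

from iris._lazy_data import map_complete_blocks

# Double the mapped (last) axis; each block receives that axis unchunked.
src = da.ones((100, 100), chunks=(10, 50))
result = map_complete_blocks(
    src,
    lambda block: np.concatenate([block, block], axis=1),
    dims=(1,),
    out_sizes=(200,),
    dtype=src.dtype,
)
assert result.shape == (100, 200)
```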
Expand All @@ -640,17 +648,40 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs):
else:
data = src.lazy_data()

shape = list(src.shape)

if result is None and data is not None:
# Ensure dims are not chunked
in_chunks = list(data.chunks)
for dim in dims:
in_chunks[dim] = src.shape[dim]
data = data.rechunk(in_chunks)
in_chunks[dim] = (src.shape[dim],)

# Determine output chunks
out_chunks = list(data.chunks)
out_chunks = in_chunks.copy()
for dim, size in zip(dims, out_sizes):
out_chunks[dim] = size
out_chunks[dim] = (size,)
shape[dim] = size

# Ensure the chunksize of the output is a reasonable size.
max_outchunks = [max(chunk) for chunk in out_chunks]
df = tuple(i in dims for i in range(len(shape)))
dtype = np.dtype(dtype)
opt_outchunks = _optimum_chunksize(
max_outchunks, shape, dtype=dtype, dims_fixed=df
)
for i, (chunk, max_out, opt_out) in enumerate(
zip(out_chunks, max_outchunks, opt_outchunks)
):
if opt_out < max_out:
new_chunks = []
for c in chunk:
new_chunks.extend((c // opt_out) * [opt_out])
if chunk_end := c % opt_out:
new_chunks.append(chunk_end)
in_chunks[i] = tuple(new_chunks)
out_chunks[i] = tuple(new_chunks)

data = data.rechunk(in_chunks)

# Assume operation preserves mask.
meta = da.utils.meta_from_array(data).astype(dtype)
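The inner loop added above splits every existing chunk into optimum-sized pieces plus a remainder. A standalone sketch of just that arithmetic (hypothetical helper name; same logic as the diff):

```python
def split_chunks(chunks, opt):
    """Split each chunk into pieces of size `opt`, keeping any remainder."""
    new_chunks = []
    for c in chunks:
        # Whole pieces of the optimum size...
        new_chunks.extend((c // opt) * [opt])
        # ...plus whatever is left over from this chunk.
        if chunk_end := c % opt:
            new_chunks.append(chunk_end)
    return tuple(new_chunks)

# Mirrors the unit test below: original chunks (5, 4), optimum size 2.
assert split_chunks((5, 4), 2) == (2, 2, 1, 2, 2)
```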
lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py (19 changes: 19 additions & 0 deletions)
@@ -7,6 +7,7 @@
 from unittest.mock import Mock, PropertyMock
 
 import dask.array as da
+import dask.config
 import numpy as np
 
 from iris._lazy_data import is_lazy_data, map_complete_blocks
@@ -134,3 +135,21 @@ def test_multidimensional_input(self):
         )
         assert is_lazy_data(result)
         assert_array_equal(result.compute(), array + 1)
+
+    def test_rechunking(self):
+        # Choose a dask array with an irregularly chunked dimension to be rechunked.
+        lazy_array = da.ones((5, 10, 9, 10), chunks=(2, 10, 5, 5))
+        cube, _ = create_mock_cube(lazy_array)
+
+        # Reduce the optimum dask chunksize.
+        with dask.config.set({"array.chunk-size": "32KiB"}):
+            result = map_complete_blocks(
+                cube, self.func, dims=(1, 3), out_sizes=(30, 40), dtype=lazy_array.dtype
+            )
+        assert is_lazy_data(result)
+        expected_chunksize = (1, 30, 2, 40)
+        assert result.chunksize == expected_chunksize
+        # Note that one dimension is rechunked regularly and the other isn't.
+        assert result.chunks[0] == (1, 1, 1, 1, 1)
+        # Irregular: split from the original chunks of (5, 4).
+        assert result.chunks[2] == (2, 2, 1, 2, 2)
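A back-of-envelope check of why `(1, 30, 2, 40)` is the expected chunksize, on the assumption that `_optimum_chunksize` holds the mapped dims (1 and 3) fixed and shrinks the others to fit the lowered 32 KiB limit:

```python
import numpy as np

limit = 32 * 1024 // np.dtype("float64").itemsize  # 4096 elements per chunk
naive = 2 * 30 * 5 * 40      # 12000 elements without rechunking: too big
optimised = 1 * 30 * 2 * 40  # 2400 elements after shrinking dims 0 and 2
assert naive > limit >= optimised
```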