Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 51 additions & 70 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3140,47 +3140,57 @@ def convolution_filter(

return d

@daskified(_DASKIFIED_VERBOSE)
@_inplace_enabled(default=False)
def cumsum(self, axis, masked_as_zero=False, inplace=False):
def cumsum(
self,
axis=None,
masked_as_zero=False,
method="sequential",
inplace=False,
):
"""Return the data cumulatively summed along the given axis.

.. versionadded:: 3.0.0

.. seealso:: `sum`
.. seealso:: `diff`, `sum`

:Parameters:

axis: `int`, optional
Select the axis over which the cumulative sums are to be
calculated.
Select the axis over which the cumulative sums are to
be calculated. By default the cumulative sum is
computed over the flattened array.

masked_as_zero: `bool`, optional
If True then set missing data values to zero before
calculating the cumulative sum. By default the output data
will be masked at the same locations as the original data.
method: `str`, optional
Choose which method to use to perform the cumulative
sum. See `dask.array.cumsum` for details.

.. note:: Sums produced entirely from masked elements will
always result in masked values in the output
data, regardless of the setting of
*masked_as_zero*.
.. versionadded:: TODODASK

{{inplace: `bool`, optional}}

.. versionadded:: 3.3.0

masked_as_zero: deprecated at version TODODASK
See the examples for the new behaviour when there are
masked values.

:Returns:

`Data`
The data with the cumulatively summed axis, or `None` if
the operation was in-place.
`Data` or `None`
The data with the cumulatively summed axis, or `None`
if the operation was in-place.

**Examples:**
**Examples**

>>> d = cf.Data(numpy.arange(12).reshape(3, 4))
>>> print(d.array)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
>>> print(d.cumsum().array)
[ 0 1 3 6 10 15 21 28 36 45 55 66]
>>> print(d.cumsum(axis=0).array)
[[ 0 1 2 3]
[ 4 6 8 10]
Expand All @@ -3191,73 +3201,44 @@ def cumsum(self, axis, masked_as_zero=False, inplace=False):
[ 8 17 27 38]]

>>> d[0, 0] = cf.masked
>>> d[1, 1] = cf.masked
>>> d[1, [1, 3]] = cf.masked
>>> d[2, 0:2] = cf.masked
>>> print(d.array)
[[-- 1 3 6]
[ 4 -- 10 17]
[-- -- 10 21]]
[[-- 1 2 3]
[4 -- 6 --]
[-- -- 10 11]]
>>> print(d.cumsum(axis=0).array)
[[-- 1 2 3]
[4 -- 8 --]
[-- -- 18 14]]
>>> print(d.cumsum(axis=1).array)
[[-- 1 3 6]
[ 4 -- 10 17]
[-- -- 10 21]]
>>> print(d.cumsum(axis=1, masked_as_zero=True).array)
[[-- 1 3 6]
[ 4 4 10 17]
[[-- 1 3 6]
[4 -- 10 --]
[-- -- 10 21]]

"""
# Parse axis
ndim = self._ndim
if -ndim - 1 <= axis < 0:
axis += ndim + 1
elif not 0 <= axis <= ndim:
raise ValueError(
"Can't cumsum: Invalid axis specification: Expected "
"-{0}<=axis<{0}, got axis={1}".format(ndim, axis)
)
if masked_as_zero:
_DEPRECATION_ERROR_KWARGS(
self,
"cumsum",
{"masked_as_zero": None},
message="",
version="TODODASK",
removed_at="5.0.0",
) # pragma: no cover

d = _inplace_enabled_define_and_cleanup(self)

sections = self.section(axis, chunks=True)

# Cumulatively sum each section
for key, data in sections.items():
array = data.array

filled = False
if masked_as_zero and np.ma.is_masked(array):
mask = array.mask
array = array.filled(0)
filled = True

array = np.cumsum(array, axis=axis)

if filled:
size = array.shape[axis]
shape = [1] * array.ndim
shape[axis] = size
new_mask = np.cumsum(mask, axis=axis) == np.arange(
1, size + 1
).reshape(shape)
array = np.ma.array(array, mask=new_mask, copy=False)

sections[key] = type(self)(
array, units=self.Units, fill_value=self.fill_value
)

# Glue the sections back together again
out = self.reconstruct_sectioned_data(sections, cyclic=self.cyclic())
dx = d._get_dask()
dx = dx.cumsum(axis=axis, method=method)

if inplace:
d.__dict__ = out.__dict__
else:
d = out
# Note: The dask cumsum method resets the mask hardness to the
# numpy default, so we need to reset the mask hardness
# during _set_dask.
d._set_dask(dx, reset_mask_hardness=True)

return d

return out

@_inplace_enabled(default=False)
def rechunk(
self,
Expand Down
3 changes: 2 additions & 1 deletion cf/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3223,6 +3223,7 @@ def _DEPRECATION_ERROR_KWARGS(
relaxed_identity=False,
info=False,
version="3.0.0",
removed_at="4.0.0",
):
# Unsafe to set mutable '{}' as default in the func signature.
if kwargs is None: # distinguish from falsy '{}'
Expand All @@ -3238,7 +3239,7 @@ def _DEPRECATION_ERROR_KWARGS(
f"Keyword {key!r} of method "
f"'{instance.__class__.__name__}.{method}' has been deprecated "
f"at version {version} and is no longer available and will be "
f"removed at version 4.0.0. {message}"
f"removed at version {removed_at}. {message}"
)


Expand Down
11 changes: 5 additions & 6 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,6 @@ def test_Data_digitize(self):
self.assertIsNone(d.digitize(bins, inplace=True))
self.assertTrue(d.equals(e))

@unittest.skipIf(TEST_DASKIFIED_ONLY, "no attribute '_ndim'")
def test_Data_cumsum(self):
if self.test_only and inspect.stack()[0][3] not in self.test_only:
return
Expand All @@ -883,18 +882,18 @@ def test_Data_cumsum(self):
self.assertIsNone(e.cumsum(axis=0, inplace=True))
self.assertTrue(e.equals(f, verbose=2))

d = cf.Data(self.a)
d = cf.Data(self.a, chunks=3)

for i in range(d.ndim):
for i in [None] + list(range(d.ndim)):
b = np.cumsum(self.a, axis=i)
e = d.cumsum(axis=i)
self.assertTrue((e.array == b).all())

d = cf.Data(self.ma)
d = cf.Data(self.ma, chunks=3)

for i in range(d.ndim):
for i in [None] + list(range(d.ndim)):
b = np.cumsum(self.ma, axis=i)
e = d.cumsum(axis=i, masked_as_zero=False)
e = d.cumsum(axis=i)
self.assertTrue(cf.functions._numpy_allclose(e.array, b))

@unittest.skipIf(TEST_DASKIFIED_ONLY, "no attribute '_ndim'")
Expand Down