From a64e014bf3f27de882c0b8e34774bec4e3d3c188 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 7 Mar 2022 12:57:36 +0000 Subject: [PATCH 1/3] Data.cumsum --- cf/data/data.py | 163 ++++++++++++++++++++++++++----------------- cf/functions.py | 3 +- cf/test/test_Data.py | 11 ++- 3 files changed, 105 insertions(+), 72 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index 31cb3566dd..0bf3e63beb 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -3140,8 +3140,15 @@ def convolution_filter( return d + @daskified(_DASKIFIED_VERBOSE) @_inplace_enabled(default=False) - def cumsum(self, axis, masked_as_zero=False, inplace=False): + def cumsum( + self, + axis=None, + masked_as_zero=False, + method="sequential", + inplace=False, + ): """Return the data cumulatively summed along the given axis. .. versionadded:: 3.0.0 @@ -3151,36 +3158,48 @@ def cumsum(self, axis, masked_as_zero=False, inplace=False): :Parameters: axis: `int`, optional - Select the axis over which the cumulative sums are to be - calculated. + Select the axis over which the cumulative sums are to + be calculated. By default the cumulative sum is + computed over the flattened array. masked_as_zero: `bool`, optional + Deprecated at version TODODASK. + If True then set missing data values to zero before - calculating the cumulative sum. By default the output data - will be masked at the same locations as the original data. + calculating the cumulative sum. By default the output + data will be masked at the same locations as the + original data. - .. note:: Sums produced entirely from masked elements will - always result in masked values in the output - data, regardless of the setting of + .. note:: Sums produced entirely from masked elements + will always result in masked values in the + output data, regardless of the setting of *masked_as_zero*. + method: `str`, optional + Choose which method to use to perform the cumulative + sum. See `dask.array.cumsum` for details. + + .. versionadded:: TODODASK + {{inplace: `bool`, optional}} .. versionadded:: 3.3.0 :Returns: - `Data` - The data with the cumulatively summed axis, or `None` if - the operation was in-place. + `Data` or `None` + The data with the cumulatively summed axis, or `None` + if the operation was in-place. - **Examples:** + **Examples** >>> d = cf.Data(numpy.arange(12).reshape(3, 4)) >>> print(d.array) [[ 0 1 2 3] [ 4 5 6 7] [ 8 9 10 11]] + >>> print(d.cumsum().array) + [ 0 1 3 6 10 15 21 28 36 45 55 66] >>> print(d.cumsum(axis=0).array) [[ 0 1 2 3] [ 4 6 8 10] @@ -3194,69 +3213,83 @@ def cumsum(self, axis, masked_as_zero=False, inplace=False): >>> d[1, 1] = cf.masked >>> d[2, 0:2] = cf.masked >>> print(d.array) - [[-- 1 3 6] - [ 4 -- 10 17] - [-- -- 10 21]] + [[-- 1 2 3] + [4 -- 6 7] + [-- -- 10 11]] >>> print(d.cumsum(axis=1).array) - [[-- 1 3 6] - [ 4 -- 10 17] - [-- -- 10 21]] - >>> print(d.cumsum(axis=1, masked_as_zero=True).array) - [[-- 1 3 6] - [ 4 4 10 17] + [[-- 1 3 6] + [4 -- 10 17] [-- -- 10 21]] """ - # Parse axis - ndim = self._ndim - if -ndim - 1 <= axis < 0: - axis += ndim + 1 - elif not 0 <= axis <= ndim: - raise ValueError( - "Can't cumsum: Invalid axis specification: Expected " - "-{0}<=axis<{0}, got axis={1}".format(ndim, axis) - ) + if masked_as_zero: + _DEPRECATION_ERROR_KWARGS( + self, + "cumsum", + {"masked_as_zero": None}, + message="", + version="TODODASK", + removed_at="5.0.0", + ) # pragma: no cover d = _inplace_enabled_define_and_cleanup(self) - sections = self.section(axis, chunks=True) - - # Cumulatively sum each section - for key, data in sections.items(): - array = data.array - - filled = False - if masked_as_zero and np.ma.is_masked(array): - mask = array.mask - array = array.filled(0) - filled = True - - array = np.cumsum(array, axis=axis) - - if filled: - size = array.shape[axis] - shape = [1] * array.ndim - shape[axis] = size - new_mask = np.cumsum(mask, axis=axis) == np.arange( - 1, size + 1 - ).reshape(shape) - array = np.ma.array(array, mask=new_mask, copy=False) - - sections[key] = type(self)( - array, units=self.Units, fill_value=self.fill_value - ) - - # Glue the sections back together again - out = self.reconstruct_sectioned_data(sections, cyclic=self.cyclic()) - - if inplace: - d.__dict__ = out.__dict__ - else: - d = out + dx = d._get_dask() + dx = dx.cumsum(axis=axis, method=method) + d._set_dask(dx, reset_mask_hardness=True) return d - return out + # # Parse axis + # ndim = self._ndim + # if -ndim - 1 <= axis < 0: + # axis += ndim + 1 + # elif not 0 <= axis <= ndim: + # raise ValueError( + # "Can't cumsum: Invalid axis specification: Expected " + # "-{0}<=axis<{0}, got axis={1}".format(ndim, axis) + # ) + # + # d = _inplace_enabled_define_and_cleanup(self) + # + # sections = self.section(axis, chunks=True) + # + # # Cumulatively sum each section + # for key, data in sections.items(): + # array = data.array + # + # filled = False + # if masked_as_zero and np.ma.is_masked(array): + # mask = array.mask + # array = array.filled(0) + # filled = True + # + # array = np.cumsum(array, axis=axis) + # + # if filled: + # size = array.shape[axis] + # shape = [1] * array.ndim + # shape[axis] = size + # new_mask = np.cumsum(mask, axis=axis) == np.arange( + # 1, size + 1 + # ).reshape(shape) + # array = np.ma.array(array, mask=new_mask, copy=False) + # + # sections[key] = type(self)( + # array, units=self.Units, fill_value=self.fill_value + # ) + # + # # Glue the sections back together again + # out = self.reconstruct_sectioned_data(sections, cyclic=self.cyclic()) + # + # if inplace: + # d.__dict__ = out.__dict__ + # else: + # d = out + # + # return d + # + # return out @_inplace_enabled(default=False) def rechunk( diff --git a/cf/functions.py b/cf/functions.py index 9b0a0973c4..23f146ee18 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -3223,6 +3223,7 @@ def _DEPRECATION_ERROR_KWARGS( relaxed_identity=False, info=False, version="3.0.0", + removed_at="4.0.0", ): # Unsafe to set mutable '{}' as default in the func signature. if kwargs is None: # distinguish from falsy '{}' @@ -3238,7 +3239,7 @@ def _DEPRECATION_ERROR_KWARGS( f"Keyword {key!r} of method " f"'{instance.__class__.__name__}.{method}' has been deprecated " f"at version {version} and is no longer available and will be " - f"removed at version 4.0.0. {message}" + f"removed at version {removed_at}. {message}" ) diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 1707e1cb87..690aa90b59 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -872,7 +872,6 @@ def test_Data_digitize(self): self.assertIsNone(d.digitize(bins, inplace=True)) self.assertTrue(d.equals(e)) - @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attribute '_ndim'") def test_Data_cumsum(self): if self.test_only and inspect.stack()[0][3] not in self.test_only: return @@ -883,18 +882,18 @@ def test_Data_cumsum(self): self.assertIsNone(e.cumsum(axis=0, inplace=True)) self.assertTrue(e.equals(f, verbose=2)) - d = cf.Data(self.a) + d = cf.Data(self.a, chunks=3) - for i in range(d.ndim): + for i in [None] + list(range(d.ndim)): b = np.cumsum(self.a, axis=i) e = d.cumsum(axis=i) self.assertTrue((e.array == b).all()) - d = cf.Data(self.ma) + d = cf.Data(self.ma, chunks=3) - for i in range(d.ndim): + for i in [None] + list(range(d.ndim)): b = np.cumsum(self.ma, axis=i) - e = d.cumsum(axis=i, masked_as_zero=False) + e = d.cumsum(axis=i) self.assertTrue(cf.functions._numpy_allclose(e.array, b)) @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attribute '_ndim'") From 49f27b969d2735d1fa5c313eb00c1483ad61af7a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 7 Mar 2022 16:52:58 +0000 Subject: [PATCH 2/3] dev --- cf/data/data.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index 0bf3e63beb..3c29af11f9 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -3153,7 +3153,7 @@ def cumsum( .. versionadded:: 3.0.0 - .. seealso:: `sum` + .. seealso:: `diff`, `sum` :Parameters: @@ -3210,15 +3210,19 @@ def cumsum( [ 8 17 27 38]] >>> d[0, 0] = cf.masked - >>> d[1, 1] = cf.masked + >>> d[1, [1, 3]] = cf.masked >>> d[2, 0:2] = cf.masked >>> print(d.array) [[-- 1 2 3] - [4 -- 6 7] + [4 -- 6 --] [-- -- 10 11]] + >>> print(d.cumsum(axis=0).array) + [[-- 1 2 3] + [4 -- 8 --] + [-- -- 18 14]] >>> print(d.cumsum(axis=1).array) [[-- 1 3 6] - [4 -- 10 17] + [4 -- 10 --] [-- -- 10 21]] """ @@ -3227,7 +3231,7 @@ def cumsum( self, "cumsum", {"masked_as_zero": None}, - message="", + message="This keyword is ", version="TODODASK", removed_at="5.0.0", ) # pragma: no cover From 9cd2b2132c62f657dfaabcc907376edfca40e3c2 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 8 Mar 2022 10:01:06 +0000 Subject: [PATCH 3/3] dask: Data.cumsum --- cf/data/data.py | 74 ++++++------------------------------------------- 1 file changed, 9 insertions(+), 65 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index 3c29af11f9..837a583bc4 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -3162,19 +3162,6 @@ def cumsum( be calculated. By default the cumulative sum is computed over the flattened array. - masked_as_zero: `bool`, optional - Deprecated at version TODODASK. - - If True then set missing data values to zero before - calculating the cumulative sum. By default the output - data will be masked at the same locations as the - original data. - - .. note:: Sums produced entirely from masked elements - will always result in masked values in the - output data, regardless of the setting of - *masked_as_zero*. - method: `str`, optional Choose which method to use to perform the cumulative sum. See `dask.array.cumsum` for details. @@ -3185,6 +3172,10 @@ def cumsum( .. versionadded:: 3.3.0 + masked_as_zero: deprecated at version TODODASK + See the examples for the new behaviour when there are + masked values. + :Returns: `Data` or `None` @@ -3231,7 +3222,7 @@ def cumsum( self, "cumsum", {"masked_as_zero": None}, - message="This keyword is ", + message="", version="TODODASK", removed_at="5.0.0", ) # pragma: no cover @@ -3240,61 +3231,14 @@ def cumsum( dx = d._get_dask() dx = dx.cumsum(axis=axis, method=method) + + # Note: The dask cumsum method resets the mask hardness to the + # numpy default, so we need to reset the mask hardness + # during _set_dask. d._set_dask(dx, reset_mask_hardness=True) return d - # # Parse axis - # ndim = self._ndim - # if -ndim - 1 <= axis < 0: - # axis += ndim + 1 - # elif not 0 <= axis <= ndim: - # raise ValueError( - # "Can't cumsum: Invalid axis specification: Expected " - # "-{0}<=axis<{0}, got axis={1}".format(ndim, axis) - # ) - # - # d = _inplace_enabled_define_and_cleanup(self) - # - # sections = self.section(axis, chunks=True) - # - # # Cumulatively sum each section - # for key, data in sections.items(): - # array = data.array - # - # filled = False - # if masked_as_zero and np.ma.is_masked(array): - # mask = array.mask - # array = array.filled(0) - # filled = True - # - # array = np.cumsum(array, axis=axis) - # - # if filled: - # size = array.shape[axis] - # shape = [1] * array.ndim - # shape[axis] = size - # new_mask = np.cumsum(mask, axis=axis) == np.arange( - # 1, size + 1 - # ).reshape(shape) - # array = np.ma.array(array, mask=new_mask, copy=False) - # - # sections[key] = type(self)( - # array, units=self.Units, fill_value=self.fill_value - # ) - # - # # Glue the sections back together again - # out = self.reconstruct_sectioned_data(sections, cyclic=self.cyclic()) - # - # if inplace: - # d.__dict__ = out.__dict__ - # else: - # d = out - # - # return d - # - # return out - @_inplace_enabled(default=False) def rechunk( self,