diff --git a/cf/data/dask_utils.py b/cf/data/dask_utils.py index 7a6c4dcc07..24b7595c5f 100644 --- a/cf/data/dask_utils.py +++ b/cf/data/dask_utils.py @@ -7,6 +7,58 @@ import numpy as np +try: + from scipy.ndimage.filters import convolve1d +except ImportError: + pass + + +def cf_convolve1d(a, window=None, axis=-1, origin=0): + """Calculate a 1-d convolution along the given axis. + + .. versionadded:: TODODASK + + .. seealso:: `cf.Data.convolution_filter` + + :Parameters: + + a: `numpy.ndarray` + The float array to be filtered. + + window: 1-d sequence of numbers + The window of weights to use for the filter. + + axis: `int`, optional + The axis of input along which to calculate. Default is -1. + + origin: `int`, optional + Controls the placement of the filter on the input array’s + pixels. A value of 0 (the default) centers the filter over + the pixel, with positive values shifting the filter to the + left, and negative ones to the right. + + :Returns: + + `numpy.ndarray` + Convolved float array with same shape as input. + + """ + masked = np.ma.is_masked(a) + if masked: + # convolve1d does not deal with masked arrays, so uses NaNs + # instead. + a = a.filled(np.nan) + + c = convolve1d( + a, window, axis=axis, mode="constant", cval=0.0, origin=origin + ) + + if masked or np.isnan(c).any(): + with np.errstate(invalid="ignore"): + c = np.ma.masked_invalid(c) + + return c + def cf_harden_mask(a): """Harden the mask of a masked `numpy` array. diff --git a/cf/data/data.py b/cf/data/data.py index 020a09b964..8f085e5e4d 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -8,11 +8,6 @@ from numbers import Integral from operator import mul -try: - from scipy.ndimage.filters import convolve1d as scipy_convolve1d -except ImportError: - pass - import cfdm import cftime import dask.array as da @@ -2855,6 +2850,7 @@ def ceil(self, inplace=False, i=False): """ return self.func(np.ceil, out=True, inplace=inplace) + @daskified(1) @_inplace_enabled(default=False) def convolution_filter( self, @@ -2935,6 +2931,9 @@ def convolution_filter( ``'wrap'`` The input is extended by ``(a b c | a b c | a b c)`` wrapping around to the opposite edge. + + ``'periodic'`` This is a synonym for + ``'wrap'``. ============== ========================== ============================ The position of the window relative to each value can be @@ -2973,14 +2972,7 @@ def convolution_filter( in-place. """ - # TODODSAK - map_overlap - - try: - scipy_convolve1d - except NameError: - raise ImportError( - "Must install scipy to use the convolution_filter method." - ) + from .dask_utils import cf_convolve1d d = _inplace_enabled_define_and_cleanup(self) @@ -2988,60 +2980,72 @@ def convolution_filter( if len(iaxis) != 1: raise ValueError( "Must specify a unique domain axis with the 'axis' " - "parameter. {!r} specifies axes {!r}".format(axis, iaxis) + f"parameter. {axis!r} specifies axes {iaxis!r}" ) iaxis = iaxis[0] - # Default mode to 'wrap' if the axis is cyclic if mode is None: + # Default mode is 'wrap' if the axis is cyclic, or else + # 'constant'. if iaxis in d.cyclic(): - mode = "wrap" + boundary = "periodic" else: - mode = "constant" - # --- End: if + boundary = cval + elif mode == "wrap": + boundary = "periodic" + elif mode == "constant": + boundary = cval + elif mode == "mirror": + raise ValueError( + "'mirror' mode is no longer available. Please raise an " + "issue at https://github.com/NCAS-CMS/cf-python/issues " + "if you would like it to be re-implemented." + ) + # This re-implementation would involve getting a 'mirror' + # function added to dask.array.overlap, along similar + # lines to the existing 'reflect' function in that module. + else: + boundary = mode - # Set cval to NaN if it is currently None, so that the edges - # will be filled with missing data if the mode is 'constant' - if cval is None: - cval = np.nan + # Set the overlap depth large enough to accommodate the + # filter. + # + # For instance, for a 5-point window, the calculated value at + # each point requires 2 points either side if the filter is + # centred (i.e. origin is 0) and (up to) 3 points either side + # if origin is 1 or -1. + # + # It is a restriction of dask.array.map_overlap that we can't + # use asymmetric halos for general 'boundary' types. + size = len(window) + depth = int(size / 2) + if not origin and not size % 2: + depth += 1 - # Section the data into sections up to a chunk in size - sections = self.section([iaxis], chunks=True) + depth += abs(origin) - # Filter each section replacing masked points with numpy - # NaNs and then remasking after filtering. - for key, data in sections.items(): - data.dtype = float - input_array = data.array - masked = np.ma.is_masked(input_array) - if masked: - input_array = input_array.filled(np.nan) - - output_array = scipy_convolve1d( - input_array, - window, - axis=iaxis, - mode=mode, - cval=cval, - origin=origin, - ) - if masked or (mode == "constant" and np.isnan(cval)): - with np.errstate(invalid="ignore"): - output_array = np.ma.masked_invalid(output_array) - # --- End: if + dx = d._get_dask() - sections[key] = type(self)( - output_array, units=self.Units, fill_value=self.fill_value - ) + # Cast to float to ensure that NaNs can be stored (as required + # by cf_convolve1d) + if dx.dtype != float: + dx = dx.astype(float, copy=False) - # Glue the sections back together again - out = self.reconstruct_sectioned_data(sections, cyclic=self.cyclic()) + # Convolve each chunk + convolve1d = partial( + cf_convolve1d, window=window, axis=iaxis, origin=origin + ) - if inplace: - d.__dict__ = out.__dict__ - else: - d = out + dx = dx.map_overlap( + convolve1d, + depth={iaxis: depth}, + boundary=boundary, + trim=True, + meta=np.array((), dtype=float), + ) + + d._set_dask(dx, reset_mask_hardness=True) return d diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 08de4a7750..a8715da8de 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -276,7 +276,7 @@ def test_Data_convolution_filter(self): if not SCIPY_AVAILABLE: raise unittest.SkipTest("SciPy must be installed for this test.") - d = cf.Data(self.ma, units="m") + d = cf.Data(self.ma, units="m", chunks=(2, 4, 5, 3)) window = [0.1, 0.15, 0.5, 0.15, 0.1] @@ -285,19 +285,45 @@ def test_Data_convolution_filter(self): d = cf.Data(self.ma, units="m") - # Test user weights in different modes - for mode in ( - "reflect", - "constant", - "nearest", - "mirror", - "wrap", - ): - b = convolve1d(d.array, window, axis=-1, mode=mode) - e = d.convolution_filter( - window=window, axis=-1, mode=mode, cval=0.0 - ) - self.assertTrue((e.array == b).all()) + for axis in (0, 1): + # Test weights in different modes + for mode in ( + "reflect", + "constant", + "nearest", + "wrap", + ): + b = convolve1d(self.ma, window, axis=axis, mode=mode) + e = d.convolution_filter( + window, axis=axis, mode=mode, cval=0.0 + ) + self.assertTrue((e.array == b).all()) + + for dtype in ("int", "int32", "float", "float32"): + a = np.ma.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=dtype) + a[2] = np.ma.masked + d = cf.Data(a, chunks=(4, 4, 1)) + a = a.astype(float).filled(np.nan) + + for window in ((1, 2, 1), (1, 2, 2, 1), (1, 2, 3, 2, 1)): + for cval in (0, np.nan): + for origin in (-1, 0, 1): + b = convolve1d( + a, + window, + axis=0, + cval=cval, + origin=origin, + mode="constant", + ) + e = d.convolution_filter( + window, + axis=0, + cval=cval, + origin=origin, + mode="constant", + ) + self.assertTrue((e.array == b).all()) @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'") def test_Data_diff(self):