From b06b01cf9e9006eafd4967d9fd67dc6ba8353ad5 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 4 Jan 2022 20:30:59 +0000 Subject: [PATCH 1/3] convolve1d --- cf/__init__.py | 4 +- cf/data/dask_utils.py | 50 +++++++++++++++++++++++ cf/data/data.py | 95 ++++++++++++++++++------------------------- cf/test/test_Data.py | 54 +++++++++++++++++------- 4 files changed, 132 insertions(+), 71 deletions(-) diff --git a/cf/__init__.py b/cf/__init__.py index 25f8ca8446..463bf4e18f 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -194,8 +194,8 @@ ) # Check the version of cfdm -_minimum_vn = "1.8.9.0" -_maximum_vn = "1.8.10.0" +_minimum_vn = "1.9.0.1" +_maximum_vn = "1.9.1.0" _cfdm_version = LooseVersion(cfdm.__version__) if not LooseVersion(_minimum_vn) <= _cfdm_version < LooseVersion(_maximum_vn): raise RuntimeError( diff --git a/cf/data/dask_utils.py b/cf/data/dask_utils.py index 7a6c4dcc07..3018b5ccd2 100644 --- a/cf/data/dask_utils.py +++ b/cf/data/dask_utils.py @@ -7,6 +7,56 @@ import numpy as np +try: + from scipy.ndimage.filters import convolve1d +except ImportError: + pass + + +def cf_convolve1d(a, window=None, axis=-1, origin=0): + """Calculate a 1-d convolution along the given axis. + + .. versionadded:: TODODASK + + .. seealso:: `cf.Data.convolution_filter` + + :Parameters: + + a: `numpy.ndarray` + The float array to be filtered. + + window: 1-d sequence of numbers + The window of weights to use for the filter. + + axis: `int`, optional + The axis of input along which to calculate. Default is -1. + + origin: `int`, optional + Controls the placement of the filter on the input array’s + pixels. A value of 0 (the default) centers the filter over + the pixel, with positive values shifting the filter to the + left, and negative ones to the right. + + :Returns: + + `numpy.ndarray` + Convolved float array with same shape as input. + + """ + masked = np.ma.is_masked(a) + if masked: + a = a.filled(np.nan) + + c = convolve1d( + a, window, axis=axis, mode="constant", cval=0.0, origin=origin + ) + + if masked or np.isnan(c).any(): + with np.errstate(invalid="ignore"): + c = np.ma.masked_invalid(c) + + return c + def cf_harden_mask(a): """Harden the mask of a masked `numpy` array. diff --git a/cf/data/data.py b/cf/data/data.py index 020a09b964..e222a011d5 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -8,11 +8,6 @@ from numbers import Integral from operator import mul -try: - from scipy.ndimage.filters import convolve1d as scipy_convolve1d -except ImportError: - pass - import cfdm import cftime import dask.array as da @@ -2973,14 +2968,7 @@ def convolution_filter( in-place. """ - # TODODSAK - map_overlap - - try: - scipy_convolve1d - except NameError: - raise ImportError( - "Must install scipy to use the convolution_filter method." - ) + from .dask_utils import cf_convolve1d d = _inplace_enabled_define_and_cleanup(self) @@ -2988,60 +2976,57 @@ def convolution_filter( if len(iaxis) != 1: raise ValueError( "Must specify a unique domain axis with the 'axis' " - "parameter. {!r} specifies axes {!r}".format(axis, iaxis) + f"parameter. {axis!r} specifies axes {iaxis!r}" ) iaxis = iaxis[0] - # Default mode to 'wrap' if the axis is cyclic + boundary = mode if mode is None: + # Default mode is 'wrap' if the axis is cyclic, or else + # 'constant'. if iaxis in d.cyclic(): - mode = "wrap" + boundary = "periodic" else: - mode = "constant" - # --- End: if - - # Set cval to NaN if it is currently None, so that the edges - # will be filled with missing data if the mode is 'constant' - if cval is None: - cval = np.nan + boundary = cval + elif mode == "wrap": + boundary = "periodic" + elif mode == "constant": + boundary = cval + elif mode == "mirror": + raise ValueError( + "'mirror' mode is no longer available. Please raise an " + "issue at https://github.com/NCAS-CMS/cf-python/issues " + "if you would like it to be re-implmented" + ) + # This re-imlemnetation would involve getting a 'mirror' + # function added to dask.array.overlap, along similar + # lines to the existing 'reflect' function in that module. - # Section the data into sections up to a chunk in size - sections = self.section([iaxis], chunks=True) + # Set the overlap depth large enough to accommodate the filter + size = len(window) + centre = int(size / 2) + if not origin and not size % 2: + centre += 1 - # Filter each section replacing masked points with numpy - # NaNs and then remasking after filtering. - for key, data in sections.items(): - data.dtype = float - input_array = data.array - masked = np.ma.is_masked(input_array) - if masked: - input_array = input_array.filled(np.nan) - - output_array = scipy_convolve1d( - input_array, - window, - axis=iaxis, - mode=mode, - cval=cval, - origin=origin, - ) - if masked or (mode == "constant" and np.isnan(cval)): - with np.errstate(invalid="ignore"): - output_array = np.ma.masked_invalid(output_array) - # --- End: if + depth = {iaxis: centre + abs(origin)} - sections[key] = type(self)( - output_array, units=self.Units, fill_value=self.fill_value - ) + convolve1d = partial( + cf_convolve1d, window=window, axis=iaxis, origin=origin + ) - # Glue the sections back together again - out = self.reconstruct_sectioned_data(sections, cyclic=self.cyclic()) + dx = d._get_dask() + if dx.dtype.kind == "i": + dx = dx.astype(float) + + dx = dx.map_overlap( + convolve1d, + depth=depth, + boundary=boundary, + trim=True, + ) - if inplace: - d.__dict__ = out.__dict__ - else: - d = out + d._set_dask(dx, reset_mask_hardness=True) return d diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 08de4a7750..a8715da8de 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -276,7 +276,7 @@ def test_Data_convolution_filter(self): if not SCIPY_AVAILABLE: raise unittest.SkipTest("SciPy must be installed for this test.") - d = cf.Data(self.ma, units="m") + d = cf.Data(self.ma, units="m", chunks=(2, 4, 5, 3)) window = [0.1, 0.15, 0.5, 0.15, 0.1] @@ -285,19 +285,45 @@ def test_Data_convolution_filter(self): d = cf.Data(self.ma, units="m") - # Test user weights in different modes - for mode in ( - "reflect", - "constant", - "nearest", - "mirror", - "wrap", - ): - b = convolve1d(d.array, window, axis=-1, mode=mode) - e = d.convolution_filter( - window=window, axis=-1, mode=mode, cval=0.0 - ) - self.assertTrue((e.array == b).all()) + for axis in (0, 1): + # Test weights in different modes + for mode in ( + "reflect", + "constant", + "nearest", + "wrap", + ): + b = convolve1d(self.ma, window, axis=axis, mode=mode) + e = d.convolution_filter( + window, axis=axis, mode=mode, cval=0.0 + ) + self.assertTrue((e.array == b).all()) + + for dtype in ("int", "int32", "float", "float32"): + a = np.ma.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=dtype) + a[2] = np.ma.masked + d = cf.Data(a, chunks=(4, 4, 1)) + a = a.astype(float).filled(np.nan) + + for window in ((1, 2, 1), (1, 2, 2, 1), (1, 2, 3, 2, 1)): + for cval in (0, np.nan): + for origin in (-1, 0, 1): + b = convolve1d( + a, + window, + axis=0, + cval=cval, + origin=origin, + mode="constant", + ) + e = d.convolution_filter( + window, + axis=0, + cval=cval, + origin=origin, + mode="constant", + ) + self.assertTrue((e.array == b).all()) @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'") def test_Data_diff(self): From 01afa8e44c32225b7621a104646d525e3928983c Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 5 Jan 2022 10:37:15 +0000 Subject: [PATCH 2/3] dtype, comments --- cf/data/dask_utils.py | 2 ++ cf/data/data.py | 43 +++++++++++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/cf/data/dask_utils.py b/cf/data/dask_utils.py index 3018b5ccd2..24b7595c5f 100644 --- a/cf/data/dask_utils.py +++ b/cf/data/dask_utils.py @@ -45,6 +45,8 @@ def cf_convolve1d(a, window=None, axis=-1, origin=0): """ masked = np.ma.is_masked(a) if masked: + # convolve1d does not deal with masked arrays, so uses NaNs + # instead. a = a.filled(np.nan) c = convolve1d( diff --git a/cf/data/data.py b/cf/data/data.py index e222a011d5..8f085e5e4d 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2850,6 +2850,7 @@ def ceil(self, inplace=False, i=False): """ return self.func(np.ceil, out=True, inplace=inplace) + @daskified(1) @_inplace_enabled(default=False) def convolution_filter( self, @@ -2930,6 +2931,9 @@ def convolution_filter( ``'wrap'`` The input is extended by ``(a b c | a b c | a b c)`` wrapping around to the opposite edge. + + ``'periodic'`` This is a synonym for + ``'wrap'``. ============== ========================== ============================ The position of the window relative to each value can be @@ -2981,7 +2985,6 @@ def convolution_filter( iaxis = iaxis[0] - boundary = mode if mode is None: # Default mode is 'wrap' if the axis is cyclic, or else # 'constant'. @@ -2997,33 +3000,49 @@ def convolution_filter( raise ValueError( "'mirror' mode is no longer available. Please raise an " "issue at https://github.com/NCAS-CMS/cf-python/issues " - "if you would like it to be re-implmented" + "if you would like it to be re-implemented." ) - # This re-imlemnetation would involve getting a 'mirror' + # This re-implementation would involve getting a 'mirror' # function added to dask.array.overlap, along similar # lines to the existing 'reflect' function in that module. + else: + boundary = mode - # Set the overlap depth large enough to accommodate the filter + # Set the overlap depth large enough to accommodate the + # filter. + # + # For instance, for a 5-point window, the calculated value at + # each point requires 2 points either side if the filter is + # centred (i.e. origin is 0) and (up to) 3 points either side + # if origin is 1 or -1. + # + # It is a restriction of dask.array.map_overlap that we can't + # use asymmetric halos for general 'boundary' types. size = len(window) - centre = int(size / 2) + depth = int(size / 2) if not origin and not size % 2: - centre += 1 + depth += 1 - depth = {iaxis: centre + abs(origin)} + depth += abs(origin) + dx = d._get_dask() + + # Cast to float to ensure that NaNs can be stored (as required + # by cf_convolve1d) + if dx.dtype != float: + dx = dx.astype(float, copy=False) + + # Convolve each chunk convolve1d = partial( cf_convolve1d, window=window, axis=iaxis, origin=origin ) - dx = d._get_dask() - if dx.dtype.kind == "i": - dx = dx.astype(float) - dx = dx.map_overlap( convolve1d, - depth=depth, + depth={iaxis: depth}, boundary=boundary, trim=True, + meta=np.array((), dtype=float), ) d._set_dask(dx, reset_mask_hardness=True) From 041e6b311bb7306f3381d33db0ab45ce124c7f50 Mon Sep 17 00:00:00 2001 From: Sadie Louise Bartholomew Date: Tue, 11 Jan 2022 01:48:42 +0000 Subject: [PATCH 3/3] Revert cfdm versioning req. changes for CI job runs --- cf/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cf/__init__.py b/cf/__init__.py index 463bf4e18f..25f8ca8446 100644 --- a/cf/__init__.py +++ b/cf/__init__.py @@ -194,8 +194,8 @@ ) # Check the version of cfdm -_minimum_vn = "1.9.0.1" -_maximum_vn = "1.9.1.0" +_minimum_vn = "1.8.9.0" +_maximum_vn = "1.8.10.0" _cfdm_version = LooseVersion(cfdm.__version__) if not LooseVersion(_minimum_vn) <= _cfdm_version < LooseVersion(_maximum_vn): raise RuntimeError(