diff --git a/cf/data/data.py b/cf/data/data.py index 61967d8c06..4c8f0a1929 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -7250,6 +7250,7 @@ def any(self): return False + @daskified(_DASKIFIED_VERBOSE) @_inplace_enabled(default=False) def apply_masking( self, @@ -7277,8 +7278,9 @@ def apply_masking( elements exactly equal to any of the values are set to missing data. - If True then the value returned by the `get_fill_value` - method, if such a value exists, is used. + If True then the value returned by the + `get_fill_value` method, if such a value exists, is + used. Zero or more values may be provided in a sequence of scalars. @@ -7299,21 +7301,21 @@ def apply_masking( ``fill_value=[]`` valid_min: number, optional - A scalar specifying the minimum valid value. Data elements - strictly less than this number will be set to missing - data. + A scalar specifying the minimum valid value. Data + elements strictly less than this number will be set to + missing data. valid_max: number, optional - A scalar specifying the maximum valid value. Data elements - strictly greater than this number will be set to missing - data. + A scalar specifying the maximum valid value. Data + elements strictly greater than this number will be set + to missing data. valid_range: (number, number), optional - A vector of two numbers specifying the minimum and maximum - valid values, equivalent to specifying values for both - *valid_min* and *valid_max* parameters. The *valid_range* - parameter must not be set if either *valid_min* or - *valid_max* is defined. + A vector of two numbers specifying the minimum and + maximum valid values, equivalent to specifying values + for both *valid_min* and *valid_max* parameters. The + *valid_range* parameter must not be set if either + *valid_min* or *valid_max* is defined. *Parameter example:* ``valid_range=[-999, 10000]`` is equivalent to setting @@ -7327,54 +7329,52 @@ def apply_masking( The data with masked values. If the operation was in-place then `None` is returned. - **Examples:** + **Examples** >>> import numpy - >>> d = Data(numpy.arange(12).reshape(3, 4), 'm') - >>> d[1, 1] = masked + >>> d = cf.Data(numpy.arange(12).reshape(3, 4), 'm') + >>> d[1, 1] = cf.masked >>> print(d.array) - [[0 1 2 3] - [4 -- 6 7] - [8 9 10 11]] - + [[0 1 2 3] + [4 -- 6 7] + [8 9 10 11]] >>> print(d.apply_masking().array) - [[0 1 2 3] - [4 -- 6 7] - [8 9 10 11]] + [[0 1 2 3] + [4 -- 6 7] + [8 9 10 11]] >>> print(d.apply_masking(fill_values=[0]).array) - [[-- 1 2 3] - [ 4 -- 6 7] - [ 8 9 10 11]] + [[-- 1 2 3] + [4 -- 6 7] + [8 9 10 11]] >>> print(d.apply_masking(fill_values=[0, 11]).array) - [[-- 1 2 3] - [ 4 -- 6 7] - [ 8 9 10 --]] - + [[-- 1 2 3] + [4 -- 6 7] + [8 9 10 --]] >>> print(d.apply_masking(valid_min=3).array) - [[-- -- -- 3] - [ 4 -- 6 7] - [ 8 9 10 11]] + [[-- -- -- 3] + [4 -- 6 7] + [8 9 10 11]] >>> print(d.apply_masking(valid_max=6).array) - [[ 0 1 2 3] - [ 4 -- 6 --] + [[0 1 2 3] + [4 -- 6 --] [-- -- -- --]] >>> print(d.apply_masking(valid_range=[2, 8]).array) - [[-- -- 2 3] - [ 4 -- 6 7] - [ 8 -- -- --]] - + [[-- -- 2 3] + [4 -- 6 7] + [8 -- -- --]] >>> d.set_fill_value(7) >>> print(d.apply_masking(fill_values=True).array) - [[0 1 2 3] - [4 -- 6 --] - [8 9 10 11]] + [[0 1 2 3] + [4 -- 6 --] + [8 9 10 11]] >>> print(d.apply_masking(fill_values=True, ... valid_range=[2, 8]).array) - [[-- -- 2 3] - [ 4 -- 6 --] - [ 8 -- -- --]] + [[-- -- 2 3] + [4 -- 6 --] + [8 -- -- --]] """ + # Parse valid_range if valid_range is not None: if valid_min is not None or valid_max is not None: raise ValueError( @@ -7396,8 +7396,7 @@ def apply_masking( valid_min, valid_max = valid_range - d = _inplace_enabled_define_and_cleanup(self) - + # Parse fill_values if fill_values is None: fill_values = False @@ -7412,45 +7411,45 @@ def apply_masking( fill_values = () else: try: - _ = iter(fill_values) + iter(fill_values) except TypeError: raise TypeError( "'fill_values' parameter must be a sequence or " - "of type bool. Got type {}".format(type(fill_values)) + f"of type bool. Got type {type(fill_values)}" ) else: if isinstance(fill_values, str): raise TypeError( "'fill_values' parameter must be a sequence or " - "of type bool. Got type {}".format(type(fill_values)) + f"of type bool. Got type {type(fill_values)}" ) - # --- End: if - mask = None + d = _inplace_enabled_define_and_cleanup(self) + dx = self._get_dask() + mask = None if fill_values: - mask = d == fill_values[0] + mask = dx == fill_values[0] for fill_value in fill_values[1:]: - mask |= d == fill_value - # --- End: for + mask |= dx == fill_value if valid_min is not None: if mask is None: - mask = d < valid_min + mask = dx < valid_min else: - mask |= d < valid_min - # --- End: if + mask |= dx < valid_min if valid_max is not None: if mask is None: - mask = d > valid_max + mask = dx > valid_max else: - mask |= d > valid_max - # --- End: if + mask |= dx > valid_max if mask is not None: - d.where(mask, cf_masked, inplace=True) + dx = da.ma.masked_where(mask, dx) + + d._set_dask(dx, reset_mask_hardness=True) return d diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index c84de4a7d2..c8ad1a1d9c 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -492,12 +492,6 @@ def test_Data_halo(self): d.halo(4) def test_Data_mask(self): - if self.test_only and inspect.stack()[0][3] not in self.test_only: - return - - # TODODASK: once test_Data_apply_masking is passing after daskification - # of apply_masking, might make sense to combine this test with that? - # Test for a masked Data object (having some masked points) a = self.ma d = cf.Data(a, units="m") @@ -531,53 +525,52 @@ def test_Data_mask(self): self.assertTrue(d3.mask.hardmask) self.assertTrue(d3.mask.array[1], True) - @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'") def test_Data_apply_masking(self): - if self.test_only and inspect.stack()[0][3] not in self.test_only: - return + a = np.ma.arange(12).reshape(3, 4) + a[1, 1] = np.ma.masked + d = cf.Data(a, units="m", chunks=2) - a = self.ma - d = cf.Data(a, units="m") + self.assertIsNone(d.apply_masking(inplace=True)) - b = a.copy() + b = a e = d.apply_masking() self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all()) - b = np.ma.where(a == 0, np.ma.masked, a) + b = np.ma.masked_where(a == 0, a) e = d.apply_masking(fill_values=[0]) self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all()) - b = np.ma.where((a == 0) | (a == 11), np.ma.masked, a) + b = np.ma.masked_where((a == 0) | (a == 11), a) e = d.apply_masking(fill_values=[0, 11]) self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all()) - b = np.ma.where(a < 30, np.ma.masked, a) - e = d.apply_masking(valid_min=30) + b = np.ma.masked_where(a < 3, a) + e = d.apply_masking(valid_min=3) self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all()) - b = np.ma.where(a > -60, np.ma.masked, a) - e = d.apply_masking(valid_max=-60) + b = np.ma.masked_where(a > 8, a) + e = d.apply_masking(valid_max=8) self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all()) - b = np.ma.where((a < -20) | (a > 80), np.ma.masked, a) - e = d.apply_masking(valid_range=[-20, 80]) + b = np.ma.masked_where((a < 2) | (a > 8), a) + e = d.apply_masking(valid_range=[2, 8]) self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all()) - d.set_fill_value(70) + d.set_fill_value(7) - b = np.ma.where(a == 70, np.ma.masked, a) + b = np.ma.masked_where(a == 7, a) e = d.apply_masking(fill_values=True) self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all()) - b = np.ma.where((a == 70) | (a < 20) | (a > 80), np.ma.masked, a) - e = d.apply_masking(fill_values=True, valid_range=[20, 80]) + b = np.ma.masked_where((a == 7) | (a < 2) | (a > 8), a) + e = d.apply_masking(fill_values=True, valid_range=[2, 8]) self.assertTrue((b == e.array).all()) self.assertTrue((b.mask == e.mask.array).all())