diff --git a/cf/data/data.py b/cf/data/data.py index acb2c59632..48e88b7cb1 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -113,6 +113,7 @@ dask_compatible, first_non_missing_value, new_axis_identifier, + scalar_masked_array, ) # from .chunk_utils import ( # is_small,; is_very_small, @@ -1373,6 +1374,22 @@ def _set_dask( `None` """ + if array is NotImplemented: + logger.warning( + "NotImplemented has been set in the place of a dask array" + ) + # This could occur if any sort of exception is raised by + # a function that is run on chunks (such as + # `cf.Data._cf_where`). Such a function could get run at + # definition time in order to ascertain suitability (such + # as data type casting, broadcasting, etc.). Note that the + # exception may be hard to diagnose, as dask will have + # silently trapped it and returned + # NotImplemented (for instance, see + # `dask.array.core.elemwise`). Print statements in a local + # copy of dask are possibly the way to go if the cause of + # the error is not obvious by inspection. 
+ if copy: array = array.copy() @@ -5827,16 +5844,12 @@ def Units(self, value): else: dtype = _dtype_float - # Apply the units conversion to the data - self._map_blocks( - partial( - Units.conform, - from_units=old_units, - to_units=value, - inplace=False, - ), - dtype=dtype, - ) + def cf_Units(x): + return Units.conform( + x=x, from_units=old_units, to_units=value, inplace=False + ) + + self._map_blocks(cf_Units, dtype=dtype) self._Units = value @@ -8364,7 +8377,6 @@ def asdata(cls, d, dtype=None, copy=False): if dtype is not None and np.dtype(dtype) != data.dtype: data = data.copy() data.dtype = dtype - # --- End: if return data @@ -11431,85 +11443,99 @@ def fits_in_one_chunk_in_memory(self, itemsize): @_deprecated_kwarg_check("i") @_inplace_enabled(default=False) @_manage_log_level_via_verbosity + @daskified(1) def where( self, condition, x=None, y=None, inplace=False, i=False, verbose=None ): - """Assign to data elements depending on a condition. + """Assign array elements depending on a condition. - Data can be changed by assigning to elements that are selected by - a condition based on the data values. - - Different values can be assigned to where the conditions are, and - are not, met. + The elements to be changed are identified by a + condition. Different values can be assigned according to where + the condition is True (assignment from the *x* parameter) or + False (assignment from the *y* parameter). **Missing data** - Data array elements may be set to missing values by assigning them - to the `cf.masked` constant, or by assignment missing data - elements of array-valued *x* and *y* parameters. + Array elements may be set to missing values if either *x* or + *y* are the `cf.masked` constant, or by assignment from any + missing data elements in *x* or *y*. + + If the data mask is hard (see the `hardmask` attribute) then + missing data values in the array will not be overwritten, + regardless of the content of *x* and *y*. 
+ + If the *condition* contains missing data then the + corresponding elements in the array will not be assigned to, + regardless of the contents of *x* and *y*. + + **Broadcasting** + + The array and the *condition*, *x* and *y* parameters must all + be broadcastable to each other, such that the shape of the + result is identical to the original shape of the array. - By default the data mask is "hard", meaning that masked values can - not be changed by assigning them to another value. This behaviour - may be changed by setting the `hardmask` attribute to `False`, - thereby making the data mask "soft" and allowing masked elements - to be set to non-masked values. + If *condition* is a `Query` object then for the purposes of + broadcasting, the condition is considered to be that which is + produced by applying the query to the array. + + **Performance** + + If any of the shapes of the *condition*, *x*, or *y* + parameters, or the array, is unknown, then there is a + possibility that an unknown shape will need to be calculated + immediately by executing all delayed operations on that + object. .. seealso:: `cf.masked`, `hardmask`, `__setitem__` :Parameters: - condition: - The condition which determines how to assign values to the - data. + condition: array-like or `Query` + The condition which determines how to assign values to + the data. - In general it may be any scalar or array-like object (such - as a numpy array or `Data` instance) that is broadcastable - to the shape of the data. Assignment from the *x* and *y* - parameters will be done where elements of the condition - evaluate to `True` and `False` respectively. + Assignment from the *x* and *y* parameters will be + done where elements of the condition evaluate to + `True` and `False` respectively. + + If *condition* is a `Query` object then this implies a + condition defined by applying the query to the data. 
*Parameter example:* - ``d.where(d.data<0, x=-999)`` will set all data values that - are less than zero to -999. + ``d.where(d < 0, x=-999)`` will set all data + values that are less than zero to -999. *Parameter example:* - ``d.where(True, x=-999)`` will set all data values to - -999. This is equivalent to ``d[...] = -999``. + ``d.where(True, x=-999)`` will set all data values + to -999. This is equivalent to ``d[...] = -999``. *Parameter example:* - ``d.where(False, y=-999)`` will set all data values to - -999. This is equivalent to ``d[...] = -999``. + ``d.where(False, y=-999)`` will set all data values + to -999. This is equivalent to ``d[...] = -999``. *Parameter example:* - If data ``d`` has shape ``(5, 3)`` then ``d.where([True, + If ``d`` has shape ``(5, 3)`` then ``d.where([True, False, True], x=-999, y=cf.masked)`` will set data - values in columns 0 and 2 to -999, and data values in - column 1 to missing data. This works because the + values in columns 0 and 2 to -999, and data values + in column 1 to missing data. This works because the condition has shape ``(3,)`` which broadcasts to the data shape. - If *condition* is a `Query` object then this implies a - condition defined by applying the query to the data. - *Parameter example:* - ``d.where(cf.lt(0), x=-999)`` will set all data values - that are less than zero to -999. This is equivalent to - ``d.where(d<0, x=-999)``. + ``d.where(cf.lt(0), x=-999)`` will set all data + values that are less than zero to -999. This is + equivalent to ``d.where(d < 0, x=-999)``. - x, y: *optional* - Specify the assignment values. Where the condition - evaluates to `True`, assign to the data from *x*, and - where the condition evaluates to `False`, assign to the - data from *y*. The *x* and *y* parameters are each one of: + x, y: array-like or `None` + Specify the assignment values. Where the condition is + True assign to the data from *x*, and where the + condition is False assign to the data from *y*. - * `None`. 
The appropriate data elements array are - unchanged. This the default. + If *x* is `None` (the default) then no assignment is + carried out where the condition is True. - * Any scalar or array-like object (such as a numpy array, - or `Data` instance) that is broadcastable to the shape - of the data. - - .. + If *y* is `None` (the default) then no assignment is + carried out where the condition is False. *Parameter example:* ``d.where(condition)``, for any ``condition``, returns @@ -11520,6 +11546,12 @@ def where( sign of all negative data values, and set all other data values to missing data. + *Parameter example:* + ``d.where(cf.lt(0), x=-d)`` will change the sign of + all negative data values, and leave all other data + values unchanged. This is equivalent to, but faster + than, ``d.where(cf.lt(0), x=-d, y=d)`` + {{inplace: `bool`, optional}} {{verbose: `int` or `str` or `None`, optional}} @@ -11532,354 +11564,152 @@ def where( The new data with updated values, or `None` if the operation was in-place. - **Examples:** - - """ - - def _slice_to_partition(data, indices): - """Return a numpy array for the part of the input data which - spans the given indices. - - :Parameters: - - data: `cf.Data` - - indices: `tuple` - - :Returns: - - `numpy.ndarray` - - """ - indices2 = [ - (slice(0, 1) if n == 1 else i) - for n, i in zip(data.shape[::-1], indices[::-1]) - ] - - return data[tuple(indices2)[::-1]].array - - # --- End: def + **Examples** - def _is_broadcastable(data0, data1, do_not_broadcast, is_scalar): - """Check that the data1 is broadcastable to data0 and return - data1, as a python scalar if possible. + >>> d = cf.Data([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> e = d.where(d < 5, d, 10 * d) + >>> print(e.array) + [ 0 1 2 3 4 50 60 70 80 90] - .. note:: The input lists are updated inplace. + >>> d = cf.Data([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'km') + >>> e = d.where(d < 5, cf.Data(10000 * d, 'metre')) + >>> print(e.array) + [ 0. 10. 20. 30. 40. 5. 6. 7. 8. 9.] 
- :Parameters: + >>> e = d.where(d < 5, cf.masked) + >>> print(e.array) + [-- -- -- -- -- 5 6 7 8 9] - data0: `Data` + >>> d = cf.Data([[1, 2,], + ... [3, 4]]) + >>> e = d.where([[True, False], [True, True]], d, [[9, 8], [7, 6]]) + >>> print(e.array) + [[1 8] + [3 4]] + >>> e = d.where([[True, False], [True, True]], [[9, 8], [7, 6]]) + >>> print(e.array) + [[9 2] + [7 6]] - data1: `Data` + The result must have the same shape as the + original data: - do_not_broadcast: `list` + >>> e = d.where([True, False], [9, 8]) + >>> print(e.array) + [[9 2] + [9 4]] - is_scalar: `list` + >>> d = cf.Data(np.array([[0, 1, 2], + ... [0, 2, 4], + ... [0, 3, 6]])) + >>> e = d.where(d < 4, None, -1) + >>> print(e.array) + [[ 0 1 2] + [ 0 2 -1] + [ 0 3 -1]] + + >>> x, y = np.ogrid[:3, :4] + >>> print(x) + [[0] + [1] + [2]] + >>> print(y) + [[0 1 2 3]] + >>> condition = x < y + >>> print(condition) + [[False True True True] + [False False True True] + [False False False True]] + >>> d = cf.Data(x) + >>> e = d.where(condition, d, 10 + y) + ... + ValueError: where: Broadcasting the 'condition' parameter with shape (3, 4) would change the shape of the data with shape (3, 1) - :Returns: + >>> d = cf.Data(np.arange(9).reshape(3, 3)) + >>> e = d.copy() + >>> e[1, 0] = cf.masked + >>> f = e.where(d > 5, None, -3.1416) + >>> print(f.array) + [[-3.1416 -3.1416 -3.1416] + [-- -3.1416 -3.1416] + [6.0 7.0 8.0]] + >>> e.soften_mask() + >>> f = e.where(d > 5, None, -3.1416) + >>> print(f.array) + [[-3.1416 -3.1416 -3.1416] + [-3.1416 -3.1416 -3.1416] + [ 6. 7. 8. ]] - `Data` or scalar - Return *data1* or, if possible, ``data1.datum(0)``. 
+ """ + d = _inplace_enabled_define_and_cleanup(self) - """ - shape0 = data0._shape - shape1 = data1._shape - size1 = data1._size - - if shape1 == shape0: - do_not_broadcast.append(True) - is_scalar.append(False) - - elif size1 == 1: - do_not_broadcast.append(False) - is_scalar.append(True) - # Replace data1 with its scalar value - data1 = data1.datum(0) - - elif data1._ndim <= data0._ndim and size1 < data0._size: - do_not_broadcast.append(False) - is_scalar.append(False) - for n, m in zip(shape1[::-1], shape0[::-1]): - if n != m and n != 1: - raise ValueError( - "where: Can't broadcast data with shape {} to " - "shape {}".format(shape1, shape0) - ) - else: - raise ValueError( - "where: Can't broadcast data with shape {} to " - "shape {}".format(shape1, shape0) - ) + units = d.Units + dx = d._get_dask() - return data1 + # Parse condition + if getattr(condition, "isquery", False): + # Condition is a cf.Query object: Make sure that the + # condition units are OK, and convert the condition to a + # boolean dask array with the same shape as the data. + condition = condition.copy() + condition = condition.set_condition_units(units) + condition = condition.evaluate(d) - # --- End: def + condition = type(self).asdata(condition) + _where_broadcastable(d, condition, "condition") - d = _inplace_enabled_define_and_cleanup(self) + # If x or y is self then change it to None. This prevents an + # unnecessary copy; and, at compute time, an unnecessary numpy + # where. 
+ if x is self: + x = None - logger.debug(" data.shape = {}".format(d.shape)) # pragma: no cover - logger.debug( - " condition = {!r}".format(condition) - ) # pragma: no cover + if y is self: + y = None if x is None and y is None: - # The data is unchanged regardless of condition - if inplace: - d = None + # The data is unchanged regardless of the condition return d - do_not_broadcast = [] - is_scalar = [] - - # # ------------------------------------------------------------ - # # Make sure that the condition is a cf.Data object - # # ------------------------------------------------------------ - # - # if not isinstance(condition, d.__class__): - # condition = type(d)(condition) - - # ------------------------------------------------------------ - # Check that the input condition is broadcastable - # ------------------------------------------------------------ - condition = Data.asdata(condition, copy=False) - condition = _is_broadcastable( - d, condition, do_not_broadcast, is_scalar - ) - - # if isinstance(condition, Query): - # condition = condition.evaluate(f).Data - # ------------------------------------------------------------ - # Parse inputs x and y so that each is one of A) None, B) a - # scalar or C) a data array with the same shape as the master - # array - # ------------------------------------------------------------ + # Parse x and y xy = [] - for value in (x, y): - if value is None or value is cf_masked: - do_not_broadcast.append(False) - is_scalar.append(True) - - else: - # Make sure that the value is a cf.Data object and has - # compatible units - if not isinstance(value, d.__class__): - value = type(d)(value) - else: - if value.Units.equivalent(d.Units): - if not value.Units.equals(d.Units): - value = value.copy() - value.Units = d.Units - elif value.Units: - raise ValueError( - "where: Can't assign values with " - "units {!r} to data with units {!r}".format( - value.Units, d.Units - ) - ) - # --- End: if - - # Check that the value is broadcastable - 
value = _is_broadcastable( - d, value, do_not_broadcast, is_scalar - ) - # --- End: if - - xy.append(value) - # --- End: for - - (x, y) = xy - (condition_is_scalar, x_is_scalar, y_is_scalar) = is_scalar - broadcast = not any(do_not_broadcast) - - logger.debug(" x = {!r}".format(x)) # pragma: no cover - logger.debug(" y = {!r}".format(y)) # pragma: no cover - logger.debug( - " condition_is_scalar = {!r}".format(condition_is_scalar) - ) # pragma: no cover - logger.debug( - " x_is_scalar = {!r}".format(x_is_scalar) - ) # pragma: no cover - logger.debug( - " y_is_scalar = {!r}".format(y_is_scalar) - ) # pragma: no cover - logger.debug( - " broadcast = {!r}".format(broadcast) - ) # pragma: no cover - - # ------------------------------------------------------------- - # Try some short cuts if the condition is a scalar - # ------------------------------------------------------------- - if condition_is_scalar and not getattr(condition, "isquery", False): - logger.debug( - " Condition is a scalar: {} {}".format( - condition, type(condition) - ) - ) - if condition: - if x is not None: - d[...] = x - - if inplace: - d = None - return d - else: - if y is not None: - d[...] = y - - if inplace: - d = None - return d - # --- End: if - - # Still here? - hardmask = d.hardmask - config = d.partition_configuration(readonly=False) # or True? 
- - for partition in d.partitions.matrix.flat: - logger.debug(" Partition:") # pragma: no cover - - partition.open(config) - array = partition.array - # -------------------------------------------------------- - # Find the master array indices for this partition - # -------------------------------------------------------- - shape = array.shape - indices = partition.indices - - # -------------------------------------------------------- - # Find the condition for this partition - # -------------------------------------------------------- - if getattr(condition, "isquery", False): - if hasattr(condition._value, "_Units"): - # Ensure query data has equal units before evaluation - orig_condition_units = condition._value._Units - p_units = partition.Units - if orig_condition_units.equivalent(p_units): - if not orig_condition_units.equals(p_units): - # Convert equivalent units to equal units - condition._value._Units = p_units - else: - raise ValueError( - "where: Can't apply a query condition with " - "units '{!s}' on data with non-equivalent " - "units '{!s}'".format( - orig_condition_units, p_units - ) - ) - c = condition.evaluate(array) - elif condition_is_scalar: - c = condition - else: - c = _slice_to_partition(condition, indices) - - c_masked = np.ma.isMA(c) and np.ma.is_masked(c) - - # -------------------------------------------------------- - # Find value to use where condition is True for this - # partition - # -------------------------------------------------------- - if x_is_scalar: - if x is None: - # Use d - T = array - T_masked = partition.masked - else: - T = x - T_masked = x is cf_masked - else: - T = _slice_to_partition(x, indices) - T_masked = np.ma.isMA(T) and np.ma.is_masked(T) - - # -------------------------------------------------------- - # Find value to use where condition is False for this - # partition - # -------------------------------------------------------- - if y_is_scalar: - if y is None: - # Use d - F = array - F_masked = 
partition.masked - else: - F = y - F_masked = y is cf_masked - else: - F = _slice_to_partition(y, indices) - F_masked = np.ma.isMA(F) and np.ma.is_masked(F) - - # -------------------------------------------------------- - # Make sure that at least one of the arrays is the same - # shape as the partition - # -------------------------------------------------------- - if broadcast: - if x is cf_masked or y is cf_masked: - c = _broadcast(c, shape) - else: - max_sizes = max((np.size(c), np.size(T), np.size(F))) - if np.size(c) == max_sizes: - c = _broadcast(c, shape) - elif np.size(T) == max_sizes: - T = _broadcast(T, shape) - else: - F = _broadcast(F, shape) - # --- End: if + for arg, name in zip((x, y), ("x", "y")): + if arg is None: + xy.append(arg) + continue + + if arg is cf_masked: + # Replace masked constant with array + xy.append(scalar_masked_array(self.dtype)) + continue + + arg = type(self).asdata(arg) + _where_broadcastable(d, arg, name) + + if arg.Units: + # Make sure that units are OK. + arg = arg.copy() + try: + arg.Units = units + except ValueError: + raise ValueError( + f"where: {name!r} parameter units {arg.Units!r} " + f"are not equivalent to data units {units!r}" + ) - logger.debug(" array = {}".format(array)) # pragma: no cover - logger.debug(" c = {}".format(c)) # pragma: no cover - logger.debug(" T = {}".format(T)) # pragma: no cover - logger.debug(" F = {}".format(F)) # pragma: no cover + xy.append(arg._get_dask()) - # -------------------------------------------------------- - # Create a numpy array which takes vales from T where c - # is True and from F where c is False - # -------------------------------------------------------- - if T_masked or F_masked: - # T and/or F have missing data - new = np.ma.where(c, T, F) - if c_masked: - new = np.ma.where(c.mask, array, new) - - if partition.masked: - if hardmask: - # The original partition has missing data and - # a hardmask, so apply the original - # partition's mask to the new array. 
- new.mask |= array.mask - elif not np.ma.is_masked(new): - # The original partition has missing data and - # a softmask and the new array doesn't have - # missing data, so turn the new array into an - # unmasked array. - new = new.data[...] - - elif not np.ma.is_masked(new): - # The original partition doesn't have missing data - # and neither does the new array, so turn the new - # array into an unmasked array. - new = new.data[...] + x, y = xy - else: - # Neither T nor F have missing data - new = np.where(c, T, F) - if c_masked: - new = np.ma.where(c.mask, array, new) - - if partition.masked and hardmask: - # The original partition has missing data and a - # hardmask, so apply the original partition's mask - # to the new array. - new = np.ma.masked_where(array.mask, new, copy=False) - # --- End: if - - # -------------------------------------------------------- - # Replace the partition's subarray with the new numpy - # array - # -------------------------------------------------------- - logger.debug(" new = {}".format(new)) # pragma: no cover + # Apply the where operation + dx = da.core.elemwise( + _cf_where, dx, dask_compatible(condition), x, y, d.hardmask + ) + d._set_dask(dx) - partition.subarray = new - - partition.close() - # --- End: for + # Note: No need to run _reset_mask_hardness at this point + # because the mask hardness has already been correctly + # set in _cf_where. return d @@ -13584,3 +13414,146 @@ def _broadcast(a, shape): tile = shape[0 : len(shape) - len(a_shape)] + tuple(tile[::-1]) return np.tile(a, tile) + + +def _where_broadcastable(data, x, name): + """Check broadcastability for `where` assignments. + + Raises an exception if the result of broadcasting *data* and *x* + together does not have the same shape as *data*. + + .. versionadded:: TODODASK + + .. seealso:: `where` + + :Parameters: + + data, x: `Data` + The arrays to compare. + + name: `str` + A name for *x* that is used in any exception error + message. 
+ + :Returns: + + `bool` + If *x* is acceptably broadcastable to *data* then `True` + is returned, otherwise a `ValueError` is raised. + + """ + ndim_x = x.ndim + if not ndim_x: + return True + + ndim_data = data.ndim + if ndim_x > ndim_data: + raise ValueError( + f"where: Broadcasting the {name!r} parameter with {ndim_x} " + f"dimensions would change the shape of the data with " + f"{ndim_data} dimensions" + ) + + shape_x = x.shape + shape_data = data.shape + for n, m in zip(shape_x[::-1], shape_data[::-1]): + if n != m and n != 1: + raise ValueError( + f"where: Broadcasting the {name!r} parameter with shape " + f"{shape_x} would change the shape of the data with shape " + f"{shape_data}" + ) + + return True + + +def _cf_where(array, condition, x, y, hardmask): + """Set elements of *array* from *x* or *y* depending on *condition*. + + The input *array* is not changed in-place. + + See `where` for details on the expected functionality. + + .. versionadded:: TODODASK + + .. seealso:: `cf.Data.where` + + :Parameters: + + array: numpy.ndarray + The array to be assigned to. + + condition: numpy.ndarray + Where False or masked, assign from *y*, otherwise assign + from *x*. + + x: numpy.ndarray or `None` + *x* and *y* must not both be `None`. + + y: numpy.ndarray or `None` + *x* and *y* must not both be `None`. + + hardmask: `bool` + Set the mask hardness for a returned masked array. If True + then a returned masked array will have a hardened mask, and + the mask of the input *array* (if there is one) will be + applied to the returned array, in addition to any masked + elements arising from assignments from *x* or *y*. + + :Returns: + + `numpy.ndarray` + A copy of the input *array* with elements from *y* where + *condition* is False or masked, and elements from *x* + elsewhere. 
+ + """ + mask = None + + if np.ma.isMA(array): + # Do a masked where + where = np.ma.where + if hardmask: + mask = array.mask + elif np.ma.isMA(x) or np.ma.isMA(y): + # Do a masked where + where = np.ma.where + else: + # Do a non-masked where + where = np.where + hardmask = False + + condition_is_masked = np.ma.isMA(condition) + if condition_is_masked: + condition = condition.astype(bool) + + if x is not None: + # Assign values from x + if condition_is_masked: + # Replace masked elements of condition with False, so that + # masked locations are assigned from array + c = condition.filled(False) + else: + c = condition + + array = where(c, x, array) + + if y is not None: + # Assign values from y + if condition_is_masked: + # Replace masked elements of condition with True, so that + # masked locations are assigned from array + c = condition.filled(True) + else: + c = condition + + array = where(c, array, y) + + if hardmask: + if mask is not None and mask.any(): + # Apply the mask from the input array to the result + array.mask |= mask + + array.harden_mask() + + return array diff --git a/cf/data/utils.py b/cf/data/utils.py index fb077ebac9..da0499c24b 100644 --- a/cf/data/utils.py +++ b/cf/data/utils.py @@ -269,15 +269,17 @@ def new_axis_identifier(existing_axes=(), basename="dim"): def chunk_positions(chunks): """Find the position of each chunk. - .. versionadded:: 4.0.0 + .. versionadded:: TODODASK .. seealso:: `chunk_shapes` :Parameters: chunks: `tuple` + The chunk sizes along each dimension, as output by + `dask.array.Array.chunks`. - **Examples:** + **Examples** >>> chunks = ((1, 2), (9,), (44, 55, 66)) >>> for position in chunk_positions(chunks): @@ -297,15 +299,17 @@ def chunk_positions(chunks): def chunk_shapes(chunks): """Find the shape of each chunk. - .. versionadded:: 4.0.0 + .. versionadded:: TODODASK .. seealso:: `chunk_positions` :Parameters: chunks: `tuple` + The chunk sizes along each dimension, as output by + `dask.array.Array.chunks`. 
- **Examples:** + **Examples** >>> chunks = ((1, 2), (9,), (4, 5, 6)) >>> for shape in chunk_shapes(chunks): @@ -368,6 +372,51 @@ def dask_compatible(a): return a +def scalar_masked_array(dtype=float): + """Return a scalar masked array. + + .. versionadded:: TODODASK + + :Parameters: + + dtype: data-type, optional + Desired output data-type for the array, e.g. + `numpy.int8`. Default is `numpy.float64`. + + :Returns: + + `np.ma.core.MaskedArray` + The scalar masked array. + + **Examples** + + >>> scalar_masked_array() + masked_array(data=--, + mask=True, + fill_value=1e+20, + dtype=float64) + >>> scalar_masked_array(np.dtype('int32')) + masked_array(data=--, + mask=True, + fill_value=999999, + dtype=int32) + >>> scalar_masked_array('U45') + masked_array(data=--, + mask=True, + fill_value='N/A', + dtype='<U45') + >>> scalar_masked_array(bool) + masked_array(data=--, + mask=True, + fill_value=True, + dtype=bool) + + """ + a = np.ma.empty((), dtype=dtype) + a.mask = True + return a + + def conform_units(value, units): """Conform units. @@ -377,7 +426,7 @@ def conform_units(value, units): unchanged; * if the value units are equivalent to *units* then a copy of - *value* convert to *units* is returned; + *value* converted to *units* is returned; * if the value units are not equivalent to *units* then an exception is raised. 
@@ -431,3 +480,4 @@ def conform_units(value, units): ) return value + diff --git a/cf/query.py b/cf/query.py index d9f05f1a80..8aed2c4aa5 100644 --- a/cf/query.py +++ b/cf/query.py @@ -1,4 +1,5 @@ import logging +from copy import deepcopy from operator import __and__ as operator_and from operator import __or__ as operator_or @@ -273,7 +274,7 @@ def __and__(self, other): new = Q.__new__(Q) new._operator = None - new._compound = (self, other) + new._compound = (self.copy(), other.copy()) new._bitwise_operator = operator_and new._attr = () @@ -332,7 +333,7 @@ def __str__(self): attr = ".".join(self._attr) if not self._compound: - out = f"{attr}({self._operator} " + str(self._value) + out = f"{attr}({self._operator} {self._value})" else: bitwise_operator = repr(self._bitwise_operator) if "and_" in bitwise_operator: @@ -465,7 +466,18 @@ def copy(self): >>> r = q.copy() """ - return self # TODO + Q = type(self) + new = Q.__new__(Q) + + d = self.__dict__.copy() + new.__dict__ = d + + if d["_compound"]: + d["_compound"] = deepcopy(d["_compound"]) + else: + d["_value"] = deepcopy(d["_value"]) + + return new @_display_or_return def dump(self, display=True): @@ -745,6 +757,83 @@ def inspect(self): """ print(_inspect(self)) # pragma: no cover + def set_condition_units(self, units): + """Set units of condition values in-place. + + .. versionadded:: TODO + + :Parameters: + + units: `str` or `Units` + + The units to be set on all condition values. + + :Returns: + + `None` + + **Examples** + + >>> q = cf.lt(9) + >>> q + + >>> q.set_condition_units('km') + >>> q + + >>> q.set_condition_units('seconds') + ... 
+ ValueError: Units are not equivalent to query condition units + + >>> q = cf.lt(9, units='m') + >>> q + + >>> q.set_condition_units('km') + >>> q + + + >>> q = cf.lt(9) + >>> r = cf.ge(3000, units='m') + >>> s = q & r + >>> s + + >>> s.set_condition_units('km') + >>> s + + >>> q + + >>> r + + + """ + units = Units(units) + + compound = self._compound + if compound: + for r in compound: + r.set_condition_units(units) + + return + + value = self._value + if value is None: + return + + value_units = getattr(value, "Units", None) + if value_units is None: + # Value has no units + value = Data(value, units=units) + else: + # Value already has units + try: + value.Units = units + except ValueError: + raise ValueError( + f"Units {units!r} are not equivalent to " + f"query condition units {value_units!r}" + ) + + self._value = value + # ---------------------------------------------------------------- # Deprecated attributes and methods # ---------------------------------------------------------------- diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 6dd4648ab5..2a23626279 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -3257,6 +3257,87 @@ def test_Data_has_calendar(self): d = cf.Data(1, "days since 2000-1-1", calendar="noleap") self.assertTrue(d.has_calendar()) + def test_Data_where(self): + a = np.arange(10) + d = cf.Data(a) + b = np.where(a < 5, a, 10 * a) + e = d.where(a < 5, d, 10 * a) + self.assertTrue(e.shape == b.shape) + self.assertTrue((e.array == b).all()) + + d = cf.Data(a, "km") + b = np.where(a < 5, 10 * a, a) + e = d.where(a < 5, cf.Data(10000 * a, "metre")) + self.assertTrue(e.shape == b.shape) + self.assertTrue((e.array == b).all()) + + a = np.array( + [ + [ + 1, + 2, + ], + [3, 4], + ] + ) + d = cf.Data(a) + b = np.where([[True, False], [True, True]], a, [[9, 8], [7, 6]]) + e = d.where([[True, False], [True, True]], d, [[9, 8], [7, 6]]) + self.assertTrue(e.shape == b.shape) + self.assertTrue((e.array == b).all()) + + b = 
np.where([[True, False], [True, True]], [[9, 8], [7, 6]], a) + e = d.where([[True, False], [True, True]], [[9, 8], [7, 6]]) + self.assertTrue(e.shape == b.shape) + self.assertTrue((e.array == b).all()) + + b = np.where([True, False], [9, 8], a) + e = d.where([True, False], [9, 8]) + self.assertTrue(e.shape == b.shape) + self.assertTrue((e.array == b).all()) + + a = np.array([[0, 1, 2], [0, 2, 4], [0, 3, 6]]) + d = cf.Data(a) + b = np.where(a < 4, a, -1) + e = d.where(a < 4, d, -1) + self.assertTrue(e.shape == b.shape) + self.assertTrue((e.array == b).all()) + + x, y = np.ogrid[:3, :4] + d = cf.Data(x) + with self.assertRaises(ValueError): + # Can't change shape + d.where(x < y, d, 10 + y) + + with self.assertRaises(ValueError): + # Can't change shape + d.where(False, d, 10 + y) + + a = np.ma.arange(9, dtype=int).reshape(3, 3) + d = cf.Data(a, mask=[[0, 0, 0], [1, 0, 0], [0, 0, 0]]) + e = d.where(a > 5, None, -999) + self.assertTrue(e.shape == d.shape) + self.assertTrue((e.array.mask == d.array.mask).all()) + self.assertTrue( + (e.array == [[-999, -999, -999], [5, -999, -999], [6, 7, 8]]).all() + ) + + d.soften_mask() + e = d.where(a > 5, None, -999) + self.assertTrue(e.shape == d.shape) + self.assertTrue((e.array.mask == False).all()) + self.assertTrue( + ( + e.array == [[-999, -999, -999], [-999, -999, -999], [6, 7, 8]] + ).all() + ) + + a = np.arange(10) + d = cf.Data(a) + e = d.where(a < 5, cf.masked) + self.assertTrue((e.array.mask == [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]).all()) + self.assertTrue((e.array == a).all()) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_Query.py b/cf/test/test_Query.py index 92b6671cc4..105260c5ca 100644 --- a/cf/test/test_Query.py +++ b/cf/test/test_Query.py @@ -540,6 +540,32 @@ def test_Query_evaluate(self): self.assertNotEqual(x, cf.eq(re.compile(".*RTY$"))) self.assertNotEqual(x, cf.eq(re.compile("^.*RTY$"))) + def test_Query_set_condition_units(self): + q = cf.lt(9) + 
q.set_condition_units("km") + self.assertEqual(q.value.Units, cf.Units("km")) + + with self.assertRaises(ValueError): + q.set_condition_units("seconds") + + q = cf.lt(9000, units="m") + q.set_condition_units("km") + self.assertEqual(q.value.Units, cf.Units("km")) + self.assertEqual(q.value.array, 9) + + q = cf.lt(9) + r = cf.ge(3000, units="m") + s = q & r + s.set_condition_units("km") + self.assertEqual(s._compound[0].value.Units, cf.Units("km")) + self.assertEqual(s._compound[1].value.Units, cf.Units("km")) + self.assertEqual(s._compound[0].value.array, 9) + self.assertEqual(s._compound[1].value.array, 3) + + self.assertEqual(r.value.Units, cf.Units("m")) + self.assertEqual(r.value.array, 3000) + self.assertEqual(q.value, 9) + if __name__ == "__main__": print("Run date:", datetime.datetime.now())