From a4ef294e476d499b73510ee920e0bd26bad7ba5e Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sun, 12 Sep 2021 00:48:04 +0100 Subject: [PATCH 1/4] fix setting of non-dask array in __init__ --- cf/data/data.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index edea334fe9..d608c5c8c9 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -458,10 +458,17 @@ def __init__( "Can't set the 'source' and 'loads' parameters " "at the same time" ) - # --- End: if if source is not None: - super().__init__(source=source, _use_array=_use_array) + try: + array = source._get_Array(None) + except AttributeError: + array = None + + super().__init__( + source=source, _use_array=_use_array and array is not None + ) + if _use_array: try: array = source._get_dask() @@ -474,9 +481,7 @@ def __init__( return - super().__init__( - array=array, fill_value=fill_value, _use_array=_use_array - ) + super().__init__(array=array, fill_value=fill_value, _use_array=False) # Create the _HDF_chunks attribute: defines HDF chunking when # writing to disk. @@ -546,6 +551,10 @@ def __init__( "compressed input arrays" ) + # Save the input compressed array, as this will contain + # extra information, such as a count or index variable. + self._set_Array(array) + array = compressed_to_dask(array) elif not is_dask_collection(array): @@ -558,7 +567,6 @@ def __init__( first_value = first_non_missing_value(array) if first_value is not None: dt = hasattr(first_value, "timetuple") - # --- End: if # Convert string or object date-times to floating point # reference times From 865caeda6fb0d05a042c94c351287c7cf1323187 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sun, 12 Sep 2021 10:53:20 +0100 Subject: [PATCH 2/4] re-instated select parameter to _read_a_file --- cf/read_write/read.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cf/read_write/read.py b/cf/read_write/read.py index 04bd0c3dbb..30744d2703 100644 --- a/cf/read_write/read.py +++ b/cf/read_write/read.py @@ -503,9 +503,9 @@ def read( .. versionadded:: 1.5 - chunks: TODO + chunks: TODODASK - .. versionadded:: 4.0.0 + .. versionadded:: TODODASK umversion: deprecated at version 3.0.0 Use the *um* parameter instead. @@ -522,7 +522,7 @@ def read( select_options: deprecated at version 3.0.0 Use methods on the returned `FieldList` instead. - chunk: deprecated at version 4.0.0 + chunk: deprecated at version TODODASK Use the *chunks* parameter instead. :Returns: @@ -600,7 +600,7 @@ def read( "cf.read", {"chunk": chunk}, "Use keyword 'chunks' instead.", - version="4.0.0", + version="TODODASK", ) # pragma: no cover # Parse select @@ -889,6 +889,7 @@ def _read_a_file( mask=True, warn_valid=False, chunks="auto", + select=None, ): """Read the contents of a single file into a field list. @@ -991,7 +992,7 @@ def _read_a_file( # return FieldList() extra_read_vars = { - "chunk": chunk, + "chunks": chunks, "fmt": selected_fmt, "ignore_read_error": ignore_read_error, # 'cfa' defaults to False. If the file has From 9314241408ae0bedbf5745c487c5cd35f5d0c95a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 13 Sep 2021 09:03:35 +0100 Subject: [PATCH 3/4] comment --- cf/data/abstract/array.py | 3 --- cf/data/data.py | 4 ++-- cf/data/gatheredarray.py | 7 +------ 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/cf/data/abstract/array.py b/cf/data/abstract/array.py index ba7e3c71dd..1ab1c485e1 100644 --- a/cf/data/abstract/array.py +++ b/cf/data/abstract/array.py @@ -25,6 +25,3 @@ def __repr__(self): def dask_asarray(self): """TODODASK.""" return False - - -# --- End: class diff --git a/cf/data/data.py b/cf/data/data.py index d608c5c8c9..694d9e51aa 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -569,7 +569,7 @@ def __init__( dt = hasattr(first_value, "timetuple") # Convert string or object date-times to floating point - # reference times + # reference times, if appropriate. if array.dtype.kind in "USO" and (dt or units.isreftime): array, units = convert_to_reftime(array, units, first_value) # Reset the units @@ -1126,7 +1126,7 @@ def _get_dask(self): def _set_dask(self, array, copy=False, delete_source=True): """Set the dask array. - .. versionadded:: 4.0.0 + .. versionadded:: TODODASK :Parameters: diff --git a/cf/data/gatheredarray.py b/cf/data/gatheredarray.py index 4fb2a47a43..b41a60b271 100644 --- a/cf/data/gatheredarray.py +++ b/cf/data/gatheredarray.py @@ -1,9 +1,7 @@ import cfdm -from . import mixin - -class GatheredArray(mixin.CompressedArray, cfdm.GatheredArray): +class GatheredArray(cfdm.GatheredArray): """An underlying gathered array. Compression by gathering combines axes of a multidimensional array @@ -16,6 +14,3 @@ class GatheredArray(mixin.CompressedArray, cfdm.GatheredArray): .. versionadded:: 3.0.0 """ - - -# --- End: class From a091728cc086f32c4b85ea08763e7a03cbc6278b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 13 Sep 2021 16:28:41 +0100 Subject: [PATCH 4/4] dask reset_mask_hardness --- cf/data/data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cf/data/data.py b/cf/data/data.py index 694d9e51aa..fbbea678ec 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -476,6 +476,9 @@ def __init__( pass else: self._set_dask(array, copy=copy, delete_source=False) + # TODODASK: When PR #257 (dask: Data.__getitem__, + # Data.__setitem__) is merged, add in a + # "reset_mask_hardness=False". else: self._del_dask(None)