diff --git a/cf/data/data.py b/cf/data/data.py index a7a37484d6..5ecdf7d2f9 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -832,78 +832,75 @@ def __iter__(self): x.__iter__() <==> iter(x) - **Examples:** + **Performance** + + If the shape of the data is unknown then it is calculated + immediately by executing all delayed operations. + + **Examples** >>> d = cf.Data([1, 2, 3], 'metres') >>> for e in d: - ... print(repr(e)) + ... print(repr(e)) ... - 1 - 2 - 3 + + + - >>> d = cf.Data([[1, 2], [4, 5]], 'metres') + >>> d = cf.Data([[1, 2], [3, 4]], 'metres') >>> for e in d: - ... print(repr(e)) + ... print(repr(e)) ... - + - >>> d = cf.Data(34, 'metres') + >>> d = cf.Data(99, 'metres') >>> for e in d: ... print(repr(e)) - .. + ... + Traceback (most recent call last): + ... TypeError: iteration over a 0-d Data """ - ndim = self._ndim - - if not ndim: - raise TypeError( - "Iteration over 0-d {}".format(self.__class__.__name__) - ) + try: + n = len(self) + except TypeError: + raise TypeError(f"iteration over a 0-d {self.__class__.__name__}") - elif ndim == 1: - if self.fits_in_memory(self.dtype.itemsize): - i = iter(self.array) - while 1: - try: - yield next(i) - except StopIteration: - return - else: - for n in range(self._size): - yield self[n].array[0] - - else: - # ndim > 1 - for n in range(self._shape[0]): - out = self[n, ...] - out.squeeze(0, inplace=True) - yield out + for i in range(n): + yield self[i] def __len__(self): - """The built-in function `len` + """Called to implement the built-in function `len`. x.__len__() <==> len(x) - **Examples:** + **Performance** + + If the shape of the data is unknown then it is calculated + immediately by executing all delayed operations. - >>> len(Data([1, 2, 3])) + **Examples** + + >>> len(cf.Data([1, 2, 3])) 3 - >>> len(Data([[1, 2, 3]])) + >>> len(cf.Data([[1, 2, 3]])) 1 - >>> len(Data([[1, 2, 3], [4, 5, 6]])) + >>> len(cf.Data([[1, 2, 3], [4, 5, 6]])) 2 - >>> len(Data(1)) - TypeError: len() of scalar Data + >>> len(cf.Data(1)) + Traceback (most recent call last): + ... + TypeError: len() of unsized object """ - shape = self._shape - if shape: - return shape[0] + dx = self._get_dask() + if math.isnan(dx.size): + logger.warning("Computing data len: Performance may be degraded") + dx.compute_chunk_sizes() - raise TypeError("len() of scalar {}".format(self.__class__.__name__)) + return len(dx) def __bool__(self): """Truth value testing and the built-in operation `bool` diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 1ac278d174..393c91451e 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -2342,17 +2342,17 @@ def test_Data_ERROR(self): cf.Data.mask_fpe(oldm) cf.Data.seterr(**olds) - @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attribute '_shape'") def test_Data__len__(self): if self.test_only and inspect.stack()[0][3] not in self.test_only: return - self.assertEqual(len(cf.Data([1, 2, 3])), 3) - self.assertEqual(len(cf.Data([[1, 2, 3]])), 1) - self.assertEqual(len(cf.Data([[1, 2, 3], [4, 5, 6]])), 2) + self.assertEqual(3, len(cf.Data([1, 2, 3]))) + self.assertEqual(2, len(cf.Data([[1, 2, 3], [4, 5, 6]]))) + self.assertEqual(1, len(cf.Data([[1, 2, 3]]))) - with self.assertRaises(Exception): - _ = len(cf.Data(1)) + # len() of unsized object + with self.assertRaises(TypeError): + len(cf.Data(1)) def test_Data__float__(self): if self.test_only and inspect.stack()[0][3] not in self.test_only: @@ -3768,6 +3768,18 @@ def test_Data_zeros(self): self.assertEqual(d.dtype, dtype_out) self.assertTrue((d.array == np.zeros(shape, dtype=dtype_in)).all()) + def test_Data__iter__(self): + for d in ( + cf.Data([1, 2, 3], "metres"), + cf.Data([[1, 2], [3, 4]], "metres"), + ): + for i, e in enumerate(d): + self.assertTrue(e.equals(d[i])) + + # iteration over a 0-d Data + with self.assertRaises(TypeError): + list(cf.Data(99, "metres")) + def test_Data__bool__(self): for x in (1, 1.5, True, "x"): self.assertTrue(bool(cf.Data(x)))