diff --git a/cf/data/__init__.py b/cf/data/__init__.py index 6a28aa2c46..4d80e10a5e 100644 --- a/cf/data/__init__.py +++ b/cf/data/__init__.py @@ -1,3 +1,5 @@ +from .abstract import FileArray + from .cachedarray import CachedArray from .netcdfarray import NetCDFArray from .umarray import UMArray diff --git a/cf/data/data.py b/cf/data/data.py index fb8ff5292a..83735069d7 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -47,6 +47,7 @@ from ..mixin_container import Container from ..units import Units from . import ( # GatheredSubarray,; RaggedContiguousSubarray,; RaggedIndexedContiguousSubarray,; RaggedIndexedSubarray, + FileArray, NetCDFArray, UMArray, ) @@ -9414,37 +9415,43 @@ def insert_dimension(self, position=0, inplace=False): return d + @daskified(_DASKIFIED_VERBOSE) def get_filenames(self): """Return the names of files containing parts of the data array. :Returns: `set` - The file names in normalized, absolute form. If the data - is are memory then an empty `set` is returned. + The file names in normalized, absolute form. If the + data is in memory then an empty `set` is returned. - **Examples:** + **Examples** + + >>> f = cf.NetCDFArray(TODODASK) + >>> d = cf.Data(f) + >>> d.get_filenames() + {TODODASK} - >>> f = cf.read('../file[123]')[0] - >>> f.get_filenames() - {'/data/user/file1', - '/data/user/file2', - '/data/user/file3'} - >>> a = f.array - >>> f.get_filenames() + >>> d = cf.Data([1, 2, 3]) + >>> d.get_filenames() set() """ - print("TODODASK - is this still possible?") - out = set( - [ - abspath(p.subarray.get_filename()) - for p in self.partitions.matrix.flat - if p.in_file - ] - ) - out.discard(None) + out = set() + dx = self._get_dask() + hlg = dx.dask + dsk = hlg.to_dict() + for key, value in hlg.get_all_dependencies().items(): + if value: + continue + + # This key has no dependencies, and so is raw data. + a = dsk[key] + if isinstance(a, FileArray): + out.add(abspath(a.get_filename())) + + out.discard(None) return out @daskified(_DASKIFIED_VERBOSE) diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 84dfbe0460..3dce6264af 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -3922,6 +3922,10 @@ def test_Data_set_units(self): with self.assertRaises(ValueError): d.set_units("km") + @unittest.skipIf(TEST_DASKIFIED_ONLY, "Needs updated NetCDFArray to test") + def test_Data_get_filenames(self): + pass + def test_Data_tolist(self): for x in (1, [1, 2], [[1, 2], [3, 4]]): d = cf.Data(x)