From 3ef925722cfabf1b560495c13a8986e96173c572 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 1 Jan 2020 14:35:33 -0700 Subject: [PATCH 01/16] apply_ufunc: Set meta=np.ndarray when vectorize=True and dask="parallelized" Closes #3574 --- doc/whats-new.rst | 3 +++ xarray/core/computation.py | 14 +++++++++++++- xarray/tests/test_computation.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 00d1c50780e..58f842b10c8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,9 @@ New Features Bug fixes ~~~~~~~~~ +- Make applying a user-defined function that adds new dimensions using + :py:func:`apply_ufunc` with ``vectorize=True`` work with ``dask > 2.0``. + By `Deepak Cherian `_. - Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger `_. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 643c1137d6c..15a0a95da40 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -547,6 +547,7 @@ def apply_variable_ufunc( output_dtypes=None, output_sizes=None, keep_attrs=False, + vectorize=False, ): """Apply a ndarray level function over Variable and/or ndarray objects. 
""" @@ -579,6 +580,7 @@ def apply_variable_ufunc( elif dask == "parallelized": input_dims = [broadcast_dims + dims for dims in signature.input_core_dims] numpy_func = func + meta = np.ndarray if vectorize else None def func(*arrays): return _apply_blockwise( @@ -589,6 +591,7 @@ def func(*arrays): signature, output_dtypes, output_sizes, + meta, ) elif dask == "allowed": @@ -647,7 +650,14 @@ def func(*arrays): def _apply_blockwise( - func, args, input_dims, output_dims, signature, output_dtypes, output_sizes=None + func, + args, + input_dims, + output_dims, + signature, + output_dtypes, + output_sizes=None, + meta=None, ): import dask.array @@ -719,6 +729,7 @@ def _apply_blockwise( dtype=dtype, concatenate=True, new_axes=output_sizes, + meta=meta, ) @@ -1005,6 +1016,7 @@ def earth_mover_distance(first_samples, dask=dask, output_dtypes=output_dtypes, output_sizes=output_sizes, + vectorize=vectorize, ) if any(isinstance(a, GroupBy) for a in args): diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 1f2634cc9b0..5a386de64d2 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -817,6 +817,24 @@ def test_vectorize_dask(): assert_identical(expected, actual) +@requires_dask +def test_vectorize_dask_new_output_dims(): + # regression test for GH3574 + data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y")) + func = lambda x: x[np.newaxis, ...] + expected = data_array.expand_dims("z") + actual = apply_ufunc( + func, + data_array.chunk({"x": 1}), + output_core_dims=[["z"]], + vectorize=True, + dask="parallelized", + output_dtypes=[float], + output_sizes={"z": 1}, + ).transpose(*expected.dims) + assert_identical(expected, actual) + + def test_output_wrong_number(): variable = xr.Variable("x", np.arange(10)) From 3c84f22d0e4fe972277457be3a8e8c6d34fd398d Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 9 Jan 2020 08:03:29 -0700 Subject: [PATCH 02/16] Add meta kwarg to apply_ufunc. 
--- xarray/core/computation.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 15a0a95da40..81971c9d754 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -547,7 +547,7 @@ def apply_variable_ufunc( output_dtypes=None, output_sizes=None, keep_attrs=False, - vectorize=False, + meta=None, ): """Apply a ndarray level function over Variable and/or ndarray objects. """ @@ -580,7 +580,6 @@ def apply_variable_ufunc( elif dask == "parallelized": input_dims = [broadcast_dims + dims for dims in signature.input_core_dims] numpy_func = func - meta = np.ndarray if vectorize else None def func(*arrays): return _apply_blockwise( @@ -771,6 +770,7 @@ def apply_ufunc( dask: str = "forbidden", output_dtypes: Sequence = None, output_sizes: Mapping[Any, int] = None, + meta: Any = None, ) -> Any: """Apply a vectorized function for unlabeled arrays on xarray objects. @@ -867,6 +867,9 @@ def apply_ufunc( Optional mapping from dimension names to sizes for outputs. Only used if dask='parallelized' and new dimensions (not found on inputs) appear on outputs. + meta : optional + Size-0 object representing the type of array wrapped by dask array. Passed on to + ``dask.array.map_blocks``. 
Returns ------- @@ -1000,6 +1003,11 @@ def earth_mover_distance(first_samples, func = functools.partial(func, **kwargs) if vectorize: + if meta is None: + # set meta=np.ndarray by default for numpy vectorized functions + # work around dask bug computing meta with vectorized functions: GH5642 + meta = np.ndarray + if signature.all_core_dims: func = np.vectorize( func, otypes=output_dtypes, signature=signature.to_gufunc_string() @@ -1016,7 +1024,7 @@ def earth_mover_distance(first_samples, dask=dask, output_dtypes=output_dtypes, output_sizes=output_sizes, - vectorize=vectorize, + meta=meta, ) if any(isinstance(a, GroupBy) for a in args): @@ -1031,6 +1039,7 @@ def earth_mover_distance(first_samples, dataset_fill_value=dataset_fill_value, keep_attrs=keep_attrs, dask=dask, + meta=meta, ) return apply_groupby_func(this_apply, *args) elif any(is_dict_like(a) for a in args): From 3f107f08a578453a2e0243fc1ac2156415f6a2f3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 9 Jan 2020 08:33:56 -0700 Subject: [PATCH 03/16] Bump minimum dask to 2.1.0 --- ci/requirements/py36-min-all-deps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index 3f10a158f91..8e2e206ee95 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -15,7 +15,7 @@ dependencies: - cfgrib=0.9 - cftime=1.0 - coveralls - - dask=1.2 + - dask=2.1.0 - distributed=1.27 - flake8 - h5netcdf=0.7 From b2c1b49b4f97dfa0ce19e6881ec4f612ffe89ec5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 9 Jan 2020 09:04:15 -0700 Subject: [PATCH 04/16] Update distributed too --- ci/requirements/py36-min-all-deps.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index 8e2e206ee95..cbbc7b4bad6 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -15,8 
+15,8 @@ dependencies: - cfgrib=0.9 - cftime=1.0 - coveralls - - dask=2.1.0 - - distributed=1.27 + - dask=2.1 + - distributed=2.1 - flake8 - h5netcdf=0.7 - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest From db0eafc7ca1d946f9a01981da2498678ed11411e Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 Jan 2020 07:51:41 -0700 Subject: [PATCH 05/16] bump minimum dask, distributed to 2.2 --- ci/requirements/py36-min-all-deps.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index cbbc7b4bad6..3719ee946b0 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -15,8 +15,8 @@ dependencies: - cfgrib=0.9 - cftime=1.0 - coveralls - - dask=2.1 - - distributed=2.1 + - dask=2.2 + - distributed=2.2 - flake8 - h5netcdf=0.7 - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest From c1253002ed3ca7bd8ab72dbecbbb3d85475b9d8f Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 Jan 2020 08:00:49 -0700 Subject: [PATCH 06/16] Update whats-new --- doc/whats-new.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 58f842b10c8..64bc839d50c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,7 @@ v0.15.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ - +- Bumped minimum ``dask`` version to 2.2. New Features ~~~~~~~~~~~~ @@ -37,6 +37,8 @@ New Features - Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen` and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ +- Add ``meta`` kwarg to :py:func:`~xarray.apply_ufunc`; this is passed on to + :py:meth:`dask.array.map_blocks`. (:pr:`3660`) By `Deepak Cherian `_. 
- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties and support `.dt` accessor for timedelta via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) @@ -44,8 +46,8 @@ New Features Bug fixes ~~~~~~~~~ -- Make applying a user-defined function that adds new dimensions using - :py:func:`apply_ufunc` with ``vectorize=True`` work with ``dask > 2.0``. +- Applying a user-defined function that adds new dimensions using :py:func:`apply_ufunc` + and ``vectorize=True`` now works with ``dask > 2.0``. (:issue:`3574`, :pr:`3660`). By `Deepak Cherian `_. - Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger From 680036163632cf07d4f31762c3099748729d2026 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 Jan 2020 08:27:20 -0700 Subject: [PATCH 07/16] minor. --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ea9781e29a8..31efc164cd2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,7 +55,6 @@ Bug fixes - Applying a user-defined function that adds new dimensions using :py:func:`apply_ufunc` and ``vectorize=True`` now works with ``dask > 2.0``. (:issue:`3574`, :pr:`3660`). By `Deepak Cherian `_. - - Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger `_. From 8f8c0c04467f7807e89c19b07b293d376454cd65 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 15 Jan 2020 08:57:49 -0700 Subject: [PATCH 08/16] fix whats-new --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 31efc164cd2..2368f4bd47a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,7 +38,7 @@ New Features and :py:class:`~core.rolling.DataArrayCoarsen` objects. 
(:pull:`3500`) By `Deepak Cherian `_ - Add ``meta`` kwarg to :py:func:`~xarray.apply_ufunc`; this is passed on to - :py:meth:`dask.array.map_blocks`. (:pr:`3660`) By `Deepak Cherian `_. + :py:meth:`dask.array.map_blocks`. (:pull:`3660`) By `Deepak Cherian `_. - Add `attrs_file` option in :py:func:`~xarray.open_mfdataset` to choose the source file for global attributes in a multi-file dataset (:issue:`2382`, :pull:`3498`) by `Julien Seguinot _`. @@ -53,7 +53,7 @@ New Features Bug fixes ~~~~~~~~~ - Applying a user-defined function that adds new dimensions using :py:func:`apply_ufunc` - and ``vectorize=True`` now works with ``dask > 2.0``. (:issue:`3574`, :pr:`3660`). + and ``vectorize=True`` now works with ``dask > 2.0``. (:issue:`3574`, :pull:`3660`). By `Deepak Cherian `_. - Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger From 2b22470b799c2dba14ac834a623d9d011104d3ce Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 16 Jan 2020 08:37:18 -0700 Subject: [PATCH 09/16] Attempt numpy=1.15 --- ci/requirements/py36-min-all-deps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index 3719ee946b0..cbef9e40095 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -29,7 +29,7 @@ dependencies: - nc-time-axis=1.2 - netcdf4=1.4 - numba=0.44 - - numpy=1.14 + - numpy=1.15 - pandas=0.24 # - pint # See py36-min-nep18.yml - pip From 64911a08d6392aac9bc65bfaa9b562b67c7c73f2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Jan 2020 07:44:47 -0700 Subject: [PATCH 10/16] Revert "Attempt numpy=1.15" This reverts commit 2b22470b799c2dba14ac834a623d9d011104d3ce. 
--- ci/requirements/py36-min-all-deps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index cbef9e40095..3719ee946b0 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -29,7 +29,7 @@ dependencies: - nc-time-axis=1.2 - netcdf4=1.4 - numba=0.44 - - numpy=1.15 + - numpy=1.14 - pandas=0.24 # - pint # See py36-min-nep18.yml - pip From be48ae7d169d9908d771189eb9eb2fcc2cd0d942 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Jan 2020 15:15:35 -0700 Subject: [PATCH 11/16] xfail test. --- xarray/tests/test_backends.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4fccdf2dd6c..2d8fa6428da 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1723,6 +1723,7 @@ def test_hidden_zarr_keys(self): with xr.decode_cf(store): pass + @pytest.mark.xfail(reason="fails for old dask versions.") def test_write_persistence_modes(self): original = create_test_data() From 9e8316bca1575962e3accc208171313715d6a0ad Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Jan 2020 15:18:43 -0700 Subject: [PATCH 12/16] More xfailed tests. 
--- xarray/tests/test_backends.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 2d8fa6428da..67d060ead41 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1788,6 +1788,7 @@ def test_encoding_kwarg_fixed_width_string(self): def test_dataset_caching(self): super().test_dataset_caching() + @pytest.mark.xfail(reason="fails for old dask versions.") def test_append_write(self): ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: @@ -1864,6 +1865,7 @@ def test_check_encoding_is_consistent_after_append(self): xr.concat([ds, ds_to_append], dim="time"), ) + @pytest.mark.xfail(reason="fails for old dask versions.") def test_append_with_new_variable(self): ds, ds_to_append, ds_with_new_var = create_append_test_data() From 41702f8cd69824be33ae19e90ae93491bdf66a15 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Jan 2020 15:27:00 -0700 Subject: [PATCH 13/16] Update xfail reason. --- xarray/tests/test_backends.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3985846acde..bf9a13f34c9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1723,7 +1723,7 @@ def test_hidden_zarr_keys(self): with xr.decode_cf(store): pass - @pytest.mark.xfail(reason="fails for old dask versions.") + @pytest.mark.xfail(reason="fails for dask <2.4.0. dask GH5334") def test_write_persistence_modes(self): original = create_test_data() @@ -1788,7 +1788,7 @@ def test_encoding_kwarg_fixed_width_string(self): def test_dataset_caching(self): super().test_dataset_caching() - @pytest.mark.xfail(reason="fails for old dask versions.") + @pytest.mark.xfail(reason="fails for dask <2.4.0. 
dask GH5334") def test_append_write(self): ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: @@ -1865,7 +1865,7 @@ def test_check_encoding_is_consistent_after_append(self): xr.concat([ds, ds_to_append], dim="time"), ) - @pytest.mark.xfail(reason="fails for old dask versions.") + @pytest.mark.xfail(reason="fails for dask <2.4.0. dask GH5334") def test_append_with_new_variable(self): ds, ds_to_append, ds_with_new_var = create_append_test_data() From 4b92ba7af38b81303a6e25d2f58d35a9b73d900f Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 20 Jan 2020 15:28:39 -0700 Subject: [PATCH 14/16] fix whats-new --- doc/whats-new.rst | 2 +- xarray/core/computation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 486f16b463c..fba0a90e807 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,7 +52,7 @@ New Features and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ - Add ``meta`` kwarg to :py:func:`~xarray.apply_ufunc`; this is passed on to - :py:meth:`dask.array.map_blocks`. (:pull:`3660`) By `Deepak Cherian `_. + :py:meth:`dask.array.blockwise`. (:pull:`3660`) By `Deepak Cherian `_. - Add `attrs_file` option in :py:func:`~xarray.open_mfdataset` to choose the source file for global attributes in a multi-file dataset (:issue:`2382`, :pull:`3498`) by `Julien Seguinot _`. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 45032a37cc7..d2c5c32bc00 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -870,7 +870,7 @@ def apply_ufunc( on outputs. meta : optional Size-0 object representing the type of array wrapped by dask array. Passed on to - ``dask.array.map_blocks``. + ``dask.array.blockwise``. 
Returns ------- From 54be7dfc9013426c15454d78be5975d8c7574e71 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 21 Jan 2020 07:54:32 -0700 Subject: [PATCH 15/16] Add test to ensure meta is passed on to dask. --- xarray/tests/test_sparse.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index a02fef2faeb..21a212c29b3 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -873,3 +873,16 @@ def test_dask_token(): t5 = dask.base.tokenize(ac + 1) assert t4 != t5 assert isinstance(ac.data._meta, sparse.COO) + + +@requires_dask +def test_apply_ufunc_meta_to_blockwise(): + da = xr.DataArray(np.zeros((2, 3)), dims=["x", "y"]).chunk({"x": 2, "y": 1}) + sparse_meta = sparse.COO.from_numpy(np.zeros((0, 0))) + + # if dask computed meta, it would be np.ndarray + expected = xr.apply_ufunc( + lambda x: x, da, dask="parallelized", output_dtypes=[da.dtype], meta=sparse_meta + ).data._meta + + assert_sparse_equal(expected, sparse_meta) From 58e5608334dd48ba6274b96892636ae795a2a9e9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 21 Jan 2020 10:31:36 -0700 Subject: [PATCH 16/16] Use skipif instead of xfail. --- xarray/tests/test_backends.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bf9a13f34c9..bb77cbb94fe 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -37,7 +37,7 @@ from xarray.core import indexing from xarray.core.options import set_options from xarray.core.pycompat import dask_array_type -from xarray.tests import mock +from xarray.tests import LooseVersion, mock from . 
import ( arm_xfail, @@ -76,9 +76,14 @@ pass try: + import dask import dask.array as da + + dask_version = dask.__version__ except ImportError: - pass + # placeholder so the skipif(dask < 2.4) markers below evaluate without dask installed + # remove together with those markers once min dask >= 2.4 + dask_version = "10.0" ON_WINDOWS = sys.platform == "win32" @@ -1723,7 +1728,7 @@ def test_hidden_zarr_keys(self): with xr.decode_cf(store): pass - @pytest.mark.xfail(reason="fails for dask <2.4.0. dask GH5334") + @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") def test_write_persistence_modes(self): original = create_test_data() @@ -1788,7 +1793,7 @@ def test_encoding_kwarg_fixed_width_string(self): def test_dataset_caching(self): super().test_dataset_caching() - @pytest.mark.xfail(reason="fails for dask <2.4.0. dask GH5334") + @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") def test_append_write(self): ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: @@ -1865,7 +1870,7 @@ def test_check_encoding_is_consistent_after_append(self): xr.concat([ds, ds_to_append], dim="time"), ) - @pytest.mark.xfail(reason="fails for dask <2.4.0. dask GH5334") + @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") def test_append_with_new_variable(self): ds, ds_to_append, ds_with_new_var = create_append_test_data()