Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/2011.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use {class}`numpy.dtypes.StringDType` with `na_object` set to {attr}`pandas.NA` for nullable string data with {class}`anndata.experimental.backed.Dataset2D` {user}`ilan-gold`
4 changes: 2 additions & 2 deletions src/anndata/_io/specs/lazy_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from anndata._core.xarray import Dataset2D, requires_xarray
from anndata.abc import CSCDataset, CSRDataset
from anndata.compat import (
NULLABLE_NUMPY_STRING_TYPE,
DaskArray,
H5Array,
H5Group,
Expand Down Expand Up @@ -259,8 +260,7 @@ def _gen_xarray_dict_iterator_from_elems(
"base_path_or_zarr_group": v.base_path_or_zarr_group,
"elem_name": v.elem_name,
"is_nullable_string": isinstance(v, MaskedArray)
and v.dtype # CategoricalArray dtype access requires a read nad is not necessary here
== np.dtype("O"),
and v.dtype == NULLABLE_NUMPY_STRING_TYPE,
},
)
elif k == dim_name:
Expand Down
7 changes: 7 additions & 0 deletions src/anndata/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,3 +404,10 @@ def _map_cat_to_str(cat: pd.Categorical) -> pd.Categorical:
return cat.map(str, na_action="ignore")
else:
return cat.map(str)


# dtype used to represent nullable string data as a NumPy array.
# NumPy releases older than 2 fall back to the generic object dtype; on
# NumPy 2 and later a `StringDType` is used whose missing-value sentinel
# (`na_object`) is `pd.NA`.
if Version(np.__version__) >= Version("2"):
    NULLABLE_NUMPY_STRING_TYPE = np.dtypes.StringDType(na_object=pd.NA)
else:
    NULLABLE_NUMPY_STRING_TYPE = np.dtype("O")
15 changes: 11 additions & 4 deletions src/anndata/experimental/backed/_lazy_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from functools import cached_property
from typing import TYPE_CHECKING, Generic, TypeVar

import numpy as np
import pandas as pd

from anndata._core.index import _subset
Expand All @@ -12,13 +11,20 @@
from anndata.compat import H5Array, ZarrArray

from ..._settings import settings
from ...compat import XBackendArray, XDataArray, XZarrArrayWrapper
from ...compat import (
NULLABLE_NUMPY_STRING_TYPE,
XBackendArray,
XDataArray,
XZarrArrayWrapper,
)
from ...compat import xarray as xr

if TYPE_CHECKING:
from pathlib import Path
from typing import Literal

import numpy as np

from anndata._core.index import Index
from anndata.compat import ZarrGroup

Expand Down Expand Up @@ -146,7 +152,8 @@
extension_array = pd.arrays.BooleanArray(values, mask=mask)
elif self._dtype_str == "nullable-string-array":
# https://github.com/pydata/xarray/issues/10419
values[mask] = np.nan
values = values.astype(self.dtype)
values[mask] = pd.NA

Check warning on line 156 in src/anndata/experimental/backed/_lazy_arrays.py

View check run for this annotation

Codecov / codecov/patch

src/anndata/experimental/backed/_lazy_arrays.py#L155-L156

Added lines #L155 - L156 were not covered by tests
return values
else:
msg = f"Invalid dtype_str {self._dtype_str}"
Expand All @@ -164,7 +171,7 @@
return pd.BooleanDtype()
elif self._dtype_str == "nullable-string-array":
# https://github.com/pydata/xarray/issues/10419
return np.dtype("O")
return NULLABLE_NUMPY_STRING_TYPE

Check warning on line 174 in src/anndata/experimental/backed/_lazy_arrays.py

View check run for this annotation

Codecov / codecov/patch

src/anndata/experimental/backed/_lazy_arrays.py#L174

Added line #L174 was not covered by tests
msg = f"Invalid dtype_str {self._dtype_str}"
raise RuntimeError(msg)

Expand Down