diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 35503a9274e..d8737b39610 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -99,7 +99,6 @@ parse_dims_as_set, ) from xarray.core.variable import ( - UNSUPPORTED_EXTENSION_ARRAY_TYPES, IndexVariable, Variable, as_variable, @@ -7268,7 +7267,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self: extension_arrays = [] for k, v in dataframe.items(): if not is_extension_array_dtype(v) or isinstance( - v.array, UNSUPPORTED_EXTENSION_ARRAY_TYPES + v.array, pd.arrays.NumpyExtensionArray ): arrays.append((k, np.asarray(v))) else: diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index 9052f5ae0a0..ddc37369b81 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -93,9 +93,6 @@ class PandasExtensionArray(Generic[T_ExtensionArray], NDArrayMixin): def __post_init__(self): if not isinstance(self.array, pd.api.extensions.ExtensionArray): raise TypeError(f"{self.array} is not an pandas ExtensionArray.") - # This does not use the UNSUPPORTED_EXTENSION_ARRAY_TYPES whitelist because - # we do support extension arrays from datetime, for example, that need - # duck array support internally via this class. if isinstance(self.array, pd.arrays.NumpyExtensionArray): raise TypeError( "`NumpyExtensionArray` should be converted to a numpy array in `xarray` internally." 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9c753a2ffa7..7694d59e714 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -63,11 +63,6 @@ ) # https://github.com/python/mypy/issues/224 BASIC_INDEXING_TYPES = integer_types + (slice,) -UNSUPPORTED_EXTENSION_ARRAY_TYPES = ( - pd.arrays.DatetimeArray, - pd.arrays.TimedeltaArray, - pd.arrays.NumpyExtensionArray, # type: ignore[attr-defined] -) if TYPE_CHECKING: from xarray.core.types import ( @@ -195,7 +190,7 @@ def _maybe_wrap_data(data): """ if isinstance(data, pd.Index): return PandasIndexingAdapter(data) - if isinstance(data, UNSUPPORTED_EXTENSION_ARRAY_TYPES): + if isinstance(data, pd.arrays.NumpyExtensionArray): return data.to_numpy() if isinstance(data, pd.api.extensions.ExtensionArray): return PandasExtensionArray(data) @@ -261,7 +256,7 @@ def convert_non_numpy_type(data): if ( isinstance(data, pd.Series) and pd.api.types.is_extension_array_dtype(data) - and not isinstance(data.array, UNSUPPORTED_EXTENSION_ARRAY_TYPES) + and not isinstance(data.array, pd.arrays.NumpyExtensionArray) ): pandas_data = data.array else: diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 1e7c32dec1e..d9e094bd98e 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -15,6 +15,7 @@ from xarray import DataArray, Dataset, IndexVariable, Variable, set_options from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.common import full_like, ones_like, zeros_like +from xarray.core.extension_array import PandasExtensionArray from xarray.core.indexing import ( BasicIndexer, CopyOnWriteArray, @@ -2757,15 +2758,15 @@ def test_tz_datetime(self) -> None: warnings.simplefilter("ignore") actual: T_DuckArray = as_compatible_data(times_s) assert actual.array == times_s - assert actual.array.dtype == pd.DatetimeTZDtype("s", tz) # type: ignore[arg-type] + assert actual.array.dtype == times_s.dtype # type: ignore[arg-type] series = 
pd.Series(times_s) with warnings.catch_warnings(): warnings.simplefilter("ignore") actual2: T_DuckArray = as_compatible_data(series) - np.testing.assert_array_equal(actual2, np.asarray(series.values)) - assert actual2.dtype == np.dtype("datetime64[s]") + np.testing.assert_array_equal(actual2, np.asarray(series.array)) + assert actual2.dtype == times_s.dtype def test_full_like(self) -> None: # For more thorough tests, see test_variable.py @@ -3096,8 +3097,13 @@ def test_datetime_conversion(values, unit) -> None: else: # The only case where a non-datetime64 dtype can occur currently is in # the case that the variable is backed by a timezone-aware - # DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class. - assert isinstance(var._data, PandasIndexingAdapter) + # DatetimeIndex/DatetimeArray, and thus is hidden within the PandasIndexingAdapter/PandasExtensionArray class. + assert isinstance( + var._data, + PandasIndexingAdapter + if isinstance(values, pd.DatetimeIndex) + else PandasExtensionArray, + ) assert var._data.array.dtype == pd.DatetimeTZDtype( "ns", pytz.timezone("America/New_York") ) @@ -3132,19 +3138,9 @@ def test_pandas_two_only_datetime_conversion_warnings( ) -> None: # todo: check for redundancy (suggested per review) var = Variable(["time"], data.astype(dtype)) # type: ignore[arg-type] - - # we internally convert series to numpy representations to avoid too much nastiness with extension arrays - # when calling data.array e.g., with NumpyExtensionArrays - if isinstance(data, pd.Series): - assert var.dtype == np.dtype("datetime64[s]") - elif var.dtype.kind == "M": - assert var.dtype == dtype - else: - # The only case where a non-datetime64 dtype can occur currently is in - # the case that the variable is backed by a timezone-aware - # DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class.
+ assert var.dtype == dtype + if isinstance(data, pd.DatetimeIndex): assert isinstance(var._data, PandasIndexingAdapter) - assert var._data.array.dtype == pd.DatetimeTZDtype("s", tz_ny) @pytest.mark.parametrize(