apache · jorisvandenbossche · Oct 12, 2020 · Oct 12, 2020
diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -32,7 +32,7 @@
 @pytest.mark.pandas
 def test_deserialize_pandas_arrow_7956():
     df = pd.DataFrame({'a': np.arange(10000),
-                       'b': [pd.util.testing.rands(5) for _ in range(10000)]})
+                       'b': [test_util.rands(5) for _ in range(10000)]})
 
     def action():
         df_bytes = pa.ipc.serialize_pandas(df).to_pybytes()

diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
@@ -569,7 +569,7 @@ def test_ndarray_nested_numpy_double(from_pandas, inner_seq):
         inner_seq([1., 2., 3.]),
         inner_seq([np.nan]),
         None
-    ])
+    ], dtype=object)
     arr = pa.array(data, from_pandas=from_pandas)
     assert len(arr) == 4
     assert arr.null_count == 1

diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
@@ -169,9 +169,9 @@ def multisourcefs(request):
         with mockfs.open_output_stream(path) as out:
             pq.write_table(_table_from_pandas(chunk), out)
 
-    # create one with schema partitioning by week and color
+    # create one with schema partitioning by weekday and color
     mockfs.create_dir('schema')
-    for part, chunk in df_b.groupby([df_b.date.dt.week, df_b.color]):
+    for part, chunk in df_b.groupby([df_b.date.dt.dayofweek, df_b.color]):
         folder = 'schema/{}/{}'.format(*part)
         path = '{}/chunk.parquet'.format(folder)
         mockfs.create_dir(folder)

diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
@@ -573,7 +573,7 @@ def test_compress_decompress(compression):
     INPUT_SIZE = 10000
     test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
                  .astype(np.uint8)
-                 .tostring())
+                 .tobytes())
     test_buf = pa.py_buffer(test_data)
 
     compressed_buf = pa.compress(test_buf, codec=compression)

diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
@@ -765,6 +765,7 @@ def test_serialize_pandas_no_preserve_index():
     assert_frame_equal(result, df)
 
 
+@pytest.mark.filterwarnings("ignore:'pyarrow:DeprecationWarning")
 def test_serialize_with_pandas_objects():
     df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])
     s = pd.Series([1, 2, 3, 4])

diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
@@ -1793,7 +1793,7 @@ def test_column_of_arrays_to_py(self):
             np.arange(5, dtype=dtype),
             None,
             np.arange(1, dtype=dtype)
-        ])
+        ], dtype=object)
         type_ = pa.list_(pa.int8())
         parr = pa.array(arr, type=type_)
 
@@ -2090,7 +2090,7 @@ def test_nested_large_list(self):
                       type=pa.large_list(pa.large_list(pa.int64())))
              .to_pandas())
         tm.assert_series_equal(
-            s, pd.Series([[[1, 2, 3], [4]], None]),
+            s, pd.Series([[[1, 2, 3], [4]], None], dtype=object),
             check_names=False)
 
     def test_large_binary_list(self):
@@ -2717,7 +2717,11 @@ def test_safe_unsafe_casts(self):
 
     def test_error_sparse(self):
         # ARROW-2818
-        df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
+        try:
+            df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])})
+        except AttributeError:
+            # pandas.arrays module introduced in pandas 0.24
+            df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
         with pytest.raises(TypeError, match="Sparse pandas data"):
             pa.Table.from_pandas(df)
 

diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
@@ -4195,6 +4195,7 @@ def test_filter_before_validate_schema(tempdir, use_legacy_dataset):
 @pytest.mark.pandas
 @pytest.mark.fastparquet
 @pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning")
+@pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet")
 def test_fastparquet_cross_compatibility(tempdir):
     fp = pytest.importorskip('fastparquet')
 

diff --git a/python/pyarrow/tests/test_plasma.py b/python/pyarrow/tests/test_plasma.py
@@ -307,6 +307,8 @@ def test_put_and_get(self):
             [result] = self.plasma_client.get([object_id], timeout_ms=0)
             assert result == pa.plasma.ObjectNotAvailable
 
+    @pytest.mark.filterwarnings(
+        "ignore:'pyarrow.deserialize':DeprecationWarning")
     def test_put_and_get_raw_buffer(self):
         temp_id = random_object_id()
         use_meta = b"RAW"
@@ -338,6 +340,8 @@ def deserialize_or_output(data_tuple):
             result = deserialize_or_output(result)
             assert result == pa.plasma.ObjectNotAvailable
 
+    @pytest.mark.filterwarnings(
+        "ignore:'serialization_context':DeprecationWarning")
     def test_put_and_get_serialization_context(self):
 
         class CustomType:
@@ -349,7 +353,7 @@ def __init__(self, val):
         with pytest.raises(pa.ArrowSerializationError):
             self.plasma_client.put(val)
 
-        serialization_context = pa.SerializationContext()
+        serialization_context = pa.lib.SerializationContext()
         serialization_context.register_type(CustomType, 20*"\x00")
 
         object_id = self.plasma_client.put(

diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py
@@ -20,7 +20,7 @@
 import pyarrow as pa
 import numpy as np
 import pandas as pd
-import pandas.util.testing as tm
+from pyarrow.tests.util import rands
 import memory_profiler
 import gc
 import io
@@ -85,7 +85,7 @@ def test_leak3():
                        for i in range(50)})
     table = pa.Table.from_pandas(df, preserve_index=False)
 
-    writer = pq.ParquetWriter('leak_test_' + tm.rands(5) + '.parquet',
+    writer = pq.ParquetWriter('leak_test_' + rands(5) + '.parquet',
                               table.schema)
 
     def func():