15 changes: 14 additions & 1 deletion bigquery/google/cloud/bigquery/client.py
@@ -28,7 +28,12 @@
 import os
 import tempfile
 import uuid
+import warnings
 
+try:
+    import pyarrow
+except ImportError:  # pragma: NO COVER
+    pyarrow = None
 import six
 
 from google import resumable_media
@@ -1304,9 +1309,17 @@ def load_table_from_dataframe(
         os.close(tmpfd)
 
         try:
-            if job_config.schema:
+            if pyarrow and job_config.schema:
                 _pandas_helpers.to_parquet(dataframe, job_config.schema, tmppath)
             else:
+                if job_config.schema:
+                    warnings.warn(
+                        "job_config.schema is set, but not used to assist in "
+                        "identifying correct types for data serialization. "
+                        "Please install the pyarrow package.",
+                        PendingDeprecationWarning,
+                        stacklevel=2,
+                    )
                 dataframe.to_parquet(tmppath)
 
             with open(tmppath, "rb") as parquet_file:
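
With this change, load_table_from_dataframe only serializes with the user-supplied schema when pyarrow is importable; otherwise it falls back to DataFrame.to_parquet and warns that the schema is being ignored. A minimal sketch of how a caller would observe the fallback, assuming a configured client and an existing destination table (the dataset and table names below are illustrative):

import warnings

import pandas
from google.cloud import bigquery

client = bigquery.Client()  # assumes application default credentials
table_ref = client.dataset("my_dataset").table("my_table")  # hypothetical target

dataframe = pandas.DataFrame([{"name": "Monty", "age": 100}])
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ]
)

with warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")
    client.load_table_from_dataframe(dataframe, table_ref, job_config=job_config)

# Without pyarrow installed, the schema does not drive serialization and a
# PendingDeprecationWarning suggests installing the pyarrow package.
for warning in warned:
    if issubclass(warning.category, PendingDeprecationWarning):
        print(warning.message)

Using PendingDeprecationWarning here presumably keeps existing pipelines working while signaling that the schema-less path may eventually go away.
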
48 changes: 48 additions & 0 deletions bigquery/tests/unit/test_client.py
@@ -20,6 +20,7 @@
 import io
 import json
 import unittest
+import warnings
 
 import mock
 import requests
@@ -5000,6 +5001,53 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
         assert sent_config is job_config
         assert sent_config.source_format == job.SourceFormat.PARQUET
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        dataframe = pandas.DataFrame(records)
+        schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER"))
+        job_config = job.LoadJobConfig(schema=schema)
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+        pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None)
+
+        with load_patch as load_table_from_file, pyarrow_patch, warnings.catch_warnings(
+            record=True
+        ) as warned:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+            )
+
+        assert len(warned) == 1
+        warning = warned[0]
+        assert warning.category is PendingDeprecationWarning
+        assert "pyarrow" in str(warning)
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == schema
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_nulls(self):
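
The new test forces the no-pyarrow code path by patching the module-level name that client.py binds in its try/except import, rather than manipulating the installed environment. The same pattern in isolation (a sketch; assumes google-cloud-bigquery and the standalone mock package are installed):

import mock

from google.cloud.bigquery import client as bq_client

# Patching the name bound inside the module under test simulates an absent
# optional dependency without uninstalling anything.
with mock.patch("google.cloud.bigquery.client.pyarrow", None):
    assert bq_client.pyarrow is None  # code under test now takes the fallback path

# The original binding is restored once the context manager exits.

Combining this with warnings.catch_warnings(record=True) lets the test assert on the emitted PendingDeprecationWarning instead of letting it leak into the test output.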