
Commit ca715d0

Fix breaking change. Don't require pyarrow if schema is set, but warn. (#8202)
1 parent c53dfd0 commit ca715d0

File tree

2 files changed: +62 -1 lines changed

bigquery/google/cloud/bigquery/client.py

Lines changed: 14 additions & 1 deletion

@@ -28,7 +28,12 @@
 import os
 import tempfile
 import uuid
+import warnings

+try:
+    import pyarrow
+except ImportError:  # pragma: NO COVER
+    pyarrow = None
 import six

 from google import resumable_media
@@ -1304,9 +1309,17 @@ def load_table_from_dataframe(
         os.close(tmpfd)

         try:
-            if job_config.schema:
+            if pyarrow and job_config.schema:
                 _pandas_helpers.to_parquet(dataframe, job_config.schema, tmppath)
             else:
+                if job_config.schema:
+                    warnings.warn(
+                        "job_config.schema is set, but not used to assist in "
+                        "identifying correct types for data serialization. "
+                        "Please install the pyarrow package.",
+                        PendingDeprecationWarning,
+                        stacklevel=2,
+                    )
                 dataframe.to_parquet(tmppath)

             with open(tmppath, "rb") as parquet_file:
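For users who have pandas but not pyarrow installed, the effect of this change is that load_table_from_dataframe no longer fails when job_config.schema is set: serialization falls back to pandas' own to_parquet path and a PendingDeprecationWarning is emitted instead. A minimal sketch of observing the new behavior (the project, dataset, and table names are placeholders, and it assumes application default credentials and an environment without pyarrow):

import warnings

import pandas
from google.cloud import bigquery

client = bigquery.Client()  # assumes application default credentials
table_ref = client.dataset("my_dataset").table("my_table")  # placeholder names

dataframe = pandas.DataFrame(
    [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
)
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ]
)

with warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")  # ensure the warning is not filtered out
    load_job = client.load_table_from_dataframe(
        dataframe, table_ref, job_config=job_config
    )
    load_job.result()  # wait for the load job to finish

# Without pyarrow the schema cannot guide serialization; before this commit
# the call raised, now it warns and proceeds.
for w in warned:
    print(w.category.__name__, str(w.message))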

bigquery/tests/unit/test_client.py

Lines changed: 48 additions & 0 deletions

@@ -20,6 +20,7 @@
 import io
 import json
 import unittest
+import warnings

 import mock
 import requests
@@ -4991,6 +4992,53 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
         assert sent_config is job_config
         assert sent_config.source_format == job.SourceFormat.PARQUET

+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        dataframe = pandas.DataFrame(records)
+        schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER"))
+        job_config = job.LoadJobConfig(schema=schema)
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+        pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None)
+
+        with load_patch as load_table_from_file, pyarrow_patch, warnings.catch_warnings(
+            record=True
+        ) as warned:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+            )
+
+        assert len(warned) == 1
+        warning = warned[0]
+        assert warning.category is PendingDeprecationWarning
+        assert "pyarrow" in str(warning)
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == schema
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_nulls(self):
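Two testing techniques do the heavy lifting in the test above: mock.patch("google.cloud.bigquery.client.pyarrow", None) overrides the module-level name so the no-pyarrow branch runs even on machines where pyarrow is installed, and warnings.catch_warnings(record=True) captures the emitted warning for assertion. A self-contained sketch of the same pattern against a toy module (toymod and everything in it is hypothetical, not library code):

import sys
import types
import warnings
from unittest import mock

# A toy module whose optional dependency is resolved once at import time,
# mirroring client.py's `try: import pyarrow / except ImportError: pyarrow = None`.
toymod = types.ModuleType("toymod")
toymod.pyarrow = object()  # pretend pyarrow imported successfully

def _convert(schema):
    # Reads the *module attribute*, which is what lets a test patch toymod.pyarrow.
    if toymod.pyarrow and schema:
        return "schema-aware"
    if schema:
        warnings.warn("please install pyarrow", PendingDeprecationWarning, stacklevel=2)
    return "plain"

toymod.convert = _convert
sys.modules["toymod"] = toymod  # register so mock.patch can resolve the target string

# Patch the attribute the code under test actually reads, and record warnings.
with mock.patch("toymod.pyarrow", None), warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")  # make sure the warning is not filtered away
    result = toymod.convert(schema=("name", "age"))

assert result == "plain"
assert len(warned) == 1
assert warned[0].category is PendingDeprecationWarning
assert toymod.pyarrow is not None  # the patch is undone on exiting the context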
