
Commit ca715d0

Fix breaking change. Don't require pyarrow if schema is set, but warn. (#8202)
1 parent c53dfd0 commit ca715d0

File tree

2 files changed: +62 -1 lines changed

bigquery/google/cloud/bigquery/client.py

Lines changed: 14 additions & 1 deletion

@@ -28,7 +28,12 @@
 import os
 import tempfile
 import uuid
+import warnings

+try:
+    import pyarrow
+except ImportError:  # pragma: NO COVER
+    pyarrow = None
 import six

 from google import resumable_media
@@ -1304,9 +1309,17 @@ def load_table_from_dataframe(
         os.close(tmpfd)

         try:
-            if job_config.schema:
+            if pyarrow and job_config.schema:
                 _pandas_helpers.to_parquet(dataframe, job_config.schema, tmppath)
             else:
+                if job_config.schema:
+                    warnings.warn(
+                        "job_config.schema is set, but not used to assist in "
+                        "identifying correct types for data serialization. "
+                        "Please install the pyarrow package.",
+                        PendingDeprecationWarning,
+                        stacklevel=2,
+                    )
                 dataframe.to_parquet(tmppath)

             with open(tmppath, "rb") as parquet_file:
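For users who have pandas but not pyarrow installed, the effect of this change is that load_table_from_dataframe no longer fails when job_config.schema is set: serialization falls back to pandas' own to_parquet path and a PendingDeprecationWarning is emitted instead. A minimal sketch of observing the new behavior (the project, dataset, and table names are placeholders, and it assumes application default credentials and an environment without pyarrow):

import warnings

import pandas
from google.cloud import bigquery

client = bigquery.Client()  # assumes application default credentials
table_ref = client.dataset("my_dataset").table("my_table")  # placeholder names

dataframe = pandas.DataFrame(
    [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
)
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ]
)

with warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")  # ensure the warning is not filtered out
    load_job = client.load_table_from_dataframe(
        dataframe, table_ref, job_config=job_config
    )
    load_job.result()  # wait for the load job to finish

# Without pyarrow the schema cannot guide serialization; before this commit
# the call raised, now it warns and proceeds.
for w in warned:
    print(w.category.__name__, str(w.message))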

bigquery/tests/unit/test_client.py

Lines changed: 48 additions & 0 deletions

@@ -20,6 +20,7 @@
 import io
 import json
 import unittest
+import warnings

 import mock
 import requests
@@ -4991,6 +4992,53 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
         assert sent_config is job_config
         assert sent_config.source_format == job.SourceFormat.PARQUET

+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        dataframe = pandas.DataFrame(records)
+        schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER"))
+        job_config = job.LoadJobConfig(schema=schema)
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+        pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None)
+
+        with load_patch as load_table_from_file, pyarrow_patch, warnings.catch_warnings(
+            record=True
+        ) as warned:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+            )
+
+        assert len(warned) == 1
+        warning = warned[0]
+        assert warning.category is PendingDeprecationWarning
+        assert "pyarrow" in str(warning)
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == schema
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_nulls(self):
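Two testing techniques do the heavy lifting in the test above: mock.patch("google.cloud.bigquery.client.pyarrow", None) overrides the module-level name so the no-pyarrow branch runs even on machines where pyarrow is installed, and warnings.catch_warnings(record=True) captures the emitted warning for assertion. A self-contained sketch of the same pattern against a toy module (toymod and everything in it is hypothetical, not library code):

import sys
import types
import warnings
from unittest import mock

# A toy module whose optional dependency is resolved once at import time,
# mirroring client.py's `try: import pyarrow / except ImportError: pyarrow = None`.
toymod = types.ModuleType("toymod")
toymod.pyarrow = object()  # pretend pyarrow imported successfully

def _convert(schema):
    # Reads the *module attribute*, which is what lets a test patch toymod.pyarrow.
    if toymod.pyarrow and schema:
        return "schema-aware"
    if schema:
        warnings.warn("please install pyarrow", PendingDeprecationWarning, stacklevel=2)
    return "plain"

toymod.convert = _convert
sys.modules["toymod"] = toymod  # register so mock.patch can resolve the target string

# Patch the attribute the code under test actually reads, and record warnings.
with mock.patch("toymod.pyarrow", None), warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")  # make sure the warning is not filtered away
    result = toymod.convert(schema=("name", "age"))

assert result == "plain"
assert len(warned) == 1
assert warned[0].category is PendingDeprecationWarning
assert toymod.pyarrow is not None  # the patch is undone on exiting the context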
