From ba49697ec538674a8501d324b2e4d296e3b27dd8 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Jul 2021 16:59:58 +0200 Subject: [PATCH 01/24] process: make BQ Storage and pyarrow required --- setup.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/setup.py b/setup.py index 71958ccf9..b23b90d2e 100644 --- a/setup.py +++ b/setup.py @@ -32,24 +32,18 @@ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", "proto-plus >= 1.10.0", + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", "google-cloud-core >= 1.4.1, < 3.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "pyarrow >= 1.0.0, < 5.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. - # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 5.0dev", - ], + # Keep the no-op bqstorage extra for backward compatibility. + # See: https://github.com/googleapis/python-bigquery/issues/757 + "bqstorage": [], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], From 2c791522fcbfecbbf79c939097786c3fc565bcc5 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Jul 2021 17:27:04 +0200 Subject: [PATCH 02/24] Make pyarrow required in _pandas_helpers.py --- google/cloud/bigquery/_pandas_helpers.py | 123 ++++++++++------------- tests/unit/test__pandas_helpers.py | 45 +-------- tests/unit/test_client.py | 56 ----------- 3 files changed, 55 insertions(+), 169 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 2ff96da4d..d3b28c100 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -27,11 +27,8 @@ except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.parquet -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow +import pyarrow.parquet try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -104,63 +101,57 @@ def pyarrow_timestamp(): return pyarrow.timestamp("us", tz="UTC") -if pyarrow: - # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py - # When modifying it be sure to update it there as well. 
- BQ_TO_ARROW_SCALARS = { - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, - } - ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False +# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py +# When modifying it be sure to update it there as well. +BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, +} +ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. 
+ pyarrow.decimal128(38, scale=9).id: "NUMERIC", +} -else: # pragma: NO COVER - BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER - ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER +if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True +else: + _BIGNUMERIC_SUPPORT = False def bq_to_arrow_struct_data_type(field): @@ -344,13 +335,6 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # If schema detection was not successful for all columns, also try with # pyarrow, if available. if unknown_type_fields: - if not pyarrow: - msg = u"Could not determine the type of columns: {}".format( - ", ".join(field.name for field in unknown_type_fields) - ) - warnings.warn(msg) - return None # We cannot detect the schema in full. - # The augment_schema() helper itself will also issue unknown type # warnings if detection still fails for any of the fields. bq_schema_out = augment_schema(dataframe, bq_schema_out) @@ -492,9 +476,6 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN serializing method. Defaults to "SNAPPY". https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table """ - if pyarrow is None: - raise ValueError("pyarrow is required for BigQuery schema conversion.") - bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0ba671cd9..7fdd80cb7 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -29,13 +29,9 @@ import pandas.testing except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - # Mock out pyarrow when missing, because methods from pyarrow.types are - # used in test parameterization. 
- pyarrow = mock.Mock() + +import pyarrow +import pyarrow.types import pytest import pytz @@ -1041,14 +1037,6 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): assert list(arrow_schema) == expected_fields -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): - monkeypatch.setattr(module_under_test, "pyarrow", None) - with pytest.raises(ValueError) as exc_context: - module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) - assert "pyarrow is required" in str(exc_context.value) - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): @@ -1093,33 +1081,6 @@ def test_dataframe_to_parquet_compression_method(module_under_test): assert call_args.kwargs.get("compression") == "ZSTD" -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): - dataframe = pandas.DataFrame( - data=[ - {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, - ] - ) - - no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) - - with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: - detected_schema = module_under_test.dataframe_to_bq_schema( - dataframe, bq_schema=[] - ) - - assert detected_schema is None - - # a warning should also be issued - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - msg = str(expected_warnings[0]) - assert "execution_date" in msg and "created_at" in msg - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6b62eb85b..ca2b69940 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7392,62 +7392,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_partial_schema_missing_types(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES - from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField - - client = self._make_client() - df_data = collections.OrderedDict( - [ - ("string_col", ["abc", "def", "ghi"]), - ("unknown_col", [b"jkl", None, b"mno"]), - ] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - - schema = (SchemaField("string_col", "STRING"),) - job_config = job.LoadJobConfig(schema=schema) - with pyarrow_patch, load_patch as load_table_from_file, warnings.catch_warnings( - record=True - ) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION - ) - - 
load_table_from_file.assert_called_once_with( - client, - mock.ANY, - self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - size=mock.ANY, - job_id=mock.ANY, - job_id_prefix=None, - location=self.LOCATION, - project=None, - job_config=mock.ANY, - timeout=None, - ) - - assert warned # there should be at least one warning - unknown_col_warnings = [ - warning for warning in warned if "unknown_col" in str(warning) - ] - assert unknown_col_warnings - assert unknown_col_warnings[0].category == UserWarning - - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema is None - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): From 137ae92ca63fbd80243f70a438e1a35f1ab1d824 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 15 Jul 2021 16:17:45 +0200 Subject: [PATCH 03/24] Make pyarrow required in client.py --- google/cloud/bigquery/client.py | 14 +--------- tests/system/test_client.py | 9 ++----- tests/unit/test_client.py | 48 --------------------------------- 3 files changed, 3 insertions(+), 68 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8572ba911..ea127fd94 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -32,11 +32,6 @@ import uuid import warnings -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload @@ -2490,7 +2485,7 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. - Indexes are not loaded. Requires the :mod:`pyarrow` library. + Indexes are not loaded. By default, this method uses the parquet source format. To override this, supply a value for @@ -2520,9 +2515,6 @@ def load_table_from_dataframe( google.cloud.bigquery.job.LoadJob: A new load job. Raises: - ValueError: - If a usable parquet engine cannot be found. This method - requires :mod:`pyarrow` to be installed. TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. @@ -2550,10 +2542,6 @@ def load_table_from_dataframe( ) ) - if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: - # pyarrow is now the only supported parquet engine. 
- raise ValueError("This method requires pyarrow to be installed") - if location is None: location = self.location diff --git a/tests/system/test_client.py b/tests/system/test_client.py index cbca73619..9966d7135 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -43,11 +43,8 @@ except ImportError: # pragma: NO COVER fastavro = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow +import pyarrow.types from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest @@ -1639,7 +1636,6 @@ def test_dbapi_create_view(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -2288,7 +2284,6 @@ def test_create_table_rows_fetch_nested_schema(self): def _fetch_dataframe(self, query): return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ca2b69940..e53c0d9fc 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -46,10 +46,6 @@ ) except (ImportError, AttributeError): # pragma: NO COVER opentelemetry = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None import google.api_core.exceptions from google.api_core import client_info @@ -6678,7 +6674,6 @@ def test_load_table_from_file_w_invalid_job_config(self): assert "Expected an instance of LoadJobConfig" in err_msg @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6765,7 +6760,6 @@ def test_load_table_from_dataframe(self): assert "description" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6810,7 +6804,6 @@ def test_load_table_from_dataframe_w_client_location(self): assert sent_config.source_format == job.SourceFormat.PARQUET @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6865,7 +6858,6 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6921,7 +6913,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): assert job_config.to_api_repr() == 
original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): from google.cloud.bigquery import job @@ -6941,7 +6932,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7018,7 +7008,6 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7080,7 +7069,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): assert sent_schema == expected_sent_schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -7119,7 +7107,6 @@ def test_load_table_from_dataframe_unknown_table(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7167,7 +7154,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7212,7 +7198,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7272,7 +7257,6 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7356,7 +7340,6 @@ def test_load_table_from_dataframe_w_partial_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7393,7 +7376,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas 
is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7426,36 +7408,6 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args.kwargs.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): - client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - to_parquet_patch = mock.patch.object( - dataframe, "to_parquet", wraps=dataframe.to_parquet - ) - - with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: - with pytest.raises(ValueError): - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - parquet_compression="gzip", - ) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. From 42c960630ef56d6d4ea85fc180ca918fa0e81a9e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 11:31:54 +0200 Subject: [PATCH 04/24] Make pyarrow required in table.py --- google/cloud/bigquery/table.py | 27 ++++------------------- tests/unit/test_table.py | 39 +++------------------------------- 2 files changed, 7 insertions(+), 59 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 2d9c15f50..7db6e4252 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -30,10 +30,7 @@ except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator @@ -53,7 +50,6 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas - import pyarrow from google.cloud import bigquery_storage @@ -61,10 +57,6 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) -_NO_PYARROW_ERROR = ( - "The pyarrow library is not installed, please install " - "pyarrow to use the to_arrow() function." -) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -1670,8 +1662,7 @@ def to_arrow( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1692,14 +1683,8 @@ def to_arrow( headers from the query results. 
The column headers are derived from the destination table's schema. - Raises: - ValueError: If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1754,8 +1739,7 @@ def to_dataframe_iterable( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1831,8 +1815,7 @@ def to_dataframe( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1974,8 +1957,6 @@ def to_arrow( Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) def to_dataframe( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 37650cd27..f17787ae5 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -20,6 +20,8 @@ import mock import pkg_resources +import pyarrow +import pyarrow.types import pytest import pytz @@ -40,15 +42,6 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types - - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - PYARROW_VERSION = pkg_resources.parse_version("0.0.1") - try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -57,6 +50,7 @@ from google.cloud.bigquery.dataset import DatasetReference +PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") @@ -1619,13 +1613,6 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) - @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - def test_to_arrow_error_if_pyarrow_is_none(self): - row_iterator = self._make_one() - with self.assertRaises(ValueError): - row_iterator.to_arrow() - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): row_iterator = self._make_one() tbl = row_iterator.to_arrow() @@ -1904,7 +1891,6 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): ] assert matching_warnings, "Obsolete dependency warning not raised." 
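(Usage sketch, not part of the patch: with pyarrow now an unconditional import in table.py, callers no longer need to guard RowIterator.to_arrow() against a missing dependency. The project/dataset/table name below is hypothetical and default credentials are assumed.)

    from google.cloud import bigquery

    client = bigquery.Client()
    rows = client.list_rows("my-project.my_dataset.my_table")  # hypothetical table

    # pyarrow is a hard dependency after this change, so no ImportError guard is
    # needed; create_bqstorage_client=True lets the (also required) BigQuery
    # Storage API download the rows when possible.
    arrow_table = rows.to_arrow(create_bqstorage_client=True)
    print(arrow_table.schema, arrow_table.num_rows)
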
- @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -1986,7 +1972,6 @@ def test_to_arrow(self): ], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): from google.cloud.bigquery.schema import SchemaField @@ -2019,7 +2004,6 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): from google.cloud.bigquery.schema import SchemaField @@ -2062,7 +2046,6 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2101,7 +2084,6 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2141,7 +2123,6 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2222,7 +2203,6 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2253,7 +2233,6 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField @@ -2280,7 +2259,6 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2320,7 +2298,6 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @mock.patch("tqdm.tqdm_notebook") @@ -2459,7 +2436,6 @@ def test_to_dataframe_iterable_with_dtypes(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2573,7 +2549,6 @@ def test_to_dataframe(self): self.assertEqual(df.age.dtype.name, "int64") @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def 
test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -2604,7 +2579,6 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3026,7 +3000,6 @@ def test_to_dataframe_w_bqstorage_no_streams(self): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3051,7 +3024,6 @@ def test_to_dataframe_w_bqstorage_logs_session(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3106,7 +3078,6 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3186,7 +3157,6 @@ def test_to_dataframe_w_bqstorage_nonempty(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3241,7 +3211,6 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): from google.cloud.bigquery import schema @@ -3319,7 +3288,6 @@ def blocking_to_arrow(*args, **kwargs): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3498,7 +3466,6 @@ def test_to_dataframe_w_bqstorage_snapshot(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut From 5ae720d6e1b4dd2c9465ae1491b15b9c91a4ae7c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 12:03:02 +0200 Subject: [PATCH 05/24] Make pyarrow required in job/query.py --- google/cloud/bigquery/job/query.py | 7 +------ tests/unit/job/test_query_pandas.py | 12 ++---------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index d588e9b5a..23eecfdf7 100644 --- a/google/cloud/bigquery/job/query.py +++ 
b/google/cloud/bigquery/job/query.py @@ -1373,8 +1373,7 @@ def to_arrow( BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. Reading from a specific partition or snapshot is not currently supported by this method. @@ -1399,10 +1398,6 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c537802f4..c68bbb8dc 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -17,16 +17,14 @@ import json import mock +import pyarrow import pytest try: import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None + try: from google.cloud import bigquery_storage except (ImportError, AttributeError): # pragma: NO COVER @@ -151,7 +149,6 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class @@ -238,7 +235,6 @@ def test_to_arrow(): ] -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class @@ -273,7 +269,6 @@ def test_to_arrow_max_results_no_progress_bar(): assert tbl.num_rows == 2 -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_query_plan(): from google.cloud.bigquery import table @@ -330,7 +325,6 @@ def test_to_arrow_w_tqdm_w_query_plan(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_pending_status(): from google.cloud.bigquery import table @@ -383,7 +377,6 @@ def test_to_arrow_w_tqdm_w_pending_status(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_wo_query_plan(): from google.cloud.bigquery import table @@ -643,7 +636,6 @@ def test_to_dataframe_column_dtypes(): assert df.date.dtype.name == "object" -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class From ede031322ce0dffe4a12bd045d25eefacf0f2c48 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 12:17:32 +0200 Subject: [PATCH 06/24] Make pyarrow required in DBAPI tests --- tests/unit/test_dbapi__helpers.py | 6 ------ tests/unit/test_dbapi_cursor.py | 8 -------- 2 files changed, 14 deletions(-) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index b33203354..4afc47b6c 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -21,11 +21,6 @@ import pytest -try: - import 
pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - import google.cloud._helpers from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import _helpers @@ -215,7 +210,6 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ dict( diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 026810aaf..5f4ee3c6c 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -18,12 +18,6 @@ import pytest - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.api_core import exceptions try: @@ -282,7 +276,6 @@ def test_fetchall_w_row(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -393,7 +386,6 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table From ce7f93dd236ef98e247f1ac9851571b12c0539ad Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 12:22:16 +0200 Subject: [PATCH 07/24] Make pyarrow required in snippets tests --- docs/snippets.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 3f9b9a88c..82e07901e 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -30,10 +30,6 @@ import pandas except (ImportError, AttributeError): pandas = None -try: - import pyarrow -except (ImportError, AttributeError): - pyarrow = None from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable From 3f7c4567f4ba7f2d6f0c968a90dace323551a43f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 12:48:19 +0200 Subject: [PATCH 08/24] Make BQ storage required in client.py --- google/cloud/bigquery/client.py | 20 ++++------------ tests/system/test_client.py | 15 +----------- tests/unit/test_client.py | 42 +-------------------------------- 3 files changed, 7 insertions(+), 70 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index ea127fd94..89e4020ba 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -45,12 +45,9 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # pytype: disable=import-error -try: - from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( - DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, - ) -except ImportError: - DEFAULT_BQSTORAGE_CLIENT_INFO = None +from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, +) from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop @@ -472,7 +469,7 @@ def _ensure_bqstorage_client( ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using 
this client's credentials. - If a client cannot be created due to a missing or outdated dependency + If a client cannot be created due to an outdated dependency `google-cloud-bigquery-storage`, raise a warning and return ``None``. If the `bqstorage_client` argument is not ``None``, still perform the version @@ -493,14 +490,7 @@ def _ensure_bqstorage_client( Returns: A BigQuery Storage API client. """ - try: - from google.cloud import bigquery_storage - except ImportError: - warnings.warn( - "Cannot create BigQuery Storage client, the dependency " - "google-cloud-bigquery-storage is not installed." - ) - return None + from google.cloud import bigquery_storage try: BQ_STORAGE_VERSIONS.verify_version() diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 9966d7135..1e09cd195 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -33,11 +33,6 @@ from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - try: import fastavro # to parse BQ storage client results except ImportError: # pragma: NO COVER @@ -64,6 +59,7 @@ from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums +from google.cloud import bigquery_storage from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient @@ -1633,9 +1629,6 @@ def test_dbapi_create_view(self): Config.CURSOR.execute(query) self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -1694,9 +1687,6 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -2284,9 +2274,6 @@ def test_create_table_rows_fetch_nested_schema(self): def _fetch_dataframe(self, query): return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index e53c0d9fc..2f311b8f5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -50,14 +50,10 @@ import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers +from google.cloud import bigquery_storage from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None -from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -600,9 +596,6 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) - @unittest.skipIf( - 
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() @@ -625,33 +618,6 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) - def test_ensure_bqstorage_client_missing_dependency(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning - for warning in warned - if "not installed" in str(warning) - and "google-cloud-bigquery-storage" in str(warning) - ] - assert matching_warnings, "Missing dependency warning not raised." - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_obsolete_dependency(self): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -671,9 +637,6 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): ] assert matching_warnings, "Obsolete dependency warning not raised." - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -685,9 +648,6 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_existing_client_check_fails(self): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError From ed412c001616a81f526a6f1c9435fa048a857db9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 13:43:20 +0200 Subject: [PATCH 09/24] Make BQ storage required in table.py --- google/cloud/bigquery/table.py | 19 +++----- tests/unit/test_table.py | 88 ++-------------------------------- 2 files changed, 10 insertions(+), 97 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 7db6e4252..c88488b7b 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -36,6 +36,7 @@ from google.api_core.page_iterator import HTTPIterator import google.cloud._helpers +from google.cloud import bigquery_storage from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -50,7 +51,6 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. 
import pandas - from google.cloud import bigquery_storage _NO_PANDAS_ERROR = ( @@ -1551,11 +1551,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError: - return False - try: _helpers.BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: @@ -1633,7 +1628,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a @@ -1728,7 +1723,7 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, dtypes: Dict[str, Any] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, ) -> "pandas.DataFrame": @@ -1802,7 +1797,7 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, @@ -1869,9 +1864,7 @@ def to_dataframe( Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. + If the :mod:`pandas` library cannot be imported. """ if pandas is None: @@ -1985,7 +1978,7 @@ def to_dataframe( def to_dataframe_iterable( self, - bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, + bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: Optional[int] = None, ) -> Iterator["pandas.DataFrame"]: diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f17787ae5..28c5a7ff7 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -26,16 +26,11 @@ import pytz import google.api_core.exceptions -from test_utils.imports import maybe_fail_import -try: - from google.cloud import bigquery_storage - from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, - ) -except ImportError: # pragma: NO COVER - bigquery_storage = None - big_query_read_grpc_transport = None +from google.cloud import bigquery_storage +from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, +) try: import pandas @@ -1849,27 +1844,6 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) - def test__validate_bqstorage_returns_false_if_missing_dependency(self): - iterator = self._make_one(first_page_response=None) # not cached - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - 
@unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -2084,9 +2058,6 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_create_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2123,9 +2094,6 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2203,9 +2171,6 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2259,9 +2224,6 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2433,9 +2395,6 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2937,9 +2896,6 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2968,9 +2924,6 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2996,9 +2949,6 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3021,9 +2971,6 
@@ def test_to_dataframe_w_bqstorage_logs_session(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3075,9 +3022,6 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3154,9 +3098,6 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3207,9 +3148,6 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): @@ -3285,9 +3223,6 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3403,9 +3338,6 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertTrue(df.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut @@ -3422,9 +3354,6 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3442,9 +3371,6 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3463,9 +3389,6 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def 
test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3950,9 +3873,6 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "table_path", ( From ef905c2d49de0ec3feef6d8ce77059134e710180 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 15:05:31 +0200 Subject: [PATCH 10/24] Make BQ storage required in DB API tests --- tests/unit/test_dbapi_connection.py | 22 +--------------------- tests/unit/test_dbapi_cursor.py | 18 +----------------- 2 files changed, 2 insertions(+), 38 deletions(-) diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 0576cad38..6b3a99439 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -17,10 +17,7 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage class TestConnection(unittest.TestCase): @@ -40,8 +37,6 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - # Assumption: bigquery_storage exists. It's the test's responisbility to - # not use this helper or skip itself if bqstroage is not installed. mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -58,9 +53,6 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -89,9 +81,6 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -107,9 +96,6 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -142,9 +128,6 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -165,9 +148,6 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git 
a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 5f4ee3c6c..f075bb6f7 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -19,11 +19,7 @@ import pytest from google.api_core import exceptions - -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage from tests.unit.helpers import _to_pyarrow @@ -273,9 +269,6 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -327,9 +320,6 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -351,9 +341,6 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -383,9 +370,6 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table From 9c106f329cbc8a7d61cb8100b9d0f6349ca5a0f6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 14:13:35 +0200 Subject: [PATCH 11/24] Make BQ storage required in magics.py --- google/cloud/bigquery/magics/magics.py | 11 ------ tests/unit/test_magics.py | 50 -------------------------- 2 files changed, 61 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2b8c2928e..dce911232 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -766,17 +766,6 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError as err: - customized_error = ImportError( - "The default BigQuery Storage API client cannot be used, install " - "the missing google-cloud-bigquery-storage and pyarrow packages " - "to use it. Alternatively, use the classic REST API by specifying " - "the --use_rest_api magic option." 
- ) - raise customized_error from err - try: from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index d030482cc..e134d73c3 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -76,19 +76,6 @@ def ipython_ns_cleanup(): del ip.user_ns[name] -@pytest.fixture(scope="session") -def missing_bq_storage(): - """Provide a patcher that can make the bigquery storage import to fail.""" - - def fail_if(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - return maybe_fail_import(predicate=fail_if) - - @pytest.fixture(scope="session") def missing_grpcio_lib(): """Provide a patcher that can make the gapic library import to fail.""" @@ -324,9 +311,6 @@ def test__make_bqstorage_client_false(): assert got is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -338,25 +322,6 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) -def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(test_client, True, {}) - - error_msg = str(exc_context.value) - assert "google-cloud-bigquery-storage" in error_msg - assert "pyarrow" in error_msg - - -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__make_bqstorage_client_true_obsolete_dependency(): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -382,9 +347,6 @@ def test__make_bqstorage_client_true_obsolete_dependency(): assert matching_warnings, "Obsolete dependency warning not raised." 
-@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( @@ -440,9 +402,6 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -603,9 +562,6 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -667,9 +623,6 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_rest_client_requested(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -887,9 +840,6 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() From 54382397ce3fd282bd527746216a79ca51f365b5 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 14:16:14 +0200 Subject: [PATCH 12/24] Make BQ storage required in test__helpers.py --- tests/unit/test__helpers.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index af026ccbe..0a36bb700 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,13 +19,7 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - -@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): def _object_under_test(self): from google.cloud.bigquery import _helpers From cfcc2d3e41b4cd35430d429e8a1d2e814608c6e3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 14:22:16 +0200 Subject: [PATCH 13/24] Make BQ storage required in test__pandas_helpers.py --- tests/unit/test__pandas_helpers.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 7fdd80cb7..b0495c3cd 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -40,12 +40,9 @@ from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT -try: - from google.cloud import bigquery_storage +from google.cloud import bigquery_storage - _helpers.BQ_STORAGE_VERSIONS.verify_version() -except ImportError: # pragma: NO COVER - bigquery_storage = None +_helpers.BQ_STORAGE_VERSIONS.verify_version() PANDAS_MINIUM_VERSION = 
pkg_resources.parse_version("1.0.0") @@ -1275,9 +1272,6 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage_stream_includes_read_session( monkeypatch, module_under_test ): @@ -1308,8 +1302,7 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( - bigquery_storage is None - or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1349,9 +1342,6 @@ def test__download_table_bqstorage_stream_omits_read_session( (7, {"max_queue_size": None}, 7, 0), # infinite queue size ], ) -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage( module_under_test, stream_count, From 86c4533e7a3d57406888ddaa798c6ed517348485 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 14:34:07 +0200 Subject: [PATCH 14/24] Make BQ storage required in test_query_pandas.py --- tests/unit/job/test_query_pandas.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c68bbb8dc..e5105974f 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -25,10 +25,8 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage + try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -81,9 +79,6 @@ def test__contains_order_by(query, expected): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "query", ( @@ -478,9 +473,6 @@ def test_to_dataframe_ddl_query(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -531,9 +523,6 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class From 560ccd691ce22c06a4a0d96406c6c2f800d3717e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Jul 2021 16:12:01 +0200 Subject: [PATCH 15/24] Make method signatures compatible again The annotations caused a mismatch --- google/cloud/bigquery/job/query.py | 4 ++-- google/cloud/bigquery/table.py | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 23eecfdf7..67955dbb1 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1342,7 +1342,7 @@ def result( def to_arrow( self, 
progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, ) -> "pyarrow.Table": @@ -1412,7 +1412,7 @@ def to_arrow( # that should only exist here in the QueryJob method. def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c88488b7b..fee98ce5e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -36,7 +36,6 @@ from google.api_core.page_iterator import HTTPIterator import google.cloud._helpers -from google.cloud import bigquery_storage from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -48,6 +47,8 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER + from google.cloud import bigquery_storage + # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas @@ -1628,7 +1629,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a @@ -1723,7 +1724,7 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, ) -> "pandas.DataFrame": @@ -1797,7 +1798,7 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, @@ -1978,7 +1979,7 @@ def to_dataframe( def to_dataframe_iterable( self, - bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: Optional[int] = None, ) -> Iterator["pandas.DataFrame"]: From e658d5a8e2469b56075ce4f8d2af22a9fc6beac3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 19 Jul 2021 22:24:16 +0200 Subject: [PATCH 16/24] Remove checks for minimum BQ Storage version Since this is now a required dependency, there should not be any more pip quirks that used to allow installing BQ Storage as an extra, but without always respecting its minimum version pin. 
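For reference, a minimal sketch (illustrative only, not part of this patch; the helper name is made up) of how calling code could still guard on the installed BQ Storage version itself if it cannot rely on the setup.py pin alone:

    import packaging.version
    from google.cloud import bigquery_storage

    _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")

    def _bq_storage_is_recent_enough() -> bool:
        # google-cloud-bigquery-storage exposes __version__; compare it to the pin.
        installed = packaging.version.parse(bigquery_storage.__version__)
        return installed >= _MIN_BQ_STORAGE_VERSION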
--- google/cloud/bigquery/_helpers.py | 26 ------------------- google/cloud/bigquery/client.py | 19 ++------------ google/cloud/bigquery/exceptions.py | 6 ++++- google/cloud/bigquery/table.py | 7 ------ tests/unit/test__helpers.py | 31 ----------------------- tests/unit/test__pandas_helpers.py | 1 - tests/unit/test_client.py | 39 ----------------------------- tests/unit/test_magics.py | 25 ------------------ tests/unit/test_table.py | 21 ---------------- 9 files changed, 7 insertions(+), 168 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index bf0f80e22..3d83ddee9 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -28,8 +28,6 @@ from google.cloud._helpers import _to_bytes import packaging.version -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -41,7 +39,6 @@ re.VERBOSE, ) -_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -75,29 +72,6 @@ def is_read_session_optional(self) -> bool: """ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION - def verify_version(self): - """Verify that a recent enough version of BigQuery Storage extra is - installed. - - The function assumes that google-cloud-bigquery-storage extra is - installed, and should thus be used in places where this assumption - holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Raises: - LegacyBigQueryStorageError: - If the google-cloud-bigquery-storage package is outdated. - """ - if self.installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {self.installed_version})." - ) - raise LegacyBigQueryStorageError(msg) - BQ_STORAGE_VERSIONS = BQStorageVersions() diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 89e4020ba..7aa7f3f3e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -53,7 +53,6 @@ from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -61,7 +60,6 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -469,17 +467,10 @@ def _ensure_bqstorage_client( ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to an outdated dependency - `google-cloud-bigquery-storage`, raise a warning and return ``None``. 
- - If the `bqstorage_client` argument is not ``None``, still perform the version - check and return the argument back to the caller if the check passes. If it - fails, raise a warning and return ``None``. - Args: bqstorage_client: - An existing BigQuery Storage client instance to check for version - compatibility. If ``None``, a new instance is created and returned. + An existing BigQuery Storage client instance. If ``None``, a new + instance is created and returned. client_options: Custom options used with a new BigQuery Storage client instance if one is created. @@ -492,12 +483,6 @@ def _ensure_bqstorage_client( """ from google.cloud import bigquery_storage - try: - BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return None - if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py index 6e5c27eb1..b8a46a2a7 100644 --- a/google/cloud/bigquery/exceptions.py +++ b/google/cloud/bigquery/exceptions.py @@ -18,4 +18,8 @@ class BigQueryError(Exception): class LegacyBigQueryStorageError(BigQueryError): - """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" + """Raised when too old a version of BigQuery Storage extra is detected at runtime. + + .. deprecated:: 2.2.0 + Not used anymore and will be removed in the future. + """ diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index fee98ce5e..6fc7c9889 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -38,7 +38,6 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -1552,12 +1551,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False - try: - _helpers.BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return False - return True def _get_next_page_response(self): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0a36bb700..aaafdb0f7 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -26,37 +26,6 @@ def _object_under_test(self): return _helpers.BQStorageVersions() - def _call_fut(self): - from google.cloud.bigquery import _helpers - - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - return _helpers.BQ_STORAGE_VERSIONS.verify_version() - - def test_raises_no_error_w_recent_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): - try: - self._call_fut() - except LegacyBigQueryStorageError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_raises_error_w_legacy_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(LegacyBigQueryStorageError): - self._call_fut() - - def test_raises_error_w_unknown_bqstorage_version(self): - from google.cloud.bigquery.exceptions import 
LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: - del fake_module.__version__ - error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): - self._call_fut() - def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index b0495c3cd..8d7ae3094 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -42,7 +42,6 @@ from google.cloud import bigquery_storage -_helpers.BQ_STORAGE_VERSIONS.verify_version() PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2f311b8f5..2dffef272 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -618,25 +618,6 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) - def test_ensure_bqstorage_client_obsolete_dependency(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -648,26 +629,6 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) - def test_ensure_bqstorage_client_existing_client_check_fails(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - mock_storage_client = mock.sentinel.mock_storage_client - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." 
- def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index e134d73c3..5181735d3 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -322,31 +322,6 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) -def test__make_bqstorage_client_true_obsolete_dependency(): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - got = magics._make_bqstorage_client(test_client, True, {}) - - assert got is None - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 28c5a7ff7..91864413d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1844,27 +1844,6 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) - def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - iterator = self._make_one(first_page_response=None) # not cached - - patcher = mock.patch( - "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField From 70572d877e83673aa26df28549b9432559be3d16 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 20 Jul 2021 11:26:34 +0200 Subject: [PATCH 17/24] Remove LegacyBigQueryStorageError Since it will be released in a major version bump, we can make this a breaking change, i.e. without deprecation. 
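For downstream code that still imports the removed exception, a hedged compatibility sketch (illustrative only, not part of this patch): once the class is gone the import raises ImportError, so a local stand-in keeps existing except-clauses working.

    try:
        from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
    except ImportError:
        # Newer google-cloud-bigquery releases no longer define (or raise) this error.
        class LegacyBigQueryStorageError(Exception):
            """Local stand-in so old except-clauses still parse and run."""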
--- google/cloud/bigquery/__init__.py | 3 --- google/cloud/bigquery/exceptions.py | 8 -------- 2 files changed, 11 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 222aadcc9..59bb08ce5 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -42,7 +42,6 @@ from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -171,8 +170,6 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", - # Custom exceptions - "LegacyBigQueryStorageError", ] diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py index b8a46a2a7..6e4b9109d 100644 --- a/google/cloud/bigquery/exceptions.py +++ b/google/cloud/bigquery/exceptions.py @@ -15,11 +15,3 @@ class BigQueryError(Exception): """Base class for all custom exceptions defined by the BigQuery client.""" - - -class LegacyBigQueryStorageError(BigQueryError): - """Raised when too old a version of BigQuery Storage extra is detected at runtime. - - .. deprecated:: 2.2.0 - Not used anymore and will be removed in the future. - """ From be5cf326dc61e9f8cf0d1a2c32ed242c81ecfeea Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 21 Jul 2021 18:45:07 +0200 Subject: [PATCH 18/24] Bump minimum pyarrow version to 3.0.0 --- setup.py | 10 ++-------- testing/constraints-3.6.txt | 2 +- tests/system/test_pandas.py | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 9839fe3ed..91458bb78 100644 --- a/setup.py +++ b/setup.py @@ -43,15 +43,14 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", - "pyarrow >= 1.0.0, < 5.0dev", + "pyarrow >= 3.0.0, < 5.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { # Keep the no-op bqstorage extra for backward compatibility. # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], + "pandas": ["pandas>=0.23.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", @@ -63,11 +62,6 @@ all_extras = [] for extra in extras: - # Exclude this extra from all to avoid overly strict dependencies on core - # libraries such as pyarrow. 
- # https://github.com/googleapis/python-bigquery/issues/563 - if extra in {"bignumeric_type"}: - continue all_extras.extend(extras[extra]) extras["all"] = all_extras diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..ce012f0d7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0 pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 -pyarrow==1.0.0 +pyarrow==3.0.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ddf5eaf43..c732e9d05 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -34,7 +34,6 @@ "google.cloud.bigquery_storage", minversion="2.0.0" ) pandas = pytest.importorskip("pandas", minversion="0.23.0") -pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version From 7960729ae8c006697ac7b7db36f4e1c553e08e38 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 21 Jul 2021 18:47:52 +0200 Subject: [PATCH 19/24] Remove unneeded pytest.importorskip for BQ Storage --- tests/system/test_pandas.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index c732e9d05..9bf2354da 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -27,12 +27,10 @@ from google.cloud import bigquery from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from google.cloud import bigquery_storage from . import helpers -bigquery_storage = pytest.importorskip( - "google.cloud.bigquery_storage", minversion="2.0.0" -) pandas = pytest.importorskip("pandas", minversion="0.23.0") From c0b810b39aa66b92e3c39b863cc2ce3d0971bca0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 22 Jul 2021 14:05:22 +0200 Subject: [PATCH 20/24] Remove pyarrow version checks in pandas helpers tests --- tests/unit/test__pandas_helpers.py | 42 +++--------------------------- 1 file changed, 3 insertions(+), 39 deletions(-) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 8d7ae3094..7d764fcb5 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -52,11 +52,6 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") -skip_if_no_bignumeric = pytest.mark.skipif( - not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", -) - - @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -112,7 +107,6 @@ def all_(*functions): return functools.partial(do_all, functions) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -145,9 +139,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - pytest.param( - "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, - ), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -226,11 +218,10 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - pytest.param( + ( "BIGNUMERIC", "REPEATED", 
all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), - marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -286,7 +277,6 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -294,7 +284,6 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -345,7 +334,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -396,7 +384,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -433,7 +420,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - pytest.param( + ( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -441,7 +428,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], - marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), @@ -494,7 +480,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -529,7 +514,6 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] series = pandas.Series(rows) @@ -540,7 +524,6 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -552,7 +535,6 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas 
is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -574,7 +556,6 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -591,7 +572,6 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -853,7 +833,6 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( schema.SchemaField("str_index", "STRING"), @@ -920,7 +899,6 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -979,7 +957,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -1012,7 +989,6 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1034,7 +1010,6 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1046,7 +1021,6 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1058,7 +1032,6 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema 
= (schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -1078,7 +1051,6 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1108,7 +1080,6 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1133,7 +1104,6 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1206,7 +1176,6 @@ def test_augment_schema_type_detection_succeeds(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1242,7 +1211,6 @@ def test_augment_schema_type_detection_fails(module_under_test): assert "struct_field" in warning_msg and "struct_field_2" in warning_msg -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1390,7 +1358,6 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1426,7 +1393,6 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test): assert col.to_pylist() == [2.2, 22.22, 222.222] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1461,7 +1427,6 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test): assert col.to_pylist() == ["2.2", "22.22", "222.222"] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1489,7 +1454,6 @@ def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), From 8d127cf2db1aabb9a95db4525ba1c8c1b75ecf38 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 22 Jul 2021 14:15:23 +0200 Subject: [PATCH 21/24] Conditionally skip pandas tests where needed --- 
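Reviewer note, not part of the commit: the runtime-skip pattern used below is pytest.importorskip, which skips the calling test when the named module is missing and returns the imported module otherwise. A minimal sketch under that assumption:

    import pytest

    def test_needs_pandas():
        # Skips this test (instead of erroring) when pandas is not installed.
        pandas = pytest.importorskip("pandas")
        frame = pandas.DataFrame({"col": [1, 2, 3]})
        assert len(frame) == 3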
tests/unit/test__pandas_helpers.py | 2 ++ tests/unit/test_magics.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 7d764fcb5..7ee6da4f9 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1212,6 +1212,8 @@ def test_augment_schema_type_detection_fails(module_under_test): def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): + pandas = pytest.importorskip("pandas") + dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, {"name": "field02", "type": "BOOL", "mode": "NULLABLE"}, diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 5181735d3..bb3a8d1fd 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -538,6 +538,8 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -599,6 +601,8 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_rest_client_requested(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( From e47f1bd3178cf5c19bdf6b0120d09d8f45a09cca Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 22 Jul 2021 15:33:02 +0200 Subject: [PATCH 22/24] Remove unneeded conditional pyarrow version paths --- google/cloud/bigquery/_pandas_helpers.py | 13 ++------ google/cloud/bigquery/exceptions.py | 17 ---------- tests/system/test_client.py | 14 +++----- tests/system/test_pandas.py | 30 +++++++---------- tests/unit/test__pandas_helpers.py | 41 ++++++++---------------- tests/unit/test_table.py | 9 +----- 6 files changed, 34 insertions(+), 90 deletions(-) delete mode 100644 google/cloud/bigquery/exceptions.py diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index d3b28c100..71c14f944 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,8 +20,6 @@ import queue import warnings -from packaging import version - try: import pandas except ImportError: # pragma: NO COVER @@ -104,6 +102,7 @@ def pyarrow_timestamp(): # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, "BOOL": pyarrow.bool_, "BOOLEAN": pyarrow.bool_, "BYTES": pyarrow.binary, @@ -142,16 +141,10 @@ def pyarrow_timestamp(): pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", -} - -if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only # the type ID matters, and it's the same for all decimal256 instances. 
- ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True -else: - _BIGNUMERIC_SUPPORT = False + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", +} def bq_to_arrow_struct_data_type(field): diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py deleted file mode 100644 index 6e4b9109d..000000000 --- a/google/cloud/bigquery/exceptions.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class BigQueryError(Exception): - """Base class for all custom exceptions defined by the BigQuery client.""" diff --git a/tests/system/test_client.py b/tests/system/test_client.py index ca16f4c6d..d43442a42 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -30,7 +30,6 @@ import psutil import pytest -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers try: @@ -1889,6 +1888,11 @@ def test_query_w_query_params(self): "expected": pi_numeric, "query_parameters": [pi_numeric_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, { "sql": "SELECT @truthy", "expected": truthy, @@ -1959,14 +1963,6 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] - if _BIGNUMERIC_SUPPORT: - examples.append( - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - } - ) for example in examples: jconfig = QueryJobConfig() diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 9bf2354da..4b1828c86 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -26,7 +26,6 @@ import pytz from google.cloud import bigquery -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud import bigquery_storage from . 
import helpers @@ -181,12 +180,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -208,12 +206,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -289,12 +286,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -332,6 +328,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -346,17 +350,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ], ), ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 7ee6da4f9..85c507b2a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -38,7 +38,6 @@ from google import api_core from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud import bigquery_storage @@ -293,6 +292,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -302,9 +302,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = 
schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -316,6 +313,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -324,8 +322,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected = pyarrow.struct(expected) assert pyarrow.types.is_struct(actual) @@ -343,6 +339,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -352,9 +349,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -366,6 +360,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -374,8 +369,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected_value_type = pyarrow.struct(expected) assert pyarrow.types.is_list(actual) @@ -908,6 +901,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -916,8 +910,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - if _BIGNUMERIC_SUPPORT: - bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) data = { "field01": ["hello", "world"], @@ -927,6 +919,10 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): 
"field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], "field09": [True, False], "field10": [False, True], "field11": [ @@ -941,11 +937,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ], "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], } - if _BIGNUMERIC_SUPPORT: - data["field08"] = [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ] dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) @@ -1137,11 +1128,8 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) - if _BIGNUMERIC_SUPPORT: - current_schema += ( - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1163,13 +1151,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) - if _BIGNUMERIC_SUPPORT: - expected_schema += ( - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), - ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 91864413d..533ed610f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -19,7 +19,6 @@ import warnings import mock -import pkg_resources import pyarrow import pyarrow.types import pytest @@ -45,10 +44,6 @@ from google.cloud.bigquery.dataset import DatasetReference -PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - - def _mock_client(): from google.cloud.bigquery import client @@ -2501,9 +2496,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) - tzinfo = None - if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = datetime.timezone.utc + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows From 427665e0f4f99b60dd97ac4848104b0e830a59cd Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 22 Jul 2021 16:40:18 +0200 Subject: [PATCH 23/24] Cover schema autodetect failed code path in test --- tests/unit/test_client.py | 59 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2dffef272..5d173afd8 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -6928,6 +6928,65 @@ def 
test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("ts_col", "TIMESTAMP"), ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + + df_data = [ + [[{"name": "n1.1", "value": 1.1}, {"name": "n1.2", "value": 1.2}]], + [[{"name": "n2.1", "value": 2.1}, {"name": "n2.2", "value": 2.2}]], + ] + dataframe = pandas.DataFrame(df_data, columns=["col_record_list"]) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + + with load_patch as load_table_from_file, get_table_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + # There should be a warning that schema detection failed. + expected_warnings = [ + warning + for warning in warned + if "schema could not be detected" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass( + expected_warnings[0].category, + (DeprecationWarning, PendingDeprecationWarning), + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=None, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema is None + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES From c4315cebc4041d4617223ca090009a3ac906a792 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 12:11:39 -0500 Subject: [PATCH 24/24] fix bad merge --- tests/system/test_client.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 52ea7b091..4970ef281 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1594,20 +1594,6 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - def test_dbapi_create_view(self): - - query = """ - CREATE VIEW {}.dbapi_create_view - AS SELECT name, SUM(number) AS total - FROM `bigquery-public-data.usa_names.usa_1910_2013` - GROUP BY name; - """.format( - Config.DATASET - ) - - Config.CURSOR.execute(query) - self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows") - def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials