From beac4a066d48bf529a9d8160590d6674735a88e7 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 14 Jan 2022 16:44:07 -0600
Subject: [PATCH 1/4] wip: attempt to fix pandas tests with new bqstorage client

---
 tests/unit/job/test_query_pandas.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py
index 044ca6e9a..4c4deed8b 100644
--- a/tests/unit/job/test_query_pandas.py
+++ b/tests/unit/job/test_query_pandas.py
@@ -549,8 +549,12 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg):
         record_batch.serialize().to_pybytes()
     )
     bqstorage_base_client.read_rows.return_value = [page]
+    # TODO: constructor is different depending on version
+    # May want to just mock this out entirely. It'd be nice to test the
+    # arrow deserialization, but that's tested in the
+    # google-cloud-bigquery-storage client too.
     reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
-        [page], bqstorage_base_client, stream_id, 0, {}
+        bqstorage_base_client, stream_id, 0, {}
     )
     bqstorage_client.read_rows.return_value = reader
 
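
The TODO above points at the eventual fix: rather than constructing a real ReadRowsStream, whose constructor signature differs between google-cloud-bigquery-storage releases, the test can fake the whole reader. A minimal sketch of that approach, assuming record_batch is the pyarrow.RecordBatch already built earlier in the test (the next patch applies this in full):

    from unittest import mock

    import google.cloud.bigquery_storage_v1.reader

    # Fake the reader -> rows() -> pages -> to_arrow() chain with autospec
    # mocks so that no real ReadRowsStream is ever constructed.
    reader = mock.create_autospec(
        google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True
    )
    row_iterable = mock.create_autospec(
        google.cloud.bigquery_storage_v1.reader.ReadRowsIterable, instance=True
    )
    page = mock.create_autospec(
        google.cloud.bigquery_storage_v1.reader.ReadRowsPage, instance=True
    )
    # record_batch is assumed to come from earlier in the test.
    page.to_arrow.return_value = record_batch
    type(row_iterable).pages = mock.PropertyMock(return_value=[page])
    reader.rows.return_value = row_iterable

Because only the mocked chain is exercised, the test stops depending on the installed bigquery-storage version; the Arrow deserialization itself is covered by that library's own tests, as the TODO notes.
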
From 56aaa9453e0a29ab36fba24cd74ca3d2a042c6e2 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 18 Jan 2022 10:57:43 -0600
Subject: [PATCH 2/4] mock a bit more

---
 tests/unit/job/test_query_pandas.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py
index 4c4deed8b..8cfd99052 100644
--- a/tests/unit/job/test_query_pandas.py
+++ b/tests/unit/job/test_query_pandas.py
@@ -536,26 +536,25 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg):
     connection = make_connection(query_resource)
     client = _make_client(connection=connection)
     job = target_class.from_api_repr(resource, client)
-    bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
     session = bigquery_storage.types.ReadSession()
     session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
     session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
-    bqstorage_client.create_read_session.return_value = session
-    bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
-    page = bigquery_storage.types.ReadRowsResponse()
-    if BQ_STORAGE_VERSIONS.is_read_session_optional:
-        page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
-    page.arrow_record_batch.serialized_record_batch = (
-        record_batch.serialize().to_pybytes()
+    reader = mock.create_autospec(
+        google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True
     )
-    bqstorage_base_client.read_rows.return_value = [page]
-    # TODO: constructor is different depending on version
-    # May want to just mock this out entirely. It'd be nice to test the
-    # arrow deserialization, but that's tested in the
-    # google-cloud-bigquery-storage client too.
-    reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
-        bqstorage_base_client, stream_id, 0, {}
+    row_iterable = mock.create_autospec(
+        google.cloud.bigquery_storage_v1.reader.ReadRowsIterable, instance=True
     )
+    page = mock.create_autospec(
+        google.cloud.bigquery_storage_v1.reader.ReadRowsPage, instance=True
+    )
+    page.to_arrow.return_value = record_batch
+    type(row_iterable).pages = mock.PropertyMock(return_value=[page])
+    reader.rows.return_value = row_iterable
+    bqstorage_client = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_client.create_read_session.return_value = session
     bqstorage_client.read_rows.return_value = reader
 
     dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)

From 05e5cc1caffa46db45908a2c06db2f7fc21b9406 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 18 Jan 2022 12:20:00 -0600
Subject: [PATCH 3/4] update other test too

---
 tests/unit/job/test_query_pandas.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py
index 8cfd99052..e35051c5c 100644
--- a/tests/unit/job/test_query_pandas.py
+++ b/tests/unit/job/test_query_pandas.py
@@ -41,7 +41,6 @@
 except (ImportError, AttributeError):  # pragma: NO COVER
     tqdm = None
 
-from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS
 from ..helpers import make_connection
 from .helpers import _make_client
 from .helpers import _make_job_resource
@@ -142,18 +141,22 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
     session = bigquery_storage.types.ReadSession()
     session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
     session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
-    bqstorage_client.create_read_session.return_value = session
-    bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
-    page = bigquery_storage.types.ReadRowsResponse()
-    if BQ_STORAGE_VERSIONS.is_read_session_optional:
-        page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
-    page.arrow_record_batch.serialized_record_batch = (
-        record_batch.serialize().to_pybytes()
+    reader = mock.create_autospec(
+        google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True
+    )
+    row_iterable = mock.create_autospec(
+        google.cloud.bigquery_storage_v1.reader.ReadRowsIterable, instance=True
     )
-    bqstorage_base_client.read_rows.return_value = [page]
-    reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
-        [page], bqstorage_base_client, stream_id, 0, {}
+    page = mock.create_autospec(
+        google.cloud.bigquery_storage_v1.reader.ReadRowsPage, instance=True
     )
+    page.to_arrow.return_value = record_batch
+    type(row_iterable).pages = mock.PropertyMock(return_value=[page])
+    reader.rows.return_value = row_iterable
+    bqstorage_client = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_client.create_read_session.return_value = session
     bqstorage_client.read_rows.return_value = reader
 
     dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)
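
One detail worth noting in both rewritten tests: the fake page list is attached with type(row_iterable).pages = mock.PropertyMock(...) rather than by assigning to the instance. pages is exposed as a read-only property on ReadRowsIterable, and properties are looked up on the class, so a PropertyMock only takes effect when it is installed on the mock's type. A standalone illustration of the pattern (the Paged class here is hypothetical, not from the repository):

    from unittest import mock


    class Paged:
        """Stand-in for an object exposing its pages as a read-only property."""

        @property
        def pages(self):
            return []


    paged = mock.create_autospec(Paged, instance=True)
    # Install the PropertyMock on the mock's type; every mock instance gets
    # its own generated class, so this does not leak into other mocks.
    type(paged).pages = mock.PropertyMock(return_value=["page-1", "page-2"])
    assert list(paged.pages) == ["page-1", "page-2"]
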
From d4e1392ded43a1d65d1d7a01445be4136826df9f Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Tue, 18 Jan 2022 12:39:44 -0600
Subject: [PATCH 4/4] make flakey test more robust

---
 tests/system/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index d52cb9eb9..a00193788 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -1552,7 +1552,7 @@ def test_dbapi_connection_does_not_leak_sockets(self):
         connection.close()
 
         conn_count_end = len(current_process.connections())
-        self.assertEqual(conn_count_end, conn_count_start)
+        self.assertLessEqual(conn_count_end, conn_count_start)
 
     def _load_table_for_dml(self, rows, dataset_id, table_id):
         from google.cloud._testing import _NamedTemporaryFile
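
The relaxed assertion addresses a different kind of flakiness. The system test counts the process's open sockets before and after exercising DB-API connections (current_process is presumably a psutil.Process, given the connections() call). Sockets that happened to be open when the starting count was taken can be torn down while the test runs, so the end count may legitimately come in lower; assertLessEqual still fails if connections leak and the count grows, without tripping on that. A rough standalone sketch of the pattern, with the connection-exercising body elided:

    import unittest

    import psutil


    class SocketLeakSketch(unittest.TestCase):
        """Illustrative only; the real test lives in tests/system/test_client.py."""

        def test_does_not_leak_sockets(self):
            current_process = psutil.Process()
            conn_count_start = len(current_process.connections())

            # ... open DB-API connections, run queries, and close them ...

            conn_count_end = len(current_process.connections())
            # A leak makes the count grow; finishing with fewer sockets is fine.
            self.assertLessEqual(conn_count_end, conn_count_start)
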