Labels: api: bigquery, priority: p1, type: bug
Description
tests/unit/job/test_query_pandas.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/job/query.py:1636: in to_dataframe
return query_result.to_dataframe(
google/cloud/bigquery/table.py:1940: in to_dataframe
record_batch = self.to_arrow(
google/cloud/bigquery/table.py:1743: in to_arrow
for record_batch in self.to_arrow_iterable(
google/cloud/bigquery/table.py:1613: in _to_page_iterable
yield from result_pages
google/cloud/bigquery/_pandas_helpers.py:888: in _download_table_bqstorage
future.result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:437: in result
return self.__get_result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
raise self._exception
/usr/local/lib/python3.8/concurrent/futures/thread.py:57: in run
result = self.fn(*self.args, **self.kwargs)
google/cloud/bigquery/_pandas_helpers.py:766: in _download_table_bqstorage_stream
for page in rowstream.pages:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:338: in pages
for message in self._reader:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:148: in __iter__
self._reconnect()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery_storage_v1.reader.ReadRowsStream object at 0x7f477f994ac0>
def _reconnect(self):
"""Reconnect to the ReadRows stream using the most recent offset."""
while True:
try:
> self._wrapped = self._client.read_rows(
read_stream=self._name,
offset=self._offset,
**self._read_rows_kwargs
)
E AttributeError: 'list' object has no attribute 'read_rows'
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:178: AttributeError
_ test_to_dataframe_bqstorage_preserve_order[SELECT name, age FROM table ORDER BY other_column;] _
query = 'SELECT name, age FROM table ORDER BY other_column;'
table_read_options_kwarg = {'read_options': arrow_serialization_options {
buffer_compression: LZ4_FRAME
}
}
@pytest.mark.parametrize(
"query",
(
"select name, age from table order by other_column;",
"Select name, age From table Order By other_column;",
"SELECT name, age FROM table ORDER BY other_column;",
"select name, age from table order\nby other_column;",
"Select name, age From table Order\nBy other_column;",
"SELECT name, age FROM table ORDER\nBY other_column;",
"SelecT name, age froM table OrdeR \n\t BY other_column;",
),
)
def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
job_resource = _make_job_resource(
project_id="test-project", job_type="query", ended=True
)
job_resource["configuration"]["query"]["query"] = query
job_resource["status"] = {"state": "DONE"}
query_resource = {
"jobComplete": True,
"jobReference": {"projectId": "test-project", "jobId": "test-job"},
"schema": {
"fields": [
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
]
},
"totalRows": "4",
}
stream_id = "projects/1/locations/2/sessions/3/streams/4"
name_array = pyarrow.array(
["John", "Paul", "George", "Ringo"], type=pyarrow.string()
)
age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64())
arrow_schema = pyarrow.schema(
[
pyarrow.field("name", pyarrow.string(), True),
pyarrow.field("age", pyarrow.int64(), True),
]
)
record_batch = pyarrow.RecordBatch.from_arrays(
[name_array, age_array], schema=arrow_schema
)
connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(job_resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
bqstorage_client.create_read_session.return_value = session
bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
page = bigquery_storage.types.ReadRowsResponse()
if BQ_STORAGE_VERSIONS.is_read_session_optional:
page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
page.arrow_record_batch.serialized_record_batch = (
record_batch.serialize().to_pybytes()
)
bqstorage_base_client.read_rows.return_value = [page]
reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
[page], bqstorage_base_client, stream_id, 0, {}
)
bqstorage_client.read_rows.return_value = reader
> dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)
tests/unit/job/test_query_pandas.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/job/query.py:1636: in to_dataframe
return query_result.to_dataframe(
google/cloud/bigquery/table.py:1940: in to_dataframe
record_batch = self.to_arrow(
google/cloud/bigquery/table.py:1743: in to_arrow
for record_batch in self.to_arrow_iterable(
google/cloud/bigquery/table.py:1613: in _to_page_iterable
yield from result_pages
google/cloud/bigquery/_pandas_helpers.py:888: in _download_table_bqstorage
future.result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:437: in result
return self.__get_result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
raise self._exception
/usr/local/lib/python3.8/concurrent/futures/thread.py:57: in run
result = self.fn(*self.args, **self.kwargs)
google/cloud/bigquery/_pandas_helpers.py:766: in _download_table_bqstorage_stream
for page in rowstream.pages:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:338: in pages
for message in self._reader:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:148: in __iter__
self._reconnect()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery_storage_v1.reader.ReadRowsStream object at 0x7f477f221f10>
def _reconnect(self):
"""Reconnect to the ReadRows stream using the most recent offset."""
while True:
try:
> self._wrapped = self._client.read_rows(
read_stream=self._name,
offset=self._offset,
**self._read_rows_kwargs
)
E AttributeError: 'list' object has no attribute 'read_rows'
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:178: AttributeError
_ test_to_dataframe_bqstorage_preserve_order[select name, age from table order\nby other_column;] _
query = 'select name, age from table order\nby other_column;'
table_read_options_kwarg = {'read_options': arrow_serialization_options {
buffer_compression: LZ4_FRAME
}
}
@pytest.mark.parametrize(
"query",
(
"select name, age from table order by other_column;",
"Select name, age From table Order By other_column;",
"SELECT name, age FROM table ORDER BY other_column;",
"select name, age from table order\nby other_column;",
"Select name, age From table Order\nBy other_column;",
"SELECT name, age FROM table ORDER\nBY other_column;",
"SelecT name, age froM table OrdeR \n\t BY other_column;",
),
)
def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
job_resource = _make_job_resource(
project_id="test-project", job_type="query", ended=True
)
job_resource["configuration"]["query"]["query"] = query
job_resource["status"] = {"state": "DONE"}
query_resource = {
"jobComplete": True,
"jobReference": {"projectId": "test-project", "jobId": "test-job"},
"schema": {
"fields": [
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
]
},
"totalRows": "4",
}
stream_id = "projects/1/locations/2/sessions/3/streams/4"
name_array = pyarrow.array(
["John", "Paul", "George", "Ringo"], type=pyarrow.string()
)
age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64())
arrow_schema = pyarrow.schema(
[
pyarrow.field("name", pyarrow.string(), True),
pyarrow.field("age", pyarrow.int64(), True),
]
)
record_batch = pyarrow.RecordBatch.from_arrays(
[name_array, age_array], schema=arrow_schema
)
connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(job_resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
bqstorage_client.create_read_session.return_value = session
bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
page = bigquery_storage.types.ReadRowsResponse()
if BQ_STORAGE_VERSIONS.is_read_session_optional:
page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
page.arrow_record_batch.serialized_record_batch = (
record_batch.serialize().to_pybytes()
)
bqstorage_base_client.read_rows.return_value = [page]
reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
[page], bqstorage_base_client, stream_id, 0, {}
)
bqstorage_client.read_rows.return_value = reader
> dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)
tests/unit/job/test_query_pandas.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/job/query.py:1636: in to_dataframe
return query_result.to_dataframe(
google/cloud/bigquery/table.py:1940: in to_dataframe
record_batch = self.to_arrow(
google/cloud/bigquery/table.py:1743: in to_arrow
for record_batch in self.to_arrow_iterable(
google/cloud/bigquery/table.py:1613: in _to_page_iterable
yield from result_pages
google/cloud/bigquery/_pandas_helpers.py:888: in _download_table_bqstorage
future.result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:437: in result
return self.__get_result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
raise self._exception
/usr/local/lib/python3.8/concurrent/futures/thread.py:57: in run
result = self.fn(*self.args, **self.kwargs)
google/cloud/bigquery/_pandas_helpers.py:766: in _download_table_bqstorage_stream
for page in rowstream.pages:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:338: in pages
for message in self._reader:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:148: in __iter__
self._reconnect()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery_storage_v1.reader.ReadRowsStream object at 0x7f4767ef8e50>
def _reconnect(self):
"""Reconnect to the ReadRows stream using the most recent offset."""
while True:
try:
> self._wrapped = self._client.read_rows(
read_stream=self._name,
offset=self._offset,
**self._read_rows_kwargs
)
E AttributeError: 'list' object has no attribute 'read_rows'
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:178: AttributeError
_ test_to_dataframe_bqstorage_preserve_order[Select name, age From table Order\nBy other_column;] _
query = 'Select name, age From table Order\nBy other_column;'
table_read_options_kwarg = {'read_options': arrow_serialization_options {
buffer_compression: LZ4_FRAME
}
}
@pytest.mark.parametrize(
"query",
(
"select name, age from table order by other_column;",
"Select name, age From table Order By other_column;",
"SELECT name, age FROM table ORDER BY other_column;",
"select name, age from table order\nby other_column;",
"Select name, age From table Order\nBy other_column;",
"SELECT name, age FROM table ORDER\nBY other_column;",
"SelecT name, age froM table OrdeR \n\t BY other_column;",
),
)
def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
job_resource = _make_job_resource(
project_id="test-project", job_type="query", ended=True
)
job_resource["configuration"]["query"]["query"] = query
job_resource["status"] = {"state": "DONE"}
query_resource = {
"jobComplete": True,
"jobReference": {"projectId": "test-project", "jobId": "test-job"},
"schema": {
"fields": [
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
]
},
"totalRows": "4",
}
stream_id = "projects/1/locations/2/sessions/3/streams/4"
name_array = pyarrow.array(
["John", "Paul", "George", "Ringo"], type=pyarrow.string()
)
age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64())
arrow_schema = pyarrow.schema(
[
pyarrow.field("name", pyarrow.string(), True),
pyarrow.field("age", pyarrow.int64(), True),
]
)
record_batch = pyarrow.RecordBatch.from_arrays(
[name_array, age_array], schema=arrow_schema
)
connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(job_resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
bqstorage_client.create_read_session.return_value = session
bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
page = bigquery_storage.types.ReadRowsResponse()
if BQ_STORAGE_VERSIONS.is_read_session_optional:
page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
page.arrow_record_batch.serialized_record_batch = (
record_batch.serialize().to_pybytes()
)
bqstorage_base_client.read_rows.return_value = [page]
reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
[page], bqstorage_base_client, stream_id, 0, {}
)
bqstorage_client.read_rows.return_value = reader
> dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)
tests/unit/job/test_query_pandas.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/job/query.py:1636: in to_dataframe
return query_result.to_dataframe(
google/cloud/bigquery/table.py:1940: in to_dataframe
record_batch = self.to_arrow(
google/cloud/bigquery/table.py:1743: in to_arrow
for record_batch in self.to_arrow_iterable(
google/cloud/bigquery/table.py:1613: in _to_page_iterable
yield from result_pages
google/cloud/bigquery/_pandas_helpers.py:888: in _download_table_bqstorage
future.result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:437: in result
return self.__get_result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
raise self._exception
/usr/local/lib/python3.8/concurrent/futures/thread.py:57: in run
result = self.fn(*self.args, **self.kwargs)
google/cloud/bigquery/_pandas_helpers.py:766: in _download_table_bqstorage_stream
for page in rowstream.pages:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:338: in pages
for message in self._reader:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:148: in __iter__
self._reconnect()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery_storage_v1.reader.ReadRowsStream object at 0x7f477ef80a60>
def _reconnect(self):
"""Reconnect to the ReadRows stream using the most recent offset."""
while True:
try:
> self._wrapped = self._client.read_rows(
read_stream=self._name,
offset=self._offset,
**self._read_rows_kwargs
)
E AttributeError: 'list' object has no attribute 'read_rows'
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:178: AttributeError
_ test_to_dataframe_bqstorage_preserve_order[SELECT name, age FROM table ORDER\nBY other_column;] _
query = 'SELECT name, age FROM table ORDER\nBY other_column;'
table_read_options_kwarg = {'read_options': arrow_serialization_options {
buffer_compression: LZ4_FRAME
}
}
@pytest.mark.parametrize(
"query",
(
"select name, age from table order by other_column;",
"Select name, age From table Order By other_column;",
"SELECT name, age FROM table ORDER BY other_column;",
"select name, age from table order\nby other_column;",
"Select name, age From table Order\nBy other_column;",
"SELECT name, age FROM table ORDER\nBY other_column;",
"SelecT name, age froM table OrdeR \n\t BY other_column;",
),
)
def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
job_resource = _make_job_resource(
project_id="test-project", job_type="query", ended=True
)
job_resource["configuration"]["query"]["query"] = query
job_resource["status"] = {"state": "DONE"}
query_resource = {
"jobComplete": True,
"jobReference": {"projectId": "test-project", "jobId": "test-job"},
"schema": {
"fields": [
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
]
},
"totalRows": "4",
}
stream_id = "projects/1/locations/2/sessions/3/streams/4"
name_array = pyarrow.array(
["John", "Paul", "George", "Ringo"], type=pyarrow.string()
)
age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64())
arrow_schema = pyarrow.schema(
[
pyarrow.field("name", pyarrow.string(), True),
pyarrow.field("age", pyarrow.int64(), True),
]
)
record_batch = pyarrow.RecordBatch.from_arrays(
[name_array, age_array], schema=arrow_schema
)
connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(job_resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
bqstorage_client.create_read_session.return_value = session
bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
page = bigquery_storage.types.ReadRowsResponse()
if BQ_STORAGE_VERSIONS.is_read_session_optional:
page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
page.arrow_record_batch.serialized_record_batch = (
record_batch.serialize().to_pybytes()
)
bqstorage_base_client.read_rows.return_value = [page]
reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
[page], bqstorage_base_client, stream_id, 0, {}
)
bqstorage_client.read_rows.return_value = reader
> dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)
tests/unit/job/test_query_pandas.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/job/query.py:1636: in to_dataframe
return query_result.to_dataframe(
google/cloud/bigquery/table.py:1940: in to_dataframe
record_batch = self.to_arrow(
google/cloud/bigquery/table.py:1743: in to_arrow
for record_batch in self.to_arrow_iterable(
google/cloud/bigquery/table.py:1613: in _to_page_iterable
yield from result_pages
google/cloud/bigquery/_pandas_helpers.py:888: in _download_table_bqstorage
future.result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:437: in result
return self.__get_result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
raise self._exception
/usr/local/lib/python3.8/concurrent/futures/thread.py:57: in run
result = self.fn(*self.args, **self.kwargs)
google/cloud/bigquery/_pandas_helpers.py:766: in _download_table_bqstorage_stream
for page in rowstream.pages:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:338: in pages
for message in self._reader:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:148: in __iter__
self._reconnect()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery_storage_v1.reader.ReadRowsStream object at 0x7f477ef9e070>
def _reconnect(self):
"""Reconnect to the ReadRows stream using the most recent offset."""
while True:
try:
> self._wrapped = self._client.read_rows(
read_stream=self._name,
offset=self._offset,
**self._read_rows_kwargs
)
E AttributeError: 'list' object has no attribute 'read_rows'
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:178: AttributeError
_ test_to_dataframe_bqstorage_preserve_order[SelecT name, age froM table OrdeR \n\t BY other_column;] _
query = 'SelecT name, age froM table OrdeR \n\t BY other_column;'
table_read_options_kwarg = {'read_options': arrow_serialization_options {
buffer_compression: LZ4_FRAME
}
}
@pytest.mark.parametrize(
"query",
(
"select name, age from table order by other_column;",
"Select name, age From table Order By other_column;",
"SELECT name, age FROM table ORDER BY other_column;",
"select name, age from table order\nby other_column;",
"Select name, age From table Order\nBy other_column;",
"SELECT name, age FROM table ORDER\nBY other_column;",
"SelecT name, age froM table OrdeR \n\t BY other_column;",
),
)
def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
job_resource = _make_job_resource(
project_id="test-project", job_type="query", ended=True
)
job_resource["configuration"]["query"]["query"] = query
job_resource["status"] = {"state": "DONE"}
query_resource = {
"jobComplete": True,
"jobReference": {"projectId": "test-project", "jobId": "test-job"},
"schema": {
"fields": [
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
]
},
"totalRows": "4",
}
stream_id = "projects/1/locations/2/sessions/3/streams/4"
name_array = pyarrow.array(
["John", "Paul", "George", "Ringo"], type=pyarrow.string()
)
age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64())
arrow_schema = pyarrow.schema(
[
pyarrow.field("name", pyarrow.string(), True),
pyarrow.field("age", pyarrow.int64(), True),
]
)
record_batch = pyarrow.RecordBatch.from_arrays(
[name_array, age_array], schema=arrow_schema
)
connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(job_resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
bqstorage_client.create_read_session.return_value = session
bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
page = bigquery_storage.types.ReadRowsResponse()
if BQ_STORAGE_VERSIONS.is_read_session_optional:
page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
page.arrow_record_batch.serialized_record_batch = (
record_batch.serialize().to_pybytes()
)
bqstorage_base_client.read_rows.return_value = [page]
reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
[page], bqstorage_base_client, stream_id, 0, {}
)
bqstorage_client.read_rows.return_value = reader
> dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)
tests/unit/job/test_query_pandas.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/job/query.py:1636: in to_dataframe
return query_result.to_dataframe(
google/cloud/bigquery/table.py:1940: in to_dataframe
record_batch = self.to_arrow(
google/cloud/bigquery/table.py:1743: in to_arrow
for record_batch in self.to_arrow_iterable(
google/cloud/bigquery/table.py:1613: in _to_page_iterable
yield from result_pages
google/cloud/bigquery/_pandas_helpers.py:888: in _download_table_bqstorage
future.result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:437: in result
return self.__get_result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
raise self._exception
/usr/local/lib/python3.8/concurrent/futures/thread.py:57: in run
result = self.fn(*self.args, **self.kwargs)
google/cloud/bigquery/_pandas_helpers.py:766: in _download_table_bqstorage_stream
for page in rowstream.pages:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:338: in pages
for message in self._reader:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:148: in __iter__
self._reconnect()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery_storage_v1.reader.ReadRowsStream object at 0x7f477fb50340>
def _reconnect(self):
"""Reconnect to the ReadRows stream using the most recent offset."""
while True:
try:
> self._wrapped = self._client.read_rows(
read_stream=self._name,
offset=self._offset,
**self._read_rows_kwargs
)
E AttributeError: 'list' object has no attribute 'read_rows'
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:178: AttributeError
_________________________ test_to_dataframe_bqstorage __________________________
table_read_options_kwarg = {'read_options': arrow_serialization_options {
buffer_compression: LZ4_FRAME
}
}
def test_to_dataframe_bqstorage(table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
resource = _make_job_resource(job_type="query", ended=True)
query_resource = {
"jobComplete": True,
"jobReference": resource["jobReference"],
"totalRows": "4",
"schema": {
"fields": [
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
]
},
}
stream_id = "projects/1/locations/2/sessions/3/streams/4"
name_array = pyarrow.array(
["John", "Paul", "George", "Ringo"], type=pyarrow.string()
)
age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64())
arrow_schema = pyarrow.schema(
[
pyarrow.field("name", pyarrow.string(), True),
pyarrow.field("age", pyarrow.int64(), True),
]
)
record_batch = pyarrow.RecordBatch.from_arrays(
[name_array, age_array], schema=arrow_schema
)
connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
session.streams = [bigquery_storage.types.ReadStream(name=stream_id)]
bqstorage_client.create_read_session.return_value = session
bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
page = bigquery_storage.types.ReadRowsResponse()
if BQ_STORAGE_VERSIONS.is_read_session_optional:
page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
page.arrow_record_batch.serialized_record_batch = (
record_batch.serialize().to_pybytes()
)
bqstorage_base_client.read_rows.return_value = [page]
reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream(
[page], bqstorage_base_client, stream_id, 0, {}
)
bqstorage_client.read_rows.return_value = reader
> dataframe = job.to_dataframe(bqstorage_client=bqstorage_client)
tests/unit/job/test_query_pandas.py:557:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
google/cloud/bigquery/job/query.py:1636: in to_dataframe
return query_result.to_dataframe(
google/cloud/bigquery/table.py:1940: in to_dataframe
record_batch = self.to_arrow(
google/cloud/bigquery/table.py:1743: in to_arrow
for record_batch in self.to_arrow_iterable(
google/cloud/bigquery/table.py:1613: in _to_page_iterable
yield from result_pages
google/cloud/bigquery/_pandas_helpers.py:888: in _download_table_bqstorage
future.result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:437: in result
return self.__get_result()
/usr/local/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
raise self._exception
/usr/local/lib/python3.8/concurrent/futures/thread.py:57: in run
result = self.fn(*self.args, **self.kwargs)
google/cloud/bigquery/_pandas_helpers.py:766: in _download_table_bqstorage_stream
for page in rowstream.pages:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:338: in pages
for message in self._reader:
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:148: in __iter__
self._reconnect()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <google.cloud.bigquery_storage_v1.reader.ReadRowsStream object at 0x7f477f935b50>
def _reconnect(self):
"""Reconnect to the ReadRows stream using the most recent offset."""
while True:
try:
> self._wrapped = self._client.read_rows(
read_stream=self._name,
offset=self._offset,
**self._read_rows_kwargs
)
E AttributeError: 'list' object has no attribute 'read_rows'
.nox/prerelease_deps-3-8/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/reader.py:178: AttributeError
=============================== warnings summary ===============================
tests/unit/test__pandas_helpers.py::test_bq_to_arrow_array_w_geography_type_shapely_data
tests/unit/test__pandas_helpers.py::test_dataframe_to_bq_schema_geography
tests/unit/test_table.py::TestRowIterator::test_rowiterator_to_geodataframe_delegation
/tmpfs/src/github/python-bigquery/.nox/prerelease_deps-3-8/lib/python3.8/site-packages/pandas/core/dtypes/cast.py:122: ShapelyDeprecationWarning: The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.
arr = construct_1d_object_array_from_listlike(values)
-- Docs: https://docs.pytest.org/en/stable/warnings.html
=========================== short test summary info ============================
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage_preserve_order[select name, age from table order by other_column;]
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage_preserve_order[Select name, age From table Order By other_column;]
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage_preserve_order[SELECT name, age FROM table ORDER BY other_column;]
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage_preserve_order[select name, age from table order\nby other_column;]
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage_preserve_order[Select name, age From table Order\nBy other_column;]
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage_preserve_order[SELECT name, age FROM table ORDER\nBY other_column;]
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage_preserve_order[SelecT name, age froM table OrdeR \n\t BY other_column;]
FAILED tests/unit/job/test_query_pandas.py::test_to_dataframe_bqstorage - Att...
TBD whether this is just a problem with how we are mocking out the client or a genuine failure in the library.
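One plausible reading of the traceback (an assumption, since the prerelease API itself is not shown here): the tests construct `ReadRowsStream([page], bqstorage_base_client, stream_id, 0, {})` with the already-materialized pages as the first positional argument, but the failure `self._client.read_rows` / `'list' object has no attribute 'read_rows'` suggests the prerelease `google-cloud-bigquery-storage` constructor now takes the client first, so the `[page]` list ends up bound to `self._client`. If that is the cause, the problem is in how the tests build the mock reader rather than in the library. A minimal, hypothetical sketch of a version-tolerant helper for the tests follows; the parameter names ("wrapped") and the newer signature are inferred from the traceback, not confirmed against the library.

```python
# Hypothetical sketch, not the project's actual fix: parameter names below are
# assumptions inferred from the traceback, not the confirmed bigquery-storage API.
import inspect

from google.cloud.bigquery_storage_v1 import reader


def make_mock_reader(pages, bqstorage_base_client, stream_id):
    """Build a ReadRowsStream for the tests, tolerating both constructor shapes."""
    params = inspect.signature(reader.ReadRowsStream.__init__).parameters
    if "wrapped" in params:
        # Older releases: ReadRowsStream(wrapped, client, name, offset, read_rows_kwargs),
        # where `wrapped` is an already-opened iterable of ReadRowsResponse pages.
        return reader.ReadRowsStream(pages, bqstorage_base_client, stream_id, 0, {})
    # Behavior suggested by the traceback for the prerelease: the client comes first
    # and the stream is (re)opened via client.read_rows(), so this path relies on the
    # mocked bqstorage_base_client.read_rows.return_value = pages set up by the test.
    return reader.ReadRowsStream(bqstorage_base_client, stream_id, 0, {})
```

If the hypothesis holds, the tests would call `make_mock_reader([page], bqstorage_base_client, stream_id)` in place of the direct `ReadRowsStream([page], bqstorage_base_client, stream_id, 0, {})` construction, and the same mock setup would pass against both the pinned and the prerelease dependency.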