From 0217edb5bedd512d3152dd7a8c658a4d44fda77f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 9 Aug 2019 14:56:31 +0200 Subject: [PATCH 1/2] Fix schema recognition of struct field types A struct field can be referred to as "RECORD" or "STRUCT", and this commit ensures that the to_api_repr() logic is correct. --- .../google/cloud/bigquery/_pandas_helpers.py | 5 +-- bigquery/google/cloud/bigquery/schema.py | 4 +- bigquery/tests/unit/test_schema.py | 39 ++++++++++--------- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py index 5ac0505e91ae..e7329df4c99a 100644 --- a/bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -47,7 +47,6 @@ "please install google-cloud-bigquery-storage to use bqstorage features." ) -STRUCT_TYPES = ("RECORD", "STRUCT") _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. 
@@ -126,7 +125,7 @@ def bq_to_arrow_data_type(field): return pyarrow.list_(inner_type) return None - if field.field_type.upper() in STRUCT_TYPES: + if field.field_type.upper() in schema.STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper()) @@ -168,7 +167,7 @@ def bq_to_arrow_array(series, bq_field): arrow_type = bq_to_arrow_data_type(bq_field) if bq_field.mode.upper() == "REPEATED": return pyarrow.ListArray.from_pandas(series, type=arrow_type) - if bq_field.field_type.upper() in STRUCT_TYPES: + if bq_field.field_type.upper() in schema.STRUCT_TYPES: return pyarrow.StructArray.from_pandas(series, type=arrow_type) return pyarrow.array(series, type=arrow_type) diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py index 61bc0bcedfd6..852b84802159 100644 --- a/bigquery/google/cloud/bigquery/schema.py +++ b/bigquery/google/cloud/bigquery/schema.py @@ -17,6 +17,8 @@ from google.cloud.bigquery_v2 import types +STRUCT_TYPES = ("RECORD", "STRUCT") + # SQL types reference: # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types @@ -150,7 +152,7 @@ def to_api_repr(self): # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. - if self.field_type.upper() == "RECORD": + if self.field_type.upper() in STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] # Done; return the serialized dictionary. 
diff --git a/bigquery/tests/unit/test_schema.py b/bigquery/tests/unit/test_schema.py index 682e45895852..862d8a823e62 100644 --- a/bigquery/tests/unit/test_schema.py +++ b/bigquery/tests/unit/test_schema.py @@ -71,25 +71,26 @@ def test_to_api_repr(self): ) def test_to_api_repr_with_subfield(self): - subfield = self._make_one("bar", "INTEGER", "NULLABLE") - field = self._make_one("foo", "RECORD", "REQUIRED", fields=(subfield,)) - self.assertEqual( - field.to_api_repr(), - { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "description": None, - } - ], - "mode": "REQUIRED", - "name": "foo", - "type": "RECORD", - "description": None, - }, - ) + for record_type in ("RECORD", "STRUCT"): + subfield = self._make_one("bar", "INTEGER", "NULLABLE") + field = self._make_one("foo", record_type, "REQUIRED", fields=(subfield,)) + self.assertEqual( + field.to_api_repr(), + { + "fields": [ + { + "mode": "NULLABLE", + "name": "bar", + "type": "INTEGER", + "description": None, + } + ], + "mode": "REQUIRED", + "name": "foo", + "type": record_type, + "description": None, + }, + ) def test_from_api_repr(self): field = self._get_target_class().from_api_repr( From 565e4a78710b48696ea292398f4f4006212d3bd2 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Aug 2019 23:46:45 +0200 Subject: [PATCH 2/2] Mark STRUCT_TYPES as private in schema.py --- bigquery/google/cloud/bigquery/_pandas_helpers.py | 4 ++-- bigquery/google/cloud/bigquery/schema.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py index e7329df4c99a..5cc69e434b04 100644 --- a/bigquery/google/cloud/bigquery/_pandas_helpers.py +++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py @@ -125,7 +125,7 @@ def bq_to_arrow_data_type(field): return pyarrow.list_(inner_type) return None - if field.field_type.upper() in schema.STRUCT_TYPES: + if field.field_type.upper() in 
schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper()) @@ -167,7 +167,7 @@ def bq_to_arrow_array(series, bq_field): arrow_type = bq_to_arrow_data_type(bq_field) if bq_field.mode.upper() == "REPEATED": return pyarrow.ListArray.from_pandas(series, type=arrow_type) - if bq_field.field_type.upper() in schema.STRUCT_TYPES: + if bq_field.field_type.upper() in schema._STRUCT_TYPES: return pyarrow.StructArray.from_pandas(series, type=arrow_type) return pyarrow.array(series, type=arrow_type) diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py index 852b84802159..e0673d85baf6 100644 --- a/bigquery/google/cloud/bigquery/schema.py +++ b/bigquery/google/cloud/bigquery/schema.py @@ -17,7 +17,7 @@ from google.cloud.bigquery_v2 import types -STRUCT_TYPES = ("RECORD", "STRUCT") +_STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types @@ -152,7 +152,7 @@ def to_api_repr(self): # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. - if self.field_type.upper() in STRUCT_TYPES: + if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] # Done; return the serialized dictionary.