From 0217edb5bedd512d3152dd7a8c658a4d44fda77f Mon Sep 17 00:00:00 2001
From: Peter Lamut <inbox@peterlamut.com>
Date: Fri, 9 Aug 2019 14:56:31 +0200
Subject: [PATCH 1/2] Fix schema recognition of struct field types

A struct field's type can be given as either "RECORD" or "STRUCT"; this
commit ensures that to_api_repr() serializes the sub-fields in both
cases.
---
 .../google/cloud/bigquery/_pandas_helpers.py  |  5 +--
 bigquery/google/cloud/bigquery/schema.py      |  4 +-
 bigquery/tests/unit/test_schema.py            | 39 ++++++++++---------
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py
index 5ac0505e91ae..e7329df4c99a 100644
--- a/bigquery/google/cloud/bigquery/_pandas_helpers.py
+++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -47,7 +47,6 @@
     "please install google-cloud-bigquery-storage to use bqstorage features."
 )
 
-STRUCT_TYPES = ("RECORD", "STRUCT")
 _PROGRESS_INTERVAL = 0.2  # Maximum time between download status checks, in seconds.
 
 
@@ -126,7 +125,7 @@ def bq_to_arrow_data_type(field):
             return pyarrow.list_(inner_type)
         return None
 
-    if field.field_type.upper() in STRUCT_TYPES:
+    if field.field_type.upper() in schema.STRUCT_TYPES:
         return bq_to_arrow_struct_data_type(field)
 
     data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper())
@@ -168,7 +167,7 @@ def bq_to_arrow_array(series, bq_field):
     arrow_type = bq_to_arrow_data_type(bq_field)
     if bq_field.mode.upper() == "REPEATED":
         return pyarrow.ListArray.from_pandas(series, type=arrow_type)
-    if bq_field.field_type.upper() in STRUCT_TYPES:
+    if bq_field.field_type.upper() in schema.STRUCT_TYPES:
         return pyarrow.StructArray.from_pandas(series, type=arrow_type)
     return pyarrow.array(series, type=arrow_type)
 
diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py
index 61bc0bcedfd6..852b84802159 100644
--- a/bigquery/google/cloud/bigquery/schema.py
+++ b/bigquery/google/cloud/bigquery/schema.py
@@ -17,6 +17,8 @@
 from google.cloud.bigquery_v2 import types
 
 
+STRUCT_TYPES = ("RECORD", "STRUCT")
+
 # SQL types reference:
 # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
@@ -150,7 +152,7 @@ def to_api_repr(self):
 
         # If this is a RECORD type, then sub-fields are also included,
         # add this to the serialized representation.
-        if self.field_type.upper() == "RECORD":
+        if self.field_type.upper() in STRUCT_TYPES:
             answer["fields"] = [f.to_api_repr() for f in self.fields]
 
         # Done; return the serialized dictionary.
diff --git a/bigquery/tests/unit/test_schema.py b/bigquery/tests/unit/test_schema.py
index 682e45895852..862d8a823e62 100644
--- a/bigquery/tests/unit/test_schema.py
+++ b/bigquery/tests/unit/test_schema.py
@@ -71,25 +71,26 @@ def test_to_api_repr(self):
         )
 
     def test_to_api_repr_with_subfield(self):
-        subfield = self._make_one("bar", "INTEGER", "NULLABLE")
-        field = self._make_one("foo", "RECORD", "REQUIRED", fields=(subfield,))
-        self.assertEqual(
-            field.to_api_repr(),
-            {
-                "fields": [
-                    {
-                        "mode": "NULLABLE",
-                        "name": "bar",
-                        "type": "INTEGER",
-                        "description": None,
-                    }
-                ],
-                "mode": "REQUIRED",
-                "name": "foo",
-                "type": "RECORD",
-                "description": None,
-            },
-        )
+        for record_type in ("RECORD", "STRUCT"):
+            subfield = self._make_one("bar", "INTEGER", "NULLABLE")
+            field = self._make_one("foo", record_type, "REQUIRED", fields=(subfield,))
+            self.assertEqual(
+                field.to_api_repr(),
+                {
+                    "fields": [
+                        {
+                            "mode": "NULLABLE",
+                            "name": "bar",
+                            "type": "INTEGER",
+                            "description": None,
+                        }
+                    ],
+                    "mode": "REQUIRED",
+                    "name": "foo",
+                    "type": record_type,
+                    "description": None,
+                },
+            )
 
     def test_from_api_repr(self):
         field = self._get_target_class().from_api_repr(

From 565e4a78710b48696ea292398f4f4006212d3bd2 Mon Sep 17 00:00:00 2001
From: Peter Lamut <inbox@peterlamut.com>
Date: Tue, 13 Aug 2019 23:46:45 +0200
Subject: [PATCH 2/2] Mark STRUCT_TYPES as private in schema.py

---
 bigquery/google/cloud/bigquery/_pandas_helpers.py | 4 ++--
 bigquery/google/cloud/bigquery/schema.py          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py
index e7329df4c99a..5cc69e434b04 100644
--- a/bigquery/google/cloud/bigquery/_pandas_helpers.py
+++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -125,7 +125,7 @@ def bq_to_arrow_data_type(field):
             return pyarrow.list_(inner_type)
         return None
 
-    if field.field_type.upper() in schema.STRUCT_TYPES:
+    if field.field_type.upper() in schema._STRUCT_TYPES:
         return bq_to_arrow_struct_data_type(field)
 
     data_type_constructor = BQ_TO_ARROW_SCALARS.get(field.field_type.upper())
@@ -167,7 +167,7 @@ def bq_to_arrow_array(series, bq_field):
     arrow_type = bq_to_arrow_data_type(bq_field)
     if bq_field.mode.upper() == "REPEATED":
         return pyarrow.ListArray.from_pandas(series, type=arrow_type)
-    if bq_field.field_type.upper() in schema.STRUCT_TYPES:
+    if bq_field.field_type.upper() in schema._STRUCT_TYPES:
         return pyarrow.StructArray.from_pandas(series, type=arrow_type)
     return pyarrow.array(series, type=arrow_type)
 
diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py
index 852b84802159..e0673d85baf6 100644
--- a/bigquery/google/cloud/bigquery/schema.py
+++ b/bigquery/google/cloud/bigquery/schema.py
@@ -17,7 +17,7 @@
 from google.cloud.bigquery_v2 import types
 
 
-STRUCT_TYPES = ("RECORD", "STRUCT")
+_STRUCT_TYPES = ("RECORD", "STRUCT")
 
 # SQL types reference:
 # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
@@ -152,7 +152,7 @@ def to_api_repr(self):
 
         # If this is a RECORD type, then sub-fields are also included,
         # add this to the serialized representation.
-        if self.field_type.upper() in STRUCT_TYPES:
+        if self.field_type.upper() in _STRUCT_TYPES:
             answer["fields"] = [f.to_api_repr() for f in self.fields]
 
         # Done; return the serialized dictionary.