Skip to content

Commit d06a2ac

Browse files
committed
fix: load_table_from_dataframe does not error out when NaN values appear in a REQUIRED column
1 parent f22eff2 commit d06a2ac

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,19 @@ def bq_to_arrow_array(series, bq_field):
302302
return pyarrow.Array.from_pandas(series, type=arrow_type)
303303

304304

305+
def _check_nullability(arrow_fields, dataframe):
306+
"""Throws error if dataframe has null values and column doesn't allow nullable"""
307+
if dataframe.index.name:
308+
dataframe[dataframe.index.name] = dataframe.index
309+
for arrow_field in arrow_fields:
310+
col_name = arrow_field.name
311+
if (
312+
not arrow_field.nullable
313+
and dataframe[arrow_field.name].isnull().values.any()
314+
):
315+
raise ValueError(f"required field {col_name} can not be nulls")
316+
317+
305318
def get_column_or_index(dataframe, name):
306319
"""Return a column or index as a pandas series."""
307320
if name in dataframe.columns:
@@ -587,6 +600,7 @@ def dataframe_to_arrow(dataframe, bq_schema):
587600
)
588601
arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type))
589602

603+
_check_nullability(arrow_fields, dataframe)
590604
if all((field is not None for field in arrow_fields)):
591605
return pyarrow.Table.from_arrays(
592606
arrow_arrays, schema=pyarrow.schema(arrow_fields)

tests/unit/test_client.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8677,6 +8677,32 @@ def test_load_table_from_dataframe_w_nulls(self):
86778677
assert sent_config.schema == schema
86788678
assert sent_config.source_format == job.SourceFormat.PARQUET
86798679

8680+
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nulls_for_required_cols(self):
    """Loading a DataFrame that has nulls in a column whose BigQuery schema
    field is REQUIRED must raise ``ValueError`` client-side.

    See: https://github.com/googleapis/python-bigquery/issues/1692
    """
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery import job

    client = self._make_client()
    # Both columns contain only nulls; "age" is REQUIRED, so the load
    # must be rejected before any request is sent.
    records = [{"name": None, "age": None}, {"name": None, "age": None}]
    dataframe = pandas.DataFrame(records, columns=["name", "age"])
    schema = [
        SchemaField("name", "STRING"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    job_config = job.LoadJobConfig(schema=schema)
    with pytest.raises(ValueError) as e:
        client.load_table_from_dataframe(
            dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
        )

    # Message is produced by _pandas_helpers._check_nullability.
    assert str(e.value) == "required field age can not be nulls"
8705+
86808706
@unittest.skipIf(pandas is None, "Requires `pandas`")
86818707
def test_load_table_from_dataframe_w_invaild_job_config(self):
86828708
from google.cloud.bigquery import job

0 commit comments

Comments
 (0)