From ec28835581f3721f57280eed63a98ecfd5654137 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 6 Nov 2019 14:20:43 +0200 Subject: [PATCH 1/4] feat(bigquery): add hive partitioning options to external config --- .../google/cloud/bigquery/external_config.py | 84 +++++++++++++++++++ bigquery/tests/unit/test_external_config.py | 52 ++++++++++++ 2 files changed, 136 insertions(+) diff --git a/bigquery/google/cloud/bigquery/external_config.py b/bigquery/google/cloud/bigquery/external_config.py index c637d37d185c..791e0b1bf77f 100644 --- a/bigquery/google/cloud/bigquery/external_config.py +++ b/bigquery/google/cloud/bigquery/external_config.py @@ -564,6 +564,72 @@ def from_api_repr(cls, resource): _OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) +class HivePartitioningOptions(object): + """Options that configure hive partitioning. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions + """ + + def __init__(self): + self._properties = {} + + @property + def mode(self): + """Optional[str]: When set, what mode of hive partitioning to use when reading data. + + Two modes are supported: "AUTO" and "STRINGS". + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode + """ + return self._properties.get("mode") + + @mode.setter + def mode(self, value): + self._properties["mode"] = value + + @property + def source_uri_prefix(self): + """Optional[str]: When hive partition detection is requested, a common prefix for + all source URIs is required. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix + """ + return self._properties.get("sourceUriPrefix") + + @source_uri_prefix.setter + def source_uri_prefix(self, value): + self._properties["sourceUriPrefix"] = value + + def to_api_repr(self): + """Build an API representation of this object. 
+ + Returns: + Dict[str, Any]: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a :class:`~.external_config.HivePartitioningOptions` + instance given its API representation. + + Args: + resource (Dict[str, Any]): + Definition of a :class:`~.external_config.HivePartitioningOptions` + instance in the same representation as is returned from the + API. + + Returns: + HivePartitioningOptions: Configuration parsed from ``resource``. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + class ExternalConfig(object): """Description of an external data source. @@ -624,6 +690,24 @@ def compression(self): def compression(self, value): self._properties["compression"] = value + @property + def hive_partitioning(self): + """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \ + it configures hive partitioning support. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options + """ + prop = self._properties.get("hivePartitioningOptions") + if prop is None: + return None + return HivePartitioningOptions.from_api_repr(prop) + + @hive_partitioning.setter + def hive_partitioning(self, value): + prop = value.to_api_repr() if value is not None else None + self._properties["hivePartitioningOptions"] = prop + @property def ignore_unknown_values(self): """bool: If :data:`True`, extra values that are not represented in the diff --git a/bigquery/tests/unit/test_external_config.py b/bigquery/tests/unit/test_external_config.py index dab4391cbe04..6028d069bcbe 100644 --- a/bigquery/tests/unit/test_external_config.py +++ b/bigquery/tests/unit/test_external_config.py @@ -173,6 +173,58 @@ def test_to_api_repr_sheets(self): self.assertEqual(got_resource, exp_resource) + def test_from_api_repr_hive_partitioning(self): + resource = 
_copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "FORMAT_FOO", + "hivePartitioningOptions": { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + }, + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, "FORMAT_FOO") + self.assertIsInstance( + ec.hive_partitioning, external_config.HivePartitioningOptions + ) + self.assertEqual(ec.hive_partitioning.source_uri_prefix, "http://foo/bar") + self.assertEqual(ec.hive_partitioning.mode, "STRINGS") + + # converting back to API representation should yield the same result + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + del resource["hivePartitioningOptions"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.hive_partitioning) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_hive_partitioning(self): + hive_partitioning = external_config.HivePartitioningOptions() + hive_partitioning.source_uri_prefix = "http://foo/bar" + hive_partitioning.mode = "STRINGS" + + ec = external_config.ExternalConfig("FORMAT_FOO") + ec.hive_partitioning = hive_partitioning + + got_resource = ec.to_api_repr() + + expected_resource = { + "sourceFormat": "FORMAT_FOO", + "hivePartitioningOptions": { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + }, + } + self.assertEqual(got_resource, expected_resource) + def test_from_api_repr_csv(self): resource = _copy_and_update( self.BASE_RESOURCE, From b6c6a0c85a5629142cb8cef4f54c266e0d9bd7cd Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 7 Nov 2019 10:37:45 +0200 Subject: [PATCH 2/4] Support hive partitioning options in LoadJobConfig --- bigquery/google/cloud/bigquery/job.py | 28 +++++++++++++++++++ bigquery/tests/unit/test_job.py | 40 +++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py 
b/bigquery/google/cloud/bigquery/job.py index a8d797f4bef5..11b9376663bf 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -29,6 +29,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter @@ -1138,6 +1139,33 @@ def field_delimiter(self): def field_delimiter(self, value): self._set_sub_prop("fieldDelimiter", value) + @property + def hive_partitioning(self): + """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \ + it configures hive partitioning support. + + .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options + """ + prop = self._get_sub_prop("hivePartitioningOptions") + if prop is None: + return None + return HivePartitioningOptions.from_api_repr(prop) + + @hive_partitioning.setter + def hive_partitioning(self, value): + if value is not None: + if isinstance(value, HivePartitioningOptions): + value = value.to_api_repr() + else: + raise TypeError("Expected a HivePartitioningOptions instance or None.") + + self._set_sub_prop("hivePartitioningOptions", value) + @property def ignore_unknown_values(self): """bool: Ignore extra values not represented in the table schema. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index a2aeb5efbc4a..b7596e4db848 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1448,6 +1448,46 @@ def test_field_delimiter_setter(self): config.field_delimiter = field_delimiter self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) + def test_hive_partitioning_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.hive_partitioning) + + def test_hive_partitioning_hit(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + config = self._get_target_class()() + config._properties["load"]["hivePartitioningOptions"] = { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + } + result = config.hive_partitioning + self.assertIsInstance(result, HivePartitioningOptions) + self.assertEqual(result.source_uri_prefix, "http://foo/bar") + self.assertEqual(result.mode, "STRINGS") + + def test_hive_partitioning_setter(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + hive_partitioning = HivePartitioningOptions() + hive_partitioning.source_uri_prefix = "http://foo/bar" + hive_partitioning.mode = "AUTO" + + config = self._get_target_class()() + config.hive_partitioning = hive_partitioning + self.assertEqual( + config._properties["load"]["hivePartitioningOptions"], + {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, + ) + + config.hive_partitioning = None + self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) + + def test_hive_partitioning_invalid_type(self): + config = self._get_target_class()() + + with self.assertRaises(TypeError): + config.hive_partitioning = {"mode": "AUTO"} + def test_ignore_unknown_values_missing(self): config = self._get_target_class()() self.assertIsNone(config.ignore_unknown_values) From 1aac65de4a0317ee7eade1576a4b7cd69f78366e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 6 Nov 
2019 19:50:06 +0200 Subject: [PATCH 3/4] Mark ExternalConfig.options property as optional --- bigquery/google/cloud/bigquery/external_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/external_config.py b/bigquery/google/cloud/bigquery/external_config.py index 791e0b1bf77f..0099dd00839b 100644 --- a/bigquery/google/cloud/bigquery/external_config.py +++ b/bigquery/google/cloud/bigquery/external_config.py @@ -658,7 +658,7 @@ def source_format(self): @property def options(self): - """Dict[str, Any]: Source-specific options.""" + """Optional[Dict[str, Any]]: Source-specific options.""" return self._options @property From b736df327d84c4a11f715fe31ace2479c0194dee Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 13 Nov 2019 09:20:32 +0200 Subject: [PATCH 4/4] Mark hive partitioning class and properties as beta --- bigquery/google/cloud/bigquery/external_config.py | 12 ++++++++++-- bigquery/google/cloud/bigquery/job.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/bigquery/google/cloud/bigquery/external_config.py b/bigquery/google/cloud/bigquery/external_config.py index 0099dd00839b..ea6a42c60cac 100644 --- a/bigquery/google/cloud/bigquery/external_config.py +++ b/bigquery/google/cloud/bigquery/external_config.py @@ -565,7 +565,11 @@ def from_api_repr(cls, resource): class HivePartitioningOptions(object): - """Options that configure hive partitioning. + """[Beta] Options that configure hive partitioning. + + .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions @@ -692,9 +696,13 @@ def compression(self, value): @property def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \ + """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ it configures hive partitioning support. 
+ .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options """ diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 11b9376663bf..e150cc61ef79 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1141,7 +1141,7 @@ def field_delimiter(self, value): @property def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \ + """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ it configures hive partitioning support. .. note::