diff --git a/bigquery/docs/reference.rst b/bigquery/docs/reference.rst
index a0fc0e1ead70..981059de5226 100644
--- a/bigquery/docs/reference.rst
+++ b/bigquery/docs/reference.rst
@@ -83,11 +83,13 @@ Table
 .. autosummary::
     :toctree: generated
 
+    table.PartitionRange
+    table.RangePartitioning
+    table.Row
+    table.RowIterator
     table.Table
     table.TableListItem
     table.TableReference
-    table.Row
-    table.RowIterator
     table.TimePartitioning
     table.TimePartitioningType
 
diff --git a/bigquery/google/cloud/bigquery/__init__.py b/bigquery/google/cloud/bigquery/__init__.py
index da13375365e9..3982c1175850 100644
--- a/bigquery/google/cloud/bigquery/__init__.py
+++ b/bigquery/google/cloud/bigquery/__init__.py
@@ -73,9 +73,11 @@
 from google.cloud.bigquery.routine import RoutineArgument
 from google.cloud.bigquery.routine import RoutineReference
 from google.cloud.bigquery.schema import SchemaField
+from google.cloud.bigquery.table import PartitionRange
+from google.cloud.bigquery.table import RangePartitioning
+from google.cloud.bigquery.table import Row
 from google.cloud.bigquery.table import Table
 from google.cloud.bigquery.table import TableReference
-from google.cloud.bigquery.table import Row
 from google.cloud.bigquery.table import TimePartitioningType
 from google.cloud.bigquery.table import TimePartitioning
 from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
@@ -96,7 +98,12 @@
     # Tables
     "Table",
     "TableReference",
+    "PartitionRange",
+    "RangePartitioning",
     "Row",
+    "TimePartitioning",
+    "TimePartitioningType",
+    # Jobs
     "CopyJob",
     "CopyJobConfig",
     "ExtractJob",
@@ -104,8 +111,6 @@
     "LoadJob",
     "LoadJobConfig",
     "UnknownJob",
-    "TimePartitioningType",
-    "TimePartitioning",
     # Models
     "Model",
     "ModelReference",
diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
index a8e75835c6ea..cfc5a3797c70 100644
--- a/bigquery/google/cloud/bigquery/job.py
+++ b/bigquery/google/cloud/bigquery/job.py
@@ -27,7 +27,9 @@
 from google.cloud.bigquery.dataset import Dataset
 from google.cloud.bigquery.dataset import DatasetListItem
 from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
 from google.cloud.bigquery.external_config import ExternalConfig
+from google.cloud.bigquery import _helpers
 from google.cloud.bigquery.query import _query_param_from_api_repr
 from google.cloud.bigquery.query import ArrayQueryParameter
 from google.cloud.bigquery.query import ScalarQueryParameter
@@ -37,12 +39,11 @@
 from google.cloud.bigquery.routine import RoutineReference
 from google.cloud.bigquery.schema import SchemaField
 from google.cloud.bigquery.table import _EmptyRowIterator
+from google.cloud.bigquery.table import RangePartitioning
 from google.cloud.bigquery.table import _table_arg_to_table_ref
 from google.cloud.bigquery.table import TableReference
 from google.cloud.bigquery.table import Table
 from google.cloud.bigquery.table import TimePartitioning
-from google.cloud.bigquery import _helpers
-from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
 
 _DONE_STATE = "DONE"
 _STOPPED_REASON = "stopped"
@@ -1180,6 +1181,40 @@ def quote_character(self):
     def quote_character(self, value):
         self._set_sub_prop("quote", value)
 
+    @property
+    def range_partitioning(self):
+        """Optional[google.cloud.bigquery.table.RangePartitioning]:
+        Configures range-based partitioning for destination table.
+
+        .. note::
+            **Beta**. The integer range partitioning feature is in a
+            pre-release state and might change or have limited support.
+
+        Only specify at most one of
+        :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
+        :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
+
+        Raises:
+            ValueError:
+                If the value is not
+                :class:`~google.cloud.bigquery.table.RangePartitioning` or
+                :data:`None`.
+        """
+        resource = self._get_sub_prop("rangePartitioning")
+        if resource is not None:
+            return RangePartitioning(_properties=resource)
+
+    @range_partitioning.setter
+    def range_partitioning(self, value):
+        resource = value
+        if isinstance(value, RangePartitioning):
+            resource = value._properties
+        elif value is not None:
+            raise ValueError(
+                "Expected value to be RangePartitioning or None, got {}.".format(value)
+            )
+        self._set_sub_prop("rangePartitioning", resource)
+
     @property
     def schema(self):
         """List[google.cloud.bigquery.schema.SchemaField]: Schema of the
@@ -1249,6 +1284,10 @@ def source_format(self, value):
     def time_partitioning(self):
         """google.cloud.bigquery.table.TimePartitioning: Specifies time-based
         partitioning for the destination table.
+
+        Only specify at most one of
+        :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
+        :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
         """
         prop = self._get_sub_prop("timePartitioning")
         if prop is not None:
@@ -1463,6 +1502,13 @@ def destination_table_friendly_name(self):
         """
         return self._configuration.destination_table_friendly_name
 
+    @property
+    def range_partitioning(self):
+        """See
+        :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
+        """
+        return self._configuration.range_partitioning
+
     @property
     def time_partitioning(self):
         """See
@@ -2242,6 +2288,40 @@ def query_parameters(self):
     def query_parameters(self, values):
         self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values))
 
+    @property
+    def range_partitioning(self):
+        """Optional[google.cloud.bigquery.table.RangePartitioning]:
+        Configures range-based partitioning for destination table.
+
+        .. note::
+            **Beta**. The integer range partitioning feature is in a
+            pre-release state and might change or have limited support.
+
+        Only specify at most one of
+        :attr:`~google.cloud.bigquery.job.QueryJobConfig.time_partitioning` or
+        :attr:`~google.cloud.bigquery.job.QueryJobConfig.range_partitioning`.
+
+        Raises:
+            ValueError:
+                If the value is not
+                :class:`~google.cloud.bigquery.table.RangePartitioning` or
+                :data:`None`.
+        """
+        resource = self._get_sub_prop("rangePartitioning")
+        if resource is not None:
+            return RangePartitioning(_properties=resource)
+
+    @range_partitioning.setter
+    def range_partitioning(self, value):
+        resource = value
+        if isinstance(value, RangePartitioning):
+            resource = value._properties
+        elif value is not None:
+            raise ValueError(
+                "Expected value to be RangePartitioning or None, got {}.".format(value)
+            )
+        self._set_sub_prop("rangePartitioning", resource)
+
     @property
     def udf_resources(self):
         """List[google.cloud.bigquery.query.UDFResource]: user
@@ -2318,8 +2398,18 @@ def table_definitions(self, values):
 
     @property
     def time_partitioning(self):
-        """google.cloud.bigquery.table.TimePartitioning: Specifies time-based
-        partitioning for the destination table.
+        """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies
+        time-based partitioning for the destination table.
+
+        Only specify at most one of
+        :attr:`~google.cloud.bigquery.job.QueryJobConfig.time_partitioning` or
+        :attr:`~google.cloud.bigquery.job.QueryJobConfig.range_partitioning`.
+
+        Raises:
+            ValueError:
+                If the value is not
+                :class:`~google.cloud.bigquery.table.TimePartitioning` or
+                :data:`None`.
         """
         prop = self._get_sub_prop("timePartitioning")
         if prop is not None:
@@ -2552,6 +2642,13 @@ def maximum_bytes_billed(self):
         """
         return self._configuration.maximum_bytes_billed
 
+    @property
+    def range_partitioning(self):
+        """See
+        :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`.
+        """
+        return self._configuration.range_partitioning
+
     @property
     def table_definitions(self):
         """See
diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py
index 90cd5d96406a..72ff8f71385c 100644
--- a/bigquery/google/cloud/bigquery/table.py
+++ b/bigquery/google/cloud/bigquery/table.py
@@ -505,14 +505,54 @@ def table_type(self):
         """
         return self._properties.get("type")
 
+    @property
+    def range_partitioning(self):
+        """Optional[google.cloud.bigquery.table.RangePartitioning]:
+        Configures range-based partitioning for a table.
+
+        .. note::
+            **Beta**. The integer range partitioning feature is in a
+            pre-release state and might change or have limited support.
+
+        Only specify at most one of
+        :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
+        :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.
+
+        Raises:
+            ValueError:
+                If the value is not
+                :class:`~google.cloud.bigquery.table.RangePartitioning` or
+                :data:`None`.
+        """
+        resource = self._properties.get("rangePartitioning")
+        if resource is not None:
+            return RangePartitioning(_properties=resource)
+
+    @range_partitioning.setter
+    def range_partitioning(self, value):
+        resource = value
+        if isinstance(value, RangePartitioning):
+            resource = value._properties
+        elif value is not None:
+            raise ValueError(
+                "Expected value to be RangePartitioning or None, got {}.".format(value)
+            )
+        self._properties["rangePartitioning"] = resource
+
     @property
     def time_partitioning(self):
-        """google.cloud.bigquery.table.TimePartitioning: Configures time-based
+        """Optional[google.cloud.bigquery.table.TimePartitioning]: Configures time-based
         partitioning for a table.
 
+        Only specify at most one of
+        :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
+        :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.
+
         Raises:
             ValueError:
-                If the value is not :class:`TimePartitioning` or :data:`None`.
+                If the value is not
+                :class:`~google.cloud.bigquery.table.TimePartitioning` or
+                :data:`None`.
         """
         prop = self._properties.get("timePartitioning")
         if prop is not None:
@@ -1645,6 +1685,147 @@ def __iter__(self):
         return iter(())
 
 
+class PartitionRange(object):
+    """Definition of the ranges for range partitioning.
+
+    .. note::
+        **Beta**. The integer range partitioning feature is in a pre-release
+        state and might change or have limited support.
+
+    Args:
+        start (Optional[int]):
+            Sets the
+            :attr:`~google.cloud.bigquery.table.PartitionRange.start`
+            property.
+        end (Optional[int]):
+            Sets the
+            :attr:`~google.cloud.bigquery.table.PartitionRange.end`
+            property.
+        interval (Optional[int]):
+            Sets the
+            :attr:`~google.cloud.bigquery.table.PartitionRange.interval`
+            property.
+        _properties (Optional[dict]):
+            Private. Used to construct object from API resource.
+    """
+
+    def __init__(self, start=None, end=None, interval=None, _properties=None):
+        if _properties is None:
+            _properties = {}
+        self._properties = _properties
+
+        if start is not None:
+            self.start = start
+        if end is not None:
+            self.end = end
+        if interval is not None:
+            self.interval = interval
+
+    @property
+    def start(self):
+        """int: The start of range partitioning, inclusive."""
+        return _helpers._int_or_none(self._properties.get("start"))
+
+    @start.setter
+    def start(self, value):
+        self._properties["start"] = _helpers._str_or_none(value)
+
+    @property
+    def end(self):
+        """int: The end of range partitioning, exclusive."""
+        return _helpers._int_or_none(self._properties.get("end"))
+
+    @end.setter
+    def end(self, value):
+        self._properties["end"] = _helpers._str_or_none(value)
+
+    @property
+    def interval(self):
+        """int: The width of each interval."""
+        return _helpers._int_or_none(self._properties.get("interval"))
+
+    @interval.setter
+    def interval(self, value):
+        self._properties["interval"] = _helpers._str_or_none(value)
+
+    def _key(self):
+        return tuple(sorted(self._properties.items()))
+
+    def __repr__(self):
+        key_vals = ["{}={}".format(key, val) for key, val in self._key()]
+        return "PartitionRange({})".format(", ".join(key_vals))
+
+
+class RangePartitioning(object):
+    """Range-based partitioning configuration for a table.
+
+    .. note::
+        **Beta**. The integer range partitioning feature is in a pre-release
+        state and might change or have limited support.
+
+    Args:
+        range_ (Optional[google.cloud.bigquery.table.PartitionRange]):
+            Sets the
+            :attr:`~google.cloud.bigquery.table.RangePartitioning.range_`
+            property.
+        field (Optional[str]):
+            Sets the
+            :attr:`~google.cloud.bigquery.table.RangePartitioning.field`
+            property.
+        _properties (Optional[dict]):
+            Private. Used to construct object from API resource.
+    """
+
+    def __init__(self, range_=None, field=None, _properties=None):
+        if _properties is None:
+            _properties = {}
+        self._properties = _properties
+
+        if range_ is not None:
+            self.range_ = range_
+        if field is not None:
+            self.field = field
+
+    # Trailing underscore to prevent conflict with built-in range() function.
+    @property
+    def range_(self):
+        """google.cloud.bigquery.table.PartitionRange: Defines the
+        ranges for range partitioning.
+
+        Raises:
+            ValueError:
+                If the value is not a :class:`PartitionRange`.
+        """
+        range_properties = self._properties.setdefault("range", {})
+        return PartitionRange(_properties=range_properties)
+
+    @range_.setter
+    def range_(self, value):
+        if not isinstance(value, PartitionRange):
+            raise ValueError("Expected a PartitionRange, but got {}.".format(value))
+        self._properties["range"] = value._properties
+
+    @property
+    def field(self):
+        """str: The table is partitioned by this field.
+
+        The field must be a top-level ``NULLABLE`` / ``REQUIRED`` field. The
+        only supported type is ``INTEGER`` / ``INT64``.
+        """
+        return self._properties.get("field")
+
+    @field.setter
+    def field(self, value):
+        self._properties["field"] = value
+
+    def _key(self):
+        return (("field", self.field), ("range_", self.range_))
+
+    def __repr__(self):
+        key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()]
+        return "RangePartitioning({})".format(", ".join(key_vals))
+
+
 class TimePartitioningType(object):
     """Specifies the type of time partitioning to perform."""
 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
index 16964722ec2e..5f3d3ee965b8 100644
--- a/bigquery/tests/unit/test_job.py
+++ b/bigquery/tests/unit/test_job.py
@@ -1638,6 +1638,44 @@ def test_source_format_setter(self):
         config.source_format = source_format
         self.assertEqual(config._properties["load"]["sourceFormat"], source_format)
 
+    def test_range_partitioning_w_none(self):
+        object_under_test = self._get_target_class()()
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_w_value(self):
+        object_under_test = self._get_target_class()()
+        object_under_test._properties["load"]["rangePartitioning"] = {
+            "field": "column_one",
+            "range": {"start": 1, "end": 1000, "interval": 10},
+        }
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter(self):
+        from google.cloud.bigquery.table import PartitionRange
+        from google.cloud.bigquery.table import RangePartitioning
+
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = RangePartitioning(
+            field="column_one", range_=PartitionRange(start=1, end=1000, interval=10)
+        )
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter_w_none(self):
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = None
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_setter_w_wrong_type(self):
+        object_under_test = self._get_target_class()()
+        with pytest.raises(ValueError, match="RangePartitioning"):
+            object_under_test.range_partitioning = object()
+
     def test_time_partitioning_miss(self):
         config = self._get_target_class()()
         self.assertIsNone(config.time_partitioning)
@@ -1892,6 +1930,7 @@ def test_ctor(self):
         self.assertIsNone(job.destination_encryption_configuration)
         self.assertIsNone(job.destination_table_description)
         self.assertIsNone(job.destination_table_friendly_name)
+        self.assertIsNone(job.range_partitioning)
         self.assertIsNone(job.time_partitioning)
         self.assertIsNone(job.use_avro_logical_types)
         self.assertIsNone(job.clustering_fields)
@@ -3328,6 +3367,44 @@ def test_destinaton_w_string(self):
         expected = table.TableReference.from_string(destination)
         self.assertEqual(config.destination, expected)
 
+    def test_range_partitioning_w_none(self):
+        object_under_test = self._get_target_class()()
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_w_value(self):
+        object_under_test = self._get_target_class()()
+        object_under_test._properties["query"]["rangePartitioning"] = {
+            "field": "column_one",
+            "range": {"start": 1, "end": 1000, "interval": 10},
+        }
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter(self):
+        from google.cloud.bigquery.table import PartitionRange
+        from google.cloud.bigquery.table import RangePartitioning
+
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = RangePartitioning(
+            field="column_one", range_=PartitionRange(start=1, end=1000, interval=10)
+        )
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter_w_none(self):
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = None
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_setter_w_wrong_type(self):
+        object_under_test = self._get_target_class()()
+        with pytest.raises(ValueError, match="RangePartitioning"):
+            object_under_test.range_partitioning = object()
+
     def test_time_partitioning(self):
         from google.cloud.bigquery import table
 
@@ -3628,6 +3705,7 @@ def test_ctor_defaults(self):
         self.assertIsNone(job.maximum_bytes_billed)
         self.assertIsNone(job.table_definitions)
         self.assertIsNone(job.destination_encryption_configuration)
+        self.assertIsNone(job.range_partitioning)
         self.assertIsNone(job.time_partitioning)
         self.assertIsNone(job.clustering_fields)
         self.assertIsNone(job.schema_update_options)
diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py
index dc2162d35fc9..b04a4491e6ca 100644
--- a/bigquery/tests/unit/test_table.py
+++ b/bigquery/tests/unit/test_table.py
@@ -856,6 +856,29 @@ def test__build_resource_w_custom_field_not_in__properties(self):
         with self.assertRaises(ValueError):
             table._build_resource(["bad"])
 
+    def test_range_partitioning(self):
+        from google.cloud.bigquery.table import RangePartitioning
+        from google.cloud.bigquery.table import PartitionRange
+
+        table = self._make_one("proj.dset.tbl")
+        assert table.range_partitioning is None
+
+        table.range_partitioning = RangePartitioning(
+            field="col1", range_=PartitionRange(start=-512, end=1024, interval=128)
+        )
+        assert table.range_partitioning.field == "col1"
+        assert table.range_partitioning.range_.start == -512
+        assert table.range_partitioning.range_.end == 1024
+        assert table.range_partitioning.range_.interval == 128
+
+        table.range_partitioning = None
+        assert table.range_partitioning is None
+
+    def test_range_partitioning_w_wrong_type(self):
+        object_under_test = self._make_one("proj.dset.tbl")
+        with pytest.raises(ValueError, match="RangePartitioning"):
+            object_under_test.range_partitioning = object()
+
     def test_require_partitioning_filter(self):
         table = self._make_one("proj.dset.tbl")
         assert table.require_partition_filter is None
@@ -2777,6 +2800,96 @@ def test_to_dataframe_w_bqstorage_snapshot(self):
             row_iterator.to_dataframe(bqstorage_client)
 
 
+class TestPartitionRange(unittest.TestCase):
+    def _get_target_class(self):
+        from google.cloud.bigquery.table import PartitionRange
+
+        return PartitionRange
+
+    def _make_one(self, *args, **kw):
+        return self._get_target_class()(*args, **kw)
+
+    def test_constructor_defaults(self):
+        object_under_test = self._make_one()
+        assert object_under_test.start is None
+        assert object_under_test.end is None
+        assert object_under_test.interval is None
+
+    def test_constructor_w_properties(self):
+        object_under_test = self._make_one(start=1, end=10, interval=2)
+        assert object_under_test.start == 1
+        assert object_under_test.end == 10
+        assert object_under_test.interval == 2
+
+    def test_constructor_w_resource(self):
+        object_under_test = self._make_one(
+            _properties={"start": -1234567890, "end": 1234567890, "interval": 1000000}
+        )
+        assert object_under_test.start == -1234567890
+        assert object_under_test.end == 1234567890
+        assert object_under_test.interval == 1000000
+
+    def test_repr(self):
+        object_under_test = self._make_one(start=1, end=10, interval=2)
+        assert repr(object_under_test) == "PartitionRange(end=10, interval=2, start=1)"
+
+
+class TestRangePartitioning(unittest.TestCase):
+    def _get_target_class(self):
+        from google.cloud.bigquery.table import RangePartitioning
+
+        return RangePartitioning
+
+    def _make_one(self, *args, **kw):
+        return self._get_target_class()(*args, **kw)
+
+    def test_constructor_defaults(self):
+        object_under_test = self._make_one()
+        assert object_under_test.field is None
+        assert object_under_test.range_.start is None
+        assert object_under_test.range_.end is None
+        assert object_under_test.range_.interval is None
+
+    def test_constructor_w_properties(self):
+        from google.cloud.bigquery.table import PartitionRange
+
+        object_under_test = self._make_one(
+            range_=PartitionRange(start=1, end=10, interval=2), field="integer_col"
+        )
+        assert object_under_test.field == "integer_col"
+        assert object_under_test.range_.start == 1
+        assert object_under_test.range_.end == 10
+        assert object_under_test.range_.interval == 2
+
+    def test_constructor_w_resource(self):
+        object_under_test = self._make_one(
+            _properties={
+                "field": "some_column",
+                "range": {"start": -1234567890, "end": 1234567890, "interval": 1000000},
+            }
+        )
+        assert object_under_test.field == "some_column"
+        assert object_under_test.range_.start == -1234567890
+        assert object_under_test.range_.end == 1234567890
+        assert object_under_test.range_.interval == 1000000
+
+    def test_range_w_wrong_type(self):
+        object_under_test = self._make_one()
+        with pytest.raises(ValueError, match="PartitionRange"):
+            object_under_test.range_ = object()
+
+    def test_repr(self):
+        from google.cloud.bigquery.table import PartitionRange
+
+        object_under_test = self._make_one(
+            range_=PartitionRange(start=1, end=10, interval=2), field="integer_col"
+        )
+        assert (
+            repr(object_under_test)
+            == "RangePartitioning(field='integer_col', range_=PartitionRange(end=10, interval=2, start=1))"
+        )
+
+
 class TestTimePartitioning(unittest.TestCase):
     def _get_target_class(self):
         from google.cloud.bigquery.table import TimePartitioning