From a7699aa8f7e0bd95ade42f7a48cfeff8989f5764 Mon Sep 17 00:00:00 2001 From: Anmol Sahoo Date: Fri, 18 Feb 2022 02:32:46 -0500 Subject: [PATCH 1/4] chore: Add support for accessing BI Engine statistics The REST API returns BiEngineStatistics for a query which denotes if the query was accelerated by BI Engine or not. This commit adds the necessary function to access this information for executed queries. --- google/cloud/bigquery/enums.py | 40 +++++++++++++++++++++ google/cloud/bigquery/job/query.py | 54 ++++++++++++++++++++++++++++ tests/unit/job/test_query_stats.py | 57 ++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 7fc0a5fd6..d8ca4bf6c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -28,6 +28,46 @@ class AutoRowIDs(enum.Enum): GENERATE_UUID = enum.auto() +class BiEngineReasonCode(enum.Enum): + """Specifies reason why BI Engine did not accelerate query""" + + CODE_UNSPECIFIED = enum.auto() + """BiEngineReason not specified.""" + + NO_RESERVATION = enum.auto() + """No reservation available for BI Engine acceleration.""" + + INSUFFICIENT_RESERVATION = enum.auto() + """Not enough memory available for BI Engine acceleration.""" + + UNCACHED = enum.auto() + """Data is not-cached and could not be accelerated by BI Engine.""" + + UNSUPPORTED_SQL_TEXT = enum.auto() + """This particular SQL text is not supported for acceleration by BI Engine.""" + + INPUT_TOO_LARGE = enum.auto() + """Input too large for acceleration by BI Engine.""" + + OTHER_REASON = enum.auto() + """Catch-all code for all other cases for partial or disabled acceleration.""" + + TABLE_EXCLUDED = enum.auto() + """One or more tables were not eligible for BI Engine acceleration.""" + + +class BiEngineMode(enum.Enum): + """Specifies which mode of BI Engine acceleration was performed""" + + ACCELERATION_MODE_UNSPECIFIED = enum.auto() + + DISABLED = enum.auto() + + PARTIAL = enum.auto() + + FULL = enum.auto() + + class Compression(object): """The compression type to use for exported files. The default value is :attr:`NONE`. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 2dd945984..32ca99b68 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -29,6 +29,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import KeyResultStatementKind +from google.cloud.bigquery.enums import BiEngineMode, BiEngineReasonCode from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import ( @@ -121,6 +122,50 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class BiEngineReason(typing.NamedTuple): + """Reason for BI Engine acceleration failure + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginereason + """ + + code: BiEngineReasonCode = BiEngineReasonCode.CODE_UNSPECIFIED + + reason: str = "" + + @classmethod + def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason": + return cls(BiEngineReasonCode[reason.get("code")], reason.get("message")) + + +class BiEngineStats(typing.NamedTuple): + """Statistics for a BI Engine query + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginestatistics + """ + + mode: BiEngineMode = BiEngineMode.ACCELERATION_MODE_UNSPECIFIED + """ Specifies which mode of BI Engine acceleration was performed (if any) + """ + + reasons: List[BiEngineReason] = [] + """ Contains explanatory messages in case of DISABLED / PARTIAL acceleration + """ + + @classmethod + def from_api_repr(cls, stats: Dict[str, str]) -> "BiEngineStats": + biEngineMode = stats.get("biEngineMode") + biEngineReasons = stats.get("biEngineReasons") + + mode = BiEngineMode[biEngineMode] + + if biEngineReasons is None: + reasons = [] + else: + reasons = [BiEngineReason.from_api_repr(r) for r in biEngineReasons] + + return cls(mode, reasons) + + class DmlStats(typing.NamedTuple): """Detailed statistics for DML statements. @@ -1191,6 +1236,15 @@ def dml_stats(self) -> Optional[DmlStats]: else: return DmlStats.from_api_repr(stats) + @property + def bi_engine_stats(self) -> Optional[BiEngineStats]: + stats = self._job_statistics().get("biEngineStatistics") + + if stats is not None: + return None + else: + return BiEngineStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index e70eb097c..d618bc3d4 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ -13,6 +13,63 @@ # limitations under the License. from .helpers import _Base +from google.cloud.bigquery.enums import BiEngineMode, BiEngineReasonCode + + +class TestBiEngineStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job.query import BiEngineStats + + return BiEngineStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + bi_engine_stats = self._make_one() + assert bi_engine_stats.mode == BiEngineMode.ACCELERATION_MODE_UNSPECIFIED + assert bi_engine_stats.reasons == [] + + def test_from_api_repr_unspecified(self): + klass = self._get_target_class() + result = klass.from_api_repr({"biEngineMode": "ACCELERATION_MODE_UNSPECIFIED"}) + + assert isinstance(result, klass) + assert result.mode == BiEngineMode.ACCELERATION_MODE_UNSPECIFIED + assert result.reasons == [] + + def test_from_api_repr_full(self): + klass = self._get_target_class() + result = klass.from_api_repr({"biEngineMode": "FULL"}) + + assert isinstance(result, klass) + assert result.mode == BiEngineMode.FULL + assert result.reasons == [] + + def test_from_api_repr_disabled(self): + klass = self._get_target_class() + result = klass.from_api_repr( + { + "biEngineMode": "DISABLED", + "biEngineReasons": [ + { + "code": "OTHER_REASON", + "message": "Unable to support input table xyz due to an internal error.", + } + ], + } + ) + + assert isinstance(result, klass) + assert result.mode == BiEngineMode.DISABLED + + reason = result.reasons[0] + assert reason.code == BiEngineReasonCode.OTHER_REASON + assert ( + reason.reason + == "Unable to support input table xyz due to an internal error." + ) class TestDmlStats: From 5264e92ccb0d07b9f88bb804760002e6a5ba5dd2 Mon Sep 17 00:00:00 2001 From: Anmol Sahoo Date: Fri, 18 Feb 2022 10:21:43 -0500 Subject: [PATCH 2/4] fix: Removed enums and replaced with string constants --- google/cloud/bigquery/enums.py | 40 ------------------------------ google/cloud/bigquery/job/query.py | 17 ++++++------- tests/unit/job/test_query_stats.py | 11 ++++---- 3 files changed, 12 insertions(+), 56 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index d8ca4bf6c..7fc0a5fd6 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -28,46 +28,6 @@ class AutoRowIDs(enum.Enum): GENERATE_UUID = enum.auto() -class BiEngineReasonCode(enum.Enum): - """Specifies reason why BI Engine did not accelerate query""" - - CODE_UNSPECIFIED = enum.auto() - """BiEngineReason not specified.""" - - NO_RESERVATION = enum.auto() - """No reservation available for BI Engine acceleration.""" - - INSUFFICIENT_RESERVATION = enum.auto() - """Not enough memory available for BI Engine acceleration.""" - - UNCACHED = enum.auto() - """Data is not-cached and could not be accelerated by BI Engine.""" - - UNSUPPORTED_SQL_TEXT = enum.auto() - """This particular SQL text is not supported for acceleration by BI Engine.""" - - INPUT_TOO_LARGE = enum.auto() - """Input too large for acceleration by BI Engine.""" - - OTHER_REASON = enum.auto() - """Catch-all code for all other cases for partial or disabled acceleration.""" - - TABLE_EXCLUDED = enum.auto() - """One or more tables were not eligible for BI Engine acceleration.""" - - -class BiEngineMode(enum.Enum): - """Specifies which mode of BI Engine acceleration was performed""" - - ACCELERATION_MODE_UNSPECIFIED = enum.auto() - - DISABLED = enum.auto() - - PARTIAL = enum.auto() - - FULL = enum.auto() - - class Compression(object): """The compression type to use for exported files. The default value is :attr:`NONE`. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 32ca99b68..06fb50364 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -29,7 +29,6 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import KeyResultStatementKind -from google.cloud.bigquery.enums import BiEngineMode, BiEngineReasonCode from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import ( @@ -128,13 +127,13 @@ class BiEngineReason(typing.NamedTuple): https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginereason """ - code: BiEngineReasonCode = BiEngineReasonCode.CODE_UNSPECIFIED + code: str = "CODE_UNSPECIFIED" reason: str = "" @classmethod def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason": - return cls(BiEngineReasonCode[reason.get("code")], reason.get("message")) + return cls(reason.get("code"), reason.get("message")) class BiEngineStats(typing.NamedTuple): @@ -143,7 +142,7 @@ class BiEngineStats(typing.NamedTuple): https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginestatistics """ - mode: BiEngineMode = BiEngineMode.ACCELERATION_MODE_UNSPECIFIED + mode: str = "ACCELERATION_MODE_UNSPECIFIED" """ Specifies which mode of BI Engine acceleration was performed (if any) """ @@ -153,15 +152,13 @@ class BiEngineStats(typing.NamedTuple): @classmethod def from_api_repr(cls, stats: Dict[str, str]) -> "BiEngineStats": - biEngineMode = stats.get("biEngineMode") - biEngineReasons = stats.get("biEngineReasons") + mode = stats.get("biEngineMode") + reasons = stats.get("biEngineReasons") - mode = BiEngineMode[biEngineMode] - - if biEngineReasons is None: + if reasons is None: reasons = [] else: - reasons = [BiEngineReason.from_api_repr(r) for r in biEngineReasons] + reasons = [BiEngineReason.from_api_repr(r) for r in reasons] return cls(mode, reasons) diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index d618bc3d4..13e022ced 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ -13,7 +13,6 @@ # limitations under the License. from .helpers import _Base -from google.cloud.bigquery.enums import BiEngineMode, BiEngineReasonCode class TestBiEngineStats: @@ -28,7 +27,7 @@ def _make_one(self, *args, **kw): def test_ctor_defaults(self): bi_engine_stats = self._make_one() - assert bi_engine_stats.mode == BiEngineMode.ACCELERATION_MODE_UNSPECIFIED + assert bi_engine_stats.mode == "ACCELERATION_MODE_UNSPECIFIED" assert bi_engine_stats.reasons == [] def test_from_api_repr_unspecified(self): @@ -36,7 +35,7 @@ def test_from_api_repr_unspecified(self): result = klass.from_api_repr({"biEngineMode": "ACCELERATION_MODE_UNSPECIFIED"}) assert isinstance(result, klass) - assert result.mode == BiEngineMode.ACCELERATION_MODE_UNSPECIFIED + assert result.mode == "ACCELERATION_MODE_UNSPECIFIED" assert result.reasons == [] def test_from_api_repr_full(self): @@ -44,7 +43,7 @@ def test_from_api_repr_full(self): result = klass.from_api_repr({"biEngineMode": "FULL"}) assert isinstance(result, klass) - assert result.mode == BiEngineMode.FULL + assert result.mode == "FULL" assert result.reasons == [] def test_from_api_repr_disabled(self): @@ -62,10 +61,10 @@ def test_from_api_repr_disabled(self): ) assert isinstance(result, klass) - assert result.mode == BiEngineMode.DISABLED + assert result.mode == "DISABLED" reason = result.reasons[0] - assert reason.code == BiEngineReasonCode.OTHER_REASON + assert reason.code == "OTHER_REASON" assert ( reason.reason == "Unable to support input table xyz due to an internal error." From 7b3e18f5d41dc3b0960dde7b2b8cc5ebb1821a99 Mon Sep 17 00:00:00 2001 From: Anmol Sahoo Date: Fri, 18 Feb 2022 12:16:27 -0500 Subject: [PATCH 3/4] fix: Fixed logic for creating BIEngineStats and added test case --- google/cloud/bigquery/job/query.py | 12 ++++-------- tests/unit/job/test_query.py | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 06fb50364..116b807a3 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -153,13 +153,9 @@ class BiEngineStats(typing.NamedTuple): @classmethod def from_api_repr(cls, stats: Dict[str, str]) -> "BiEngineStats": mode = stats.get("biEngineMode") - reasons = stats.get("biEngineReasons") - - if reasons is None: - reasons = [] - else: - reasons = [BiEngineReason.from_api_repr(r) for r in reasons] - + reasons = [ + BiEngineReason.from_api_repr(r) for r in stats.get("biEngineReasons", []) + ] return cls(mode, reasons) @@ -1237,7 +1233,7 @@ def dml_stats(self) -> Optional[DmlStats]: def bi_engine_stats(self) -> Optional[BiEngineStats]: stats = self._job_statistics().get("biEngineStatistics") - if stats is not None: + if stats is None: return None else: return BiEngineStats.from_api_repr(stats) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 5fb76b9e9..33a52cfec 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -877,6 +877,23 @@ def test_estimated_bytes_processed(self): query_stats["estimatedBytesProcessed"] = str(est_bytes) self.assertEqual(job.estimated_bytes_processed, est_bytes) + def test_bi_engine_stats(self): + from google.cloud.bigquery.job.query import BiEngineStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.bi_engine_stats is None + + statistics = job._properties["statistics"] = {} + assert job.bi_engine_stats is None + + query_stats = statistics["query"] = {} + assert job.bi_engine_stats is None + + query_stats["biEngineStatistics"] = {"biEngineMode": "FULL"} + assert isinstance(job.bi_engine_stats, BiEngineStats) + assert job.bi_engine_stats.mode == "FULL" + def test_dml_stats(self): from google.cloud.bigquery.job.query import DmlStats From f122e083158c7ff3b14f68f188f8fe82e7f78f89 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 18 Feb 2022 12:21:56 -0600 Subject: [PATCH 4/4] Attempt at mypy fix --- google/cloud/bigquery/job/query.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 116b807a3..2fd7afb76 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -133,7 +133,7 @@ class BiEngineReason(typing.NamedTuple): @classmethod def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason": - return cls(reason.get("code"), reason.get("message")) + return cls(reason.get("code", "CODE_UNSPECIFIED"), reason.get("message", "")) class BiEngineStats(typing.NamedTuple): @@ -151,8 +151,8 @@ class BiEngineStats(typing.NamedTuple): """ @classmethod - def from_api_repr(cls, stats: Dict[str, str]) -> "BiEngineStats": - mode = stats.get("biEngineMode") + def from_api_repr(cls, stats: Dict[str, Any]) -> "BiEngineStats": + mode = stats.get("biEngineMode", "ACCELERATION_MODE_UNSPECIFIED") reasons = [ BiEngineReason.from_api_repr(r) for r in stats.get("biEngineReasons", []) ]