
Commit d7a9a80

blainehansen authored and tswast committed
Add default QueryJobConfig to Client (#6088)
* master
* working implementation of default QueryJobConfigs attached to Client
* removing comments and help texts
* fixing lints
* bringing coverage up to 100%
* making revisions
* missed some changes
* making code tweaks
* Make _JobConfig._fill_from_default semi-private. Also, update the docstrings to Google/Napoleon-style.
1 parent 301716d commit d7a9a80

File tree: 4 files changed (+250 / -3 lines)


google/cloud/bigquery/client.py

Lines changed: 25 additions & 3 deletions
@@ -108,8 +108,11 @@ class Client(ClientWithProject):
             current object.
             This parameter should be considered private, and could change in
             the future.
-        location str:
+        location (str):
             (Optional) Default location for jobs / datasets / tables.
+        default_query_job_config (google.cloud.bigquery.job.QueryJobConfig):
+            (Optional) Default ``QueryJobConfig``.
+            Will be merged into job configs passed into the ``query`` method.

     Raises:
         google.auth.exceptions.DefaultCredentialsError:
@@ -122,11 +125,13 @@ class Client(ClientWithProject):
     """The scopes required for authenticating as a BigQuery consumer."""

     def __init__(
-            self, project=None, credentials=None, _http=None, location=None):
+            self, project=None, credentials=None, _http=None,
+            location=None, default_query_job_config=None):
         super(Client, self).__init__(
             project=project, credentials=credentials, _http=_http)
         self._connection = Connection(self)
         self._location = location
+        self._default_query_job_config = default_query_job_config

     @property
     def location(self):
@@ -1187,7 +1192,9 @@ def extract_table(
         return extract_job

     def query(
-            self, query, job_config=None, job_id=None, job_id_prefix=None,
+            self, query,
+            job_config=None,
+            job_id=None, job_id_prefix=None,
             location=None, project=None, retry=DEFAULT_RETRY):
         """Run a SQL query.
@@ -1202,6 +1209,10 @@ def query(
         Keyword Arguments:
             job_config (google.cloud.bigquery.job.QueryJobConfig):
                 (Optional) Extra configuration options for the job.
+                To override any options that were previously set in
+                the ``default_query_job_config`` given to the
+                ``Client`` constructor, manually set those options to ``None``,
+                or whatever value is preferred.
             job_id (str): (Optional) ID to use for the query job.
             job_id_prefix (str):
                 (Optional) The prefix to use for a randomly generated job ID.
@@ -1226,6 +1237,17 @@ def query(
         if location is None:
             location = self.location

+        if self._default_query_job_config:
+            if job_config:
+                # anything that's not defined on the incoming
+                # that is in the default,
+                # should be filled in with the default
+                # the incoming therefore has precedence
+                job_config = job_config._fill_from_default(
+                    self._default_query_job_config)
+            else:
+                job_config = self._default_query_job_config
+
         job_ref = job._JobReference(job_id, project=project, location=location)
         query_job = job.QueryJob(
             job_ref, query, client=self, job_config=job_config)
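
As a rough usage sketch of the behavior documented above (the project ID, dataset ID, and query text are placeholders, and application default credentials are assumed; this is not part of the commit itself):

from google.cloud import bigquery

# Placeholder project/dataset IDs; application default credentials assumed.
default_config = bigquery.QueryJobConfig()
default_config.use_query_cache = True
default_config.default_dataset = bigquery.DatasetReference(
    'my-project', 'my_dataset')

client = bigquery.Client(
    project='my-project', default_query_job_config=default_config)

# No per-call config: the client's default config is used as-is.
client.query('SELECT COUNT(*) FROM persons')

# A per-call config is merged with the default; options set here win,
# and an inherited default can be dropped by setting it back to None.
per_call_config = bigquery.QueryJobConfig()
per_call_config.maximum_bytes_billed = 2000
per_call_config.default_dataset = None
client.query('SELECT COUNT(*) FROM persons', job_config=per_call_config)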

google/cloud/bigquery/job.py

Lines changed: 33 additions & 0 deletions
@@ -819,6 +819,39 @@ def to_api_repr(self):
         """
         return copy.deepcopy(self._properties)

+    def _fill_from_default(self, default_job_config):
+        """Merge this job config with a default job config.
+
+        The keys in this object take precedence over the keys in the default
+        config. The merge is done at the top-level as well as for keys one
+        level below the job type.
+
+        Arguments:
+            default_job_config (google.cloud.bigquery.job._JobConfig):
+                The default job config that will be used to fill in self.
+
+        Returns:
+            google.cloud.bigquery.job._JobConfig: A new (merged) job config.
+        """
+        if self._job_type != default_job_config._job_type:
+            raise TypeError(
+                "attempted to merge two incompatible job types: "
+                + repr(self._job_type) + ', '
+                + repr(default_job_config._job_type))
+
+        new_job_config = self.__class__()
+
+        default_job_properties = copy.deepcopy(default_job_config._properties)
+        for key in self._properties:
+            if key != self._job_type:
+                default_job_properties[key] = self._properties[key]
+
+        default_job_properties[self._job_type] \
+            .update(self._properties[self._job_type])
+        new_job_config._properties = default_job_properties
+
+        return new_job_config
+
     @classmethod
     def from_api_repr(cls, resource):
         """Factory: construct a job configuration given its API representation

tests/unit/test_client.py

Lines changed: 164 additions & 0 deletions
@@ -96,6 +96,27 @@ def test_ctor_w_location(self):
         self.assertIs(client._connection.http, http)
         self.assertEqual(client.location, location)

+    def test_ctor_w_query_job_config(self):
+        from google.cloud.bigquery._http import Connection
+        from google.cloud.bigquery import QueryJobConfig
+
+        creds = _make_credentials()
+        http = object()
+        location = 'us-central'
+        job_config = QueryJobConfig()
+        job_config.dry_run = True
+
+        client = self._make_one(project=self.PROJECT, credentials=creds,
+                                _http=http, location=location,
+                                default_query_job_config=job_config)
+        self.assertIsInstance(client._connection, Connection)
+        self.assertIs(client._connection.credentials, creds)
+        self.assertIs(client._connection.http, http)
+        self.assertEqual(client.location, location)
+
+        self.assertIsInstance(client._default_query_job_config, QueryJobConfig)
+        self.assertTrue(client._default_query_job_config.dry_run)
+
     def test__get_query_results_miss_w_explicit_project_and_timeout(self):
         from google.cloud.exceptions import NotFound

@@ -2707,6 +2728,149 @@ def test_query_w_explicit_project(self):
             data=resource,
         )

+    def test_query_w_explicit_job_config(self):
+        job_id = 'some-job-id'
+        query = 'select count(*) from persons'
+        resource = {
+            'jobReference': {
+                'jobId': job_id,
+                'projectId': self.PROJECT,
+                'location': self.LOCATION,
+            },
+            'configuration': {
+                'query': {
+                    'query': query,
+                    'defaultDataset': {
+                        'projectId': self.PROJECT,
+                        'datasetId': 'some-dataset',
+                    },
+                    'useLegacySql': False,
+                    'useQueryCache': True,
+                    'maximumBytesBilled': '2000',
+                },
+            },
+        }
+
+        creds = _make_credentials()
+        http = object()
+
+        from google.cloud.bigquery import QueryJobConfig, DatasetReference
+        default_job_config = QueryJobConfig()
+        default_job_config.default_dataset = DatasetReference(
+            self.PROJECT, 'some-dataset')
+        default_job_config.maximum_bytes_billed = 1000
+
+        client = self._make_one(
+            project=self.PROJECT, credentials=creds,
+            _http=http, default_query_job_config=default_job_config)
+        conn = client._connection = _make_connection(resource)
+
+        job_config = QueryJobConfig()
+        job_config.use_query_cache = True
+        job_config.maximum_bytes_billed = 2000
+
+        client.query(
+            query, job_id=job_id, location=self.LOCATION,
+            job_config=job_config)
+
+        # Check that query actually starts the job.
+        conn.api_request.assert_called_once_with(
+            method='POST',
+            path='/projects/PROJECT/jobs',
+            data=resource,
+        )
+
+    def test_query_w_explicit_job_config_override(self):
+        job_id = 'some-job-id'
+        query = 'select count(*) from persons'
+        resource = {
+            'jobReference': {
+                'jobId': job_id,
+                'projectId': self.PROJECT,
+                'location': self.LOCATION,
+            },
+            'configuration': {
+                'query': {
+                    'query': query,
+                    'defaultDataset': None,
+                    'useLegacySql': False,
+                    'useQueryCache': True,
+                    'maximumBytesBilled': '2000',
+                },
+            },
+        }
+
+        creds = _make_credentials()
+        http = object()
+
+        from google.cloud.bigquery import QueryJobConfig, DatasetReference
+        default_job_config = QueryJobConfig()
+        default_job_config.default_dataset = DatasetReference(
+            self.PROJECT, 'some-dataset')
+        default_job_config.maximum_bytes_billed = 1000
+
+        client = self._make_one(
+            project=self.PROJECT, credentials=creds, _http=http,
+            default_query_job_config=default_job_config)
+        conn = client._connection = _make_connection(resource)
+
+        job_config = QueryJobConfig()
+        job_config.use_query_cache = True
+        job_config.maximum_bytes_billed = 2000
+        job_config.default_dataset = None
+
+        client.query(
+            query, job_id=job_id, location=self.LOCATION,
+            job_config=job_config,
+        )
+
+        # Check that query actually starts the job.
+        conn.api_request.assert_called_once_with(
+            method='POST',
+            path='/projects/PROJECT/jobs',
+            data=resource,
+        )
+
+    def test_query_w_client_default_config_no_incoming(self):
+        job_id = 'some-job-id'
+        query = 'select count(*) from persons'
+        resource = {
+            'jobReference': {
+                'jobId': job_id,
+                'projectId': self.PROJECT,
+                'location': self.LOCATION,
+            },
+            'configuration': {
+                'query': {
+                    'query': query,
+                    'useLegacySql': False,
+                    'maximumBytesBilled': '1000',
+                },
+            },
+        }
+
+        creds = _make_credentials()
+        http = object()
+
+        from google.cloud.bigquery import QueryJobConfig
+        default_job_config = QueryJobConfig()
+        default_job_config.maximum_bytes_billed = 1000
+
+        client = self._make_one(
+            project=self.PROJECT, credentials=creds, _http=http,
+            default_query_job_config=default_job_config)
+        conn = client._connection = _make_connection(resource)
+
+        client.query(
+            query, job_id=job_id, location=self.LOCATION)
+
+        # Check that query actually starts the job.
+        conn.api_request.assert_called_once_with(
+            method='POST',
+            path='/projects/PROJECT/jobs',
+            data=resource,
+        )
+
     def test_query_w_client_location(self):
         job_id = 'some-job-id'
         query = 'select count(*) from persons'

tests/unit/test_job.py

Lines changed: 28 additions & 0 deletions
@@ -912,6 +912,34 @@ def test_ctor(self):
         self.assertEqual(job_config._job_type, self.JOB_TYPE)
         self.assertEqual(job_config._properties, {self.JOB_TYPE: {}})

+    def test_fill_from_default(self):
+        from google.cloud.bigquery import QueryJobConfig
+
+        job_config = QueryJobConfig()
+        job_config.dry_run = True
+        job_config.maximum_bytes_billed = 1000
+
+        default_job_config = QueryJobConfig()
+        default_job_config.use_query_cache = True
+        default_job_config.maximum_bytes_billed = 2000
+
+        final_job_config = job_config._fill_from_default(default_job_config)
+        self.assertTrue(final_job_config.dry_run)
+        self.assertTrue(final_job_config.use_query_cache)
+        self.assertEqual(final_job_config.maximum_bytes_billed, 1000)
+
+    def test_fill_from_default_conflict(self):
+        from google.cloud.bigquery import QueryJobConfig
+
+        basic_job_config = QueryJobConfig()
+        conflicting_job_config = self._make_one('conflicting_job_type')
+        self.assertNotEqual(
+            basic_job_config._job_type, conflicting_job_config._job_type)
+
+        with self.assertRaises(TypeError):
+            basic_job_config._fill_from_default(
+                conflicting_job_config)
+
     @mock.patch('google.cloud.bigquery._helpers._get_sub_prop')
     def test__get_sub_prop_wo_default(self, _get_sub_prop):
         job_config = self._make_one()
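
The type check exercised by test_fill_from_default_conflict also applies across the public config classes; a minimal sketch (calling the semi-private ``_fill_from_default`` directly, as the tests do):

from google.cloud.bigquery import LoadJobConfig, QueryJobConfig

query_config = QueryJobConfig()
load_config = LoadJobConfig()

try:
    # Merging configs of different job types ('query' vs. 'load') is rejected.
    query_config._fill_from_default(load_config)
except TypeError as exc:
    print('cannot merge:', exc)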
