Description
This test failed!
To configure my behavior, see the Build Cop Bot documentation.
If I'm commenting on this issue too often, add the buildcop: quiet label and I will stop commenting.
commit: 114489e
buildURL: Build Status, Sponge
status: failed
Test output
line = ''
query = 'SELECT\n source_year AS year,\n COUNT(is_male) AS birth_count\nFROM `bigquery-public-data.samples.natality`\nGROUP BY year\nORDER BY year DESC\nLIMIT 15'

@magic_arguments.magic_arguments()
@magic_arguments.argument(
    "destination_var", nargs="?",
    help=("If provided, save the output to this variable instead of displaying it."),
)
@magic_arguments.argument(
    "--destination_table", type=str, default=None,
    help=(
        "If provided, save the output of the query to a new BigQuery table. "
        "Variable should be in a format <dataset_id>.<table_id>. "
        "If table does not exists, it will be created. "
        "If table already exists, its data will be overwritten."
    ),
)
@magic_arguments.argument(
    "--project", type=str, default=None,
    help=("Project to use for executing this query. Defaults to the context project."),
)
@magic_arguments.argument(
    "--max_results", default=None,
    help=(
        "Maximum number of rows in dataframe returned from executing the query."
        "Defaults to returning all rows."
    ),
)
@magic_arguments.argument(
    "--maximum_bytes_billed", default=None,
    help=(
        "maximum_bytes_billed to use for executing this query. Defaults to "
        "the context default_query_job_config.maximum_bytes_billed."
    ),
)
@magic_arguments.argument(
    "--dry_run", action="store_true", default=False,
    help=(
        "Sets query to be a dry run to estimate costs. "
        "Defaults to executing the query instead of dry run if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_legacy_sql", action="store_true", default=False,
    help=(
        "Sets query to use Legacy SQL instead of Standard SQL. Defaults to "
        "Standard SQL if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_bqstorage_api", action="store_true", default=None,
    help=(
        "[Deprecated] The BigQuery Storage API is already used by default to "
        "download large query results, and this option has no effect. "
        "If you want to switch to the classic REST API instead, use the "
        "--use_rest_api option."
    ),
)
@magic_arguments.argument(
    "--use_rest_api", action="store_true", default=False,
    help=(
        "Use the classic REST API instead of the BigQuery Storage API to "
        "download query results."
    ),
)
@magic_arguments.argument(
    "--verbose", action="store_true", default=False,
    help=(
        "If set, print verbose output, including the query job ID and the "
        "amount of time for the query to finish. By default, this "
        "information will be displayed as the query runs, but will be "
        "cleared after the query is finished."
    ),
)
@magic_arguments.argument(
    "--params", nargs="+", default=None,
    help=(
        "Parameters to format the query string. If present, the --params "
        "flag should be followed by a string representation of a dictionary "
        "in the format {'param_name': 'param_value'} (ex. {\"num\": 17}), "
        "or a reference to a dictionary in the same format. The dictionary "
        "reference can be made by including a '$' before the variable "
        "name (ex. $my_dict_var)."
    ),
)
def _cell_magic(line, query):
    """Underlying function for bigquery cell magic

    Note:
        This function contains the underlying logic for the 'bigquery' cell magic.
        This function is not meant to be called directly.

    Args:
        line (str): "%%bigquery" followed by arguments as required
        query (str): SQL query to run

    Returns:
        pandas.DataFrame: the query results.
    """
    # The built-in parser does not recognize Python structures such as dicts, thus
    # we extract the "--params" option and inteprpret it separately.
    try:
        params_option_value, rest_of_args = _split_args_line(line)
    except lap.exceptions.QueryParamsParseError as exc:
        rebranded_error = SyntaxError(
            "--params is not a correctly formatted JSON string or a JSON "
            "serializable dictionary"
        )
        six.raise_from(rebranded_error, exc)
    except lap.exceptions.DuplicateQueryParamsError as exc:
        rebranded_error = ValueError("Duplicate --params option.")
        six.raise_from(rebranded_error, exc)
    except lap.exceptions.ParseError as exc:
        rebranded_error = ValueError(
            "Unrecognized input, are option values correct? "
            "Error details: {}".format(exc.args[0])
        )
        six.raise_from(rebranded_error, exc)

    args = magic_arguments.parse_argstring(_cell_magic, rest_of_args)

    if args.use_bqstorage_api is not None:
        warnings.warn(
            "Deprecated option --use_bqstorage_api, the BigQuery "
            "Storage API is already used by default.",
            category=DeprecationWarning,
        )
    use_bqstorage_api = not args.use_rest_api

    params = []
    if params_option_value:
        # A non-existing params variable is not expanded and ends up in the input
        # in its raw form, e.g. "$query_params".
        if params_option_value.startswith("$"):
            msg = 'Parameter expansion failed, undefined variable "{}".'.format(
                params_option_value[1:]
            )
            raise NameError(msg)

        params = _helpers.to_query_parameters(ast.literal_eval(params_option_value))

    project = args.project or context.project
    client = bigquery.Client(
        project=project,
        credentials=context.credentials,
        default_query_job_config=context.default_query_job_config,
        client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
    )
    if context._connection:
        client._connection = context._connection
    bqstorage_client = _make_bqstorage_client(use_bqstorage_api, context.credentials)

    close_transports = functools.partial(_close_transports, client, bqstorage_client)

    try:
        if args.max_results:
            max_results = int(args.max_results)
        else:
            max_results = None

        query = query.strip()

        if not query:
            error = ValueError("Query is missing.")
            _handle_error(error, args.destination_var)
            return

        # Any query that does not contain whitespace (aside from leading and trailing whitespace)
        # is assumed to be a table id
        if not re.search(r"\s", query):
            try:
                rows = client.list_rows(query, max_results=max_results)
            except Exception as ex:
                _handle_error(ex, args.destination_var)
                return

            result = rows.to_dataframe(bqstorage_client=bqstorage_client)
            if args.destination_var:
                IPython.get_ipython().push({args.destination_var: result})
                return
            else:
                return result

        job_config = bigquery.job.QueryJobConfig()
        job_config.query_parameters = params
        job_config.use_legacy_sql = args.use_legacy_sql
        job_config.dry_run = args.dry_run

        if args.destination_table:
            split = args.destination_table.split(".")
            if len(split) != 2:
                raise ValueError(
                    "--destination_table should be in a <dataset_id>.<table_id> format."
                )
            dataset_id, table_id = split
            job_config.allow_large_results = True
            dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id)
            destination_table_ref = dataset_ref.table(table_id)
            job_config.destination = destination_table_ref
            job_config.create_disposition = "CREATE_IF_NEEDED"
            job_config.write_disposition = "WRITE_TRUNCATE"
            _create_dataset_if_necessary(client, dataset_id)

        if args.maximum_bytes_billed == "None":
            job_config.maximum_bytes_billed = 0
        elif args.maximum_bytes_billed is not None:
            value = int(args.maximum_bytes_billed)
            job_config.maximum_bytes_billed = value

        try:
            query_job = _run_query(client, query, job_config=job_config)
        except Exception as ex:
            _handle_error(ex, args.destination_var)
            return

        if not args.verbose:
            display.clear_output()

        if args.dry_run and args.destination_var:
            IPython.get_ipython().push({args.destination_var: query_job})
            return
        elif args.dry_run:
            print(
                "Query validated. This query will process {} bytes.".format(
                    query_job.total_bytes_processed
                )
            )
            return query_job

        if max_results:
            result = query_job.result(max_results=max_results).to_dataframe(
                bqstorage_client=bqstorage_client
            )
        else:
>           result = query_job.to_dataframe(bqstorage_client=bqstorage_client)

../../google/cloud/bigquery/magics/magics.py:606:
self = <google.cloud.bigquery.job.QueryJob object at 0x7f03f021bdd8>
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
dtypes = None, progress_bar_type = None, create_bqstorage_client = True
date_as_object = True

def to_dataframe(
    self,
    bqstorage_client=None,
    dtypes=None,
    progress_bar_type=None,
    create_bqstorage_client=True,
    date_as_object=True,
):
    """Return a pandas DataFrame from a QueryJob

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster BigQuery
            Storage API to fetch rows from BigQuery. This API is a billable API.

            This method requires the ``fastavro`` and
            ``google-cloud-bigquery-storage`` libraries.

            Reading from a specific partition or snapshot is not currently
            supported by this method.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A dictionary of column names pandas ``dtype``s. The provided
            ``dtype`` is used when constructing the series for the column
            specified. Otherwise, the default pandas behavior is used.

        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature. See
            :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`
            for details.

            ..versionadded:: 1.11.0

        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client using
            the default API settings. The BigQuery Storage API is a faster way
            to fetch rows from BigQuery. See the ``bqstorage_client`` parameter
            for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            ..versionadded:: 1.24.0

        date_as_object (Optional[bool]):
            If ``True`` (default), cast dates to objects. If ``False``, convert
            to datetime64[ns] dtype.

            ..versionadded:: 1.26.0

    Returns:
        A :class:`~pandas.DataFrame` populated with row data and column headers
        from the query results. The column headers are derived from the
        destination table's schema.

    Raises:
        ValueError: If the `pandas` library cannot be imported.
    """
>   return self.result().to_dataframe(
        bqstorage_client=bqstorage_client,
        dtypes=dtypes,
        progress_bar_type=progress_bar_type,
        create_bqstorage_client=create_bqstorage_client,
        date_as_object=date_as_object,
    )

../../google/cloud/bigquery/job.py:3406:
self = <google.cloud.bigquery.table.RowIterator object at 0x7f03f01a99b0>
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
dtypes = {}, progress_bar_type = None, create_bqstorage_client = True
date_as_object = True

def to_dataframe(
    self,
    bqstorage_client=None,
    dtypes=None,
    progress_bar_type=None,
    create_bqstorage_client=True,
    date_as_object=True,
):
    """Create a pandas DataFrame by loading all pages of a query.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster BigQuery
            Storage API to fetch rows from BigQuery.

            This method requires the ``pyarrow`` and
            ``google-cloud-bigquery-storage`` libraries.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A dictionary of column names pandas ``dtype``s. The provided
            ``dtype`` is used when constructing the series for the column
            specified. Otherwise, the default pandas behavior is used.

        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
                No progress bar.
            ``'tqdm'``
                Use the :func:`tqdm.tqdm` function to print a progress bar
                to :data:`sys.stderr`.
            ``'tqdm_notebook'``
                Use the :func:`tqdm.tqdm_notebook` function to display a
                progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
                Use the :func:`tqdm.tqdm_gui` function to display a progress
                bar as a graphical dialog box.

            ..versionadded:: 1.11.0

        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client using
            the default API settings. The BigQuery Storage API is a faster way
            to fetch rows from BigQuery. See the ``bqstorage_client`` parameter
            for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            ..versionadded:: 1.24.0

        date_as_object (Optional[bool]):
            If ``True`` (default), cast dates to objects. If ``False``, convert
            to datetime64[ns] dtype.

            ..versionadded:: 1.26.0

    Returns:
        pandas.DataFrame:
            A :class:`~pandas.DataFrame` populated with row data and column
            headers from the query results. The column headers are derived
            from the destination table's schema.

    Raises:
        ValueError:
            If the :mod:`pandas` library cannot be imported, or the
            :mod:`google.cloud.bigquery_storage_v1` module is required but
            cannot be imported.
    """
    if pandas is None:
        raise ValueError(_NO_PANDAS_ERROR)
    if dtypes is None:
        dtypes = {}

    if (
        bqstorage_client or create_bqstorage_client
    ) and self.max_results is not None:
        warnings.warn(
            "Cannot use bqstorage_client if max_results is set, "
            "reverting to fetching data with the tabledata.list endpoint.",
            stacklevel=2,
        )
        create_bqstorage_client = False
        bqstorage_client = None

    if pyarrow is not None:
        # If pyarrow is available, calling to_arrow, then converting to a
        # pandas dataframe is about 2x faster. This is because pandas.concat is
        # rarely no-copy, whereas pyarrow.Table.from_batches + to_pandas is
        # usually no-copy.
>       record_batch = self.to_arrow(
            progress_bar_type=progress_bar_type,
            bqstorage_client=bqstorage_client,
            create_bqstorage_client=create_bqstorage_client,
        )

../../google/cloud/bigquery/table.py:1690:
self = <google.cloud.bigquery.table.RowIterator object at 0x7f03f01a99b0>
progress_bar_type = None
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
create_bqstorage_client = True

def to_arrow(
    self,
    progress_bar_type=None,
    bqstorage_client=None,
    create_bqstorage_client=True,
):
    """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table
    or query.

    Args:
        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
                No progress bar.
            ``'tqdm'``
                Use the :func:`tqdm.tqdm` function to print a progress bar
                to :data:`sys.stderr`.
            ``'tqdm_notebook'``
                Use the :func:`tqdm.tqdm_notebook` function to display a
                progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
                Use the :func:`tqdm.tqdm_gui` function to display a progress
                bar as a graphical dialog box.

        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster BigQuery
            Storage API to fetch rows from BigQuery. This API is a billable API.

            This method requires the ``pyarrow`` and
            ``google-cloud-bigquery-storage`` libraries.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.

        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client using
            the default API settings. The BigQuery Storage API is a faster way
            to fetch rows from BigQuery. See the ``bqstorage_client`` parameter
            for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            ..versionadded:: 1.24.0

    Returns:
        pyarrow.Table
            A :class:`pyarrow.Table` populated with row data and column headers
            from the query results. The column headers are derived from the
            destination table's schema.

    Raises:
        ValueError: If the :mod:`pyarrow` library cannot be imported.

    ..versionadded:: 1.17.0
    """
    if pyarrow is None:
        raise ValueError(_NO_PYARROW_ERROR)

    if (
        bqstorage_client or create_bqstorage_client
    ) and self.max_results is not None:
        warnings.warn(
            "Cannot use bqstorage_client if max_results is set, "
            "reverting to fetching data with the tabledata.list endpoint.",
            stacklevel=2,
        )
        create_bqstorage_client = False
        bqstorage_client = None

    owns_bqstorage_client = False
    if not bqstorage_client and create_bqstorage_client:
        bqstorage_client = self.client._create_bqstorage_client()
        owns_bqstorage_client = bqstorage_client is not None

    try:
        progress_bar = self._get_progress_bar(progress_bar_type)

        record_batches = []
>       for record_batch in self._to_arrow_iterable(bqstorage_client=bqstorage_client):

../../google/cloud/bigquery/table.py:1508:
self = <google.cloud.bigquery.table.RowIterator object at 0x7f03f01a99b0>
bqstorage_download = functools.partial(<function download_arrow_bqstorage at 0x7f03f2e5fbf8>, 'python-docs-samples-tests', Table(TableRefer...ct at 0x7f03f02a2a58>, preserve_order=<_sre.SRE_Match object; span=(125, 133), match='ORDER BY'>, selected_fields=None)
tabledata_list_download = functools.partial(<function download_arrow_tabledata_list at 0x7f03f2e5f7b8>, <generator object Iterator._page_iter at...eld('year', 'INTEGER', 'NULLABLE', None, (), None), SchemaField('birth_count', 'INTEGER', 'NULLABLE', None, (), None)])
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>

def _to_page_iterable(
    self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
    if bqstorage_client is not None:
>       for item in bqstorage_download():

../../google/cloud/bigquery/table.py:1397:
project_id = 'python-docs-samples-tests'
table = Table(TableReference(DatasetReference('python-docs-samples-tests', '_d5c40c0e092f2208d182028dd13910fd3f0ceb53'), 'anon6023c82685ab155c9f7fb4144a0a67f172df90a6'))
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
preserve_order = <_sre.SRE_Match object; span=(125, 133), match='ORDER BY'>
selected_fields = None
page_to_item = <function _bqstorage_page_to_arrow at 0x7f03f2e5f950>

def _download_table_bqstorage(
    project_id,
    table,
    bqstorage_client,
    preserve_order=False,
    selected_fields=None,
    page_to_item=None,
):
    """Use (faster, but billable) BQ Storage API to construct DataFrame."""

    # Passing a BQ Storage client in implies that the BigQuery Storage library
    # is available and can be imported.
    from google.cloud import bigquery_storage

    if "$" in table.table_id:
        raise ValueError(
            "Reading from a specific partition is not currently supported."
        )
    if "@" in table.table_id:
        raise ValueError("Reading from a specific snapshot is not currently supported.")

    requested_streams = 1 if preserve_order else 0

>   requested_session = bigquery_storage.types.ReadSession(
        table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW
    )
E   AttributeError: module 'google.cloud.bigquery_storage_v1.types' has no attribute 'DataFormat'
../../google/cloud/bigquery/_pandas_helpers.py:615: AttributeError
During handling of the above exception, another exception occurred:
ipython = <IPython.terminal.interactiveshell.TerminalInteractiveShell object at 0x7f03f09c7160>
def test_jupyter_tutorial(ipython):
    matplotlib.use("agg")
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    sample = """
    # [START bigquery_jupyter_magic_gender_by_year]
    %%bigquery
    SELECT
        source_year AS year,
        COUNT(is_male) AS birth_count
    FROM `bigquery-public-data.samples.natality`
    GROUP BY year
    ORDER BY year DESC
    LIMIT 15
    # [END bigquery_jupyter_magic_gender_by_year]
    """
    result = ip.run_cell(_strip_region_tags(sample))
>   result.raise_error()  # Throws an exception if the cell failed.

jupyter_tutorial_test.py:69:
.nox/py-3-6/lib/python3.6/site-packages/IPython/core/interactiveshell.py:331: in raise_error
raise self.error_in_exec
:1: in
get_ipython().run_cell_magic('bigquery', '', 'SELECT\n source_year AS year,\n COUNT(is_male) AS birth_count\nFROM `bigquery-public-data.samples.natality`\nGROUP BY year\nORDER BY year DESC\nLIMIT 15\n\n')
.nox/py-3-6/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2371: in run_cell_magic
result = fn(*args, **kwargs)
../../google/cloud/bigquery/magics/magics.py:613: in _cell_magic
close_transports()
client = <google.cloud.bigquery.client.Client object at 0x7f03f02a2908>
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>

def _close_transports(client, bqstorage_client):
    """Close the given clients' underlying transport channels.

    Closing the transport is needed to release system resources, namely open
    sockets.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`):
        bqstorage_client
            (Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]):
            A client for the BigQuery Storage API.
    """
    client.close()
    if bqstorage_client is not None:
>       bqstorage_client._transport.grpc_channel.close()
E       AttributeError: 'BigQueryReadClient' object has no attribute '_transport'
../../google/cloud/bigquery/magics/magics.py:679: AttributeError
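Both AttributeErrors are consistent with a dependency mismatch rather than a problem in the tutorial query itself: the installed google-cloud-bigquery-storage appears to expose an older API surface (no DataFormat under bigquery_storage_v1.types, no _transport attribute on the read client) than the google-cloud-bigquery code calling it expects. The sketch below is a minimal, hedged diagnostic for that hypothesis; the attribute names come straight from the traceback, while the ">= 2.0.0" boundary and the helper name are assumptions and not part of this repository or the failing test.

# Minimal diagnostic sketch (assumption: version mismatch between
# google-cloud-bigquery and google-cloud-bigquery-storage). The check mirrors
# the attribute the traceback reports as missing; nothing here is library code.
import pkg_resources


def diagnose_bqstorage_mismatch():
    try:
        version = pkg_resources.get_distribution(
            "google-cloud-bigquery-storage"
        ).version
    except pkg_resources.DistributionNotFound:
        print("google-cloud-bigquery-storage is not installed.")
        return

    from google.cloud import bigquery_storage_v1

    # _pandas_helpers builds a ReadSession with bigquery_storage.types.DataFormat.ARROW.
    has_data_format = hasattr(bigquery_storage_v1.types, "DataFormat")

    print("google-cloud-bigquery-storage version:", version)
    print("bigquery_storage_v1.types.DataFormat present:", has_data_format)
    if not has_data_format:
        print(
            "Installed storage client predates the surface the BigQuery magics "
            "expect; upgrading google-cloud-bigquery-storage (>= 2.0.0 assumed) "
            "or pinning a matching google-cloud-bigquery is the likely fix."
        )


if __name__ == "__main__":
    diagnose_bqstorage_mismatch()

The second AttributeError ('BigQueryReadClient' object has no attribute '_transport') would be explained by the same mismatch, since _close_transports reaches for a private attribute of the newer generated client; that reading is an inference from this log, not something the report states.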