samples.snippets.jupyter_tutorial_test: test_jupyter_tutorial failed #288

@flaky-bot


This test failed!

To configure my behavior, see the Build Cop Bot documentation.

If I'm commenting on this issue too often, add the buildcop: quiet label and
I will stop commenting.


commit: 114489e
buildURL: Build Status, Sponge
status: failed

Test output
line = ''
query = 'SELECT\n    source_year AS year,\n    COUNT(is_male) AS birth_count\nFROM `bigquery-public-data.samples.natality`\nGROUP BY year\nORDER BY year DESC\nLIMIT 15'
@magic_arguments.magic_arguments()
@magic_arguments.argument(
    "destination_var",
    nargs="?",
    help=("If provided, save the output to this variable instead of displaying it."),
)
@magic_arguments.argument(
    "--destination_table",
    type=str,
    default=None,
    help=(
        "If provided, save the output of the query to a new BigQuery table. "
        "Variable should be in a format <dataset_id>.<table_id>. "
        "If table does not exists, it will be created. "
        "If table already exists, its data will be overwritten."
    ),
)
@magic_arguments.argument(
    "--project",
    type=str,
    default=None,
    help=("Project to use for executing this query. Defaults to the context project."),
)
@magic_arguments.argument(
    "--max_results",
    default=None,
    help=(
        "Maximum number of rows in dataframe returned from executing the query."
        "Defaults to returning all rows."
    ),
)
@magic_arguments.argument(
    "--maximum_bytes_billed",
    default=None,
    help=(
        "maximum_bytes_billed to use for executing this query. Defaults to "
        "the context default_query_job_config.maximum_bytes_billed."
    ),
)
@magic_arguments.argument(
    "--dry_run",
    action="store_true",
    default=False,
    help=(
        "Sets query to be a dry run to estimate costs. "
        "Defaults to executing the query instead of dry run if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_legacy_sql",
    action="store_true",
    default=False,
    help=(
        "Sets query to use Legacy SQL instead of Standard SQL. Defaults to "
        "Standard SQL if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_bqstorage_api",
    action="store_true",
    default=None,
    help=(
        "[Deprecated] The BigQuery Storage API is already used by default to "
        "download large query results, and this option has no effect. "
        "If you want to switch to the classic REST API instead, use the "
        "--use_rest_api option."
    ),
)
@magic_arguments.argument(
    "--use_rest_api",
    action="store_true",
    default=False,
    help=(
        "Use the classic REST API instead of the BigQuery Storage API to "
        "download query results."
    ),
)
@magic_arguments.argument(
    "--verbose",
    action="store_true",
    default=False,
    help=(
        "If set, print verbose output, including the query job ID and the "
        "amount of time for the query to finish. By default, this "
        "information will be displayed as the query runs, but will be "
        "cleared after the query is finished."
    ),
)
@magic_arguments.argument(
    "--params",
    nargs="+",
    default=None,
    help=(
        "Parameters to format the query string. If present, the --params "
        "flag should be followed by a string representation of a dictionary "
        "in the format {'param_name': 'param_value'} (ex. {\"num\": 17}), "
        "or a reference to a dictionary in the same format. The dictionary "
        "reference can be made by including a '$' before the variable "
        "name (ex. $my_dict_var)."
    ),
)
def _cell_magic(line, query):
    """Underlying function for bigquery cell magic

    Note:
        This function contains the underlying logic for the 'bigquery' cell
        magic. This function is not meant to be called directly.

    Args:
        line (str): "%%bigquery" followed by arguments as required
        query (str): SQL query to run

    Returns:
        pandas.DataFrame: the query results.
    """
    # The built-in parser does not recognize Python structures such as dicts, thus
    # we extract the "--params" option and inteprpret it separately.
    try:
        params_option_value, rest_of_args = _split_args_line(line)
    except lap.exceptions.QueryParamsParseError as exc:
        rebranded_error = SyntaxError(
            "--params is not a correctly formatted JSON string or a JSON "
            "serializable dictionary"
        )
        six.raise_from(rebranded_error, exc)
    except lap.exceptions.DuplicateQueryParamsError as exc:
        rebranded_error = ValueError("Duplicate --params option.")
        six.raise_from(rebranded_error, exc)
    except lap.exceptions.ParseError as exc:
        rebranded_error = ValueError(
            "Unrecognized input, are option values correct? "
            "Error details: {}".format(exc.args[0])
        )
        six.raise_from(rebranded_error, exc)

    args = magic_arguments.parse_argstring(_cell_magic, rest_of_args)

    if args.use_bqstorage_api is not None:
        warnings.warn(
            "Deprecated option --use_bqstorage_api, the BigQuery "
            "Storage API is already used by default.",
            category=DeprecationWarning,
        )
    use_bqstorage_api = not args.use_rest_api

    params = []
    if params_option_value:
        # A non-existing params variable is not expanded and ends up in the input
        # in its raw form, e.g. "$query_params".
        if params_option_value.startswith("$"):
            msg = 'Parameter expansion failed, undefined variable "{}".'.format(
                params_option_value[1:]
            )
            raise NameError(msg)

        params = _helpers.to_query_parameters(ast.literal_eval(params_option_value))

    project = args.project or context.project
    client = bigquery.Client(
        project=project,
        credentials=context.credentials,
        default_query_job_config=context.default_query_job_config,
        client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
    )
    if context._connection:
        client._connection = context._connection
    bqstorage_client = _make_bqstorage_client(use_bqstorage_api, context.credentials)

    close_transports = functools.partial(_close_transports, client, bqstorage_client)

    try:
        if args.max_results:
            max_results = int(args.max_results)
        else:
            max_results = None

        query = query.strip()

        if not query:
            error = ValueError("Query is missing.")
            _handle_error(error, args.destination_var)
            return

        # Any query that does not contain whitespace (aside from leading and trailing whitespace)
        # is assumed to be a table id
        if not re.search(r"\s", query):
            try:
                rows = client.list_rows(query, max_results=max_results)
            except Exception as ex:
                _handle_error(ex, args.destination_var)
                return

            result = rows.to_dataframe(bqstorage_client=bqstorage_client)
            if args.destination_var:
                IPython.get_ipython().push({args.destination_var: result})
                return
            else:
                return result

        job_config = bigquery.job.QueryJobConfig()
        job_config.query_parameters = params
        job_config.use_legacy_sql = args.use_legacy_sql
        job_config.dry_run = args.dry_run

        if args.destination_table:
            split = args.destination_table.split(".")
            if len(split) != 2:
                raise ValueError(
                    "--destination_table should be in a <dataset_id>.<table_id> format."
                )
            dataset_id, table_id = split
            job_config.allow_large_results = True
            dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id)
            destination_table_ref = dataset_ref.table(table_id)
            job_config.destination = destination_table_ref
            job_config.create_disposition = "CREATE_IF_NEEDED"
            job_config.write_disposition = "WRITE_TRUNCATE"
            _create_dataset_if_necessary(client, dataset_id)

        if args.maximum_bytes_billed == "None":
            job_config.maximum_bytes_billed = 0
        elif args.maximum_bytes_billed is not None:
            value = int(args.maximum_bytes_billed)
            job_config.maximum_bytes_billed = value

        try:
            query_job = _run_query(client, query, job_config=job_config)
        except Exception as ex:
            _handle_error(ex, args.destination_var)
            return

        if not args.verbose:
            display.clear_output()

        if args.dry_run and args.destination_var:
            IPython.get_ipython().push({args.destination_var: query_job})
            return
        elif args.dry_run:
            print(
                "Query validated. This query will process {} bytes.".format(
                    query_job.total_bytes_processed
                )
            )
            return query_job

        if max_results:
            result = query_job.result(max_results=max_results).to_dataframe(
                bqstorage_client=bqstorage_client
            )
        else:
            result = query_job.to_dataframe(bqstorage_client=bqstorage_client)

../../google/cloud/bigquery/magics/magics.py:606:


self = <google.cloud.bigquery.job.QueryJob object at 0x7f03f021bdd8>
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
dtypes = None, progress_bar_type = None, create_bqstorage_client = True
date_as_object = True

def to_dataframe(
    self,
    bqstorage_client=None,
    dtypes=None,
    progress_bar_type=None,
    create_bqstorage_client=True,
    date_as_object=True,
):
    """Return a pandas DataFrame from a QueryJob

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster
            BigQuery Storage API to fetch rows from BigQuery. This
            API is a billable API.

            This method requires the ``fastavro`` and
            ``google-cloud-bigquery-storage`` libraries.

            Reading from a specific partition or snapshot is not
            currently supported by this method.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A dictionary of column names to pandas ``dtype``s. The provided
            ``dtype`` is used when constructing the series for the column
            specified. Otherwise, the default pandas behavior is used.

        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            See
            :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`
            for details.

            ..versionadded:: 1.11.0
        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client
            using the default API settings. The BigQuery Storage API
            is a faster way to fetch rows from BigQuery. See the
            ``bqstorage_client`` parameter for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            ..versionadded:: 1.24.0

        date_as_object (Optional[bool]):
            If ``True`` (default), cast dates to objects. If ``False``, convert
            to datetime64[ns] dtype.

            ..versionadded:: 1.26.0

    Returns:
        A :class:`~pandas.DataFrame` populated with row data and column
        headers from the query results. The column headers are derived
        from the destination table's schema.

    Raises:
        ValueError: If the `pandas` library cannot be imported.
    """
    return self.result().to_dataframe(
        bqstorage_client=bqstorage_client,
        dtypes=dtypes,
        progress_bar_type=progress_bar_type,
        create_bqstorage_client=create_bqstorage_client,
        date_as_object=date_as_object,
    )

../../google/cloud/bigquery/job.py:3406:


self = <google.cloud.bigquery.table.RowIterator object at 0x7f03f01a99b0>
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
dtypes = {}, progress_bar_type = None, create_bqstorage_client = True
date_as_object = True

def to_dataframe(
    self,
    bqstorage_client=None,
    dtypes=None,
    progress_bar_type=None,
    create_bqstorage_client=True,
    date_as_object=True,
):
    """Create a pandas DataFrame by loading all pages of a query.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster
            BigQuery Storage API to fetch rows from BigQuery.

            This method requires the ``pyarrow`` and
            ``google-cloud-bigquery-storage`` libraries.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A dictionary of column names to pandas ``dtype``s. The provided
            ``dtype`` is used when constructing the series for the column
            specified. Otherwise, the default pandas behavior is used.
        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
              No progress bar.
            ``'tqdm'``
              Use the :func:`tqdm.tqdm` function to print a progress bar
              to :data:`sys.stderr`.
            ``'tqdm_notebook'``
              Use the :func:`tqdm.tqdm_notebook` function to display a
              progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
              Use the :func:`tqdm.tqdm_gui` function to display a
              progress bar as a graphical dialog box.

            ..versionadded:: 1.11.0
        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client
            using the default API settings. The BigQuery Storage API
            is a faster way to fetch rows from BigQuery. See the
            ``bqstorage_client`` parameter for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            ..versionadded:: 1.24.0

        date_as_object (Optional[bool]):
            If ``True`` (default), cast dates to objects. If ``False``, convert
            to datetime64[ns] dtype.

            ..versionadded:: 1.26.0

    Returns:
        pandas.DataFrame:
            A :class:`~pandas.DataFrame` populated with row data and column
            headers from the query results. The column headers are derived
            from the destination table's schema.

    Raises:
        ValueError:
            If the :mod:`pandas` library cannot be imported, or the
            :mod:`google.cloud.bigquery_storage_v1` module is
            required but cannot be imported.

    """
    if pandas is None:
        raise ValueError(_NO_PANDAS_ERROR)
    if dtypes is None:
        dtypes = {}

    if (
        bqstorage_client or create_bqstorage_client
    ) and self.max_results is not None:
        warnings.warn(
            "Cannot use bqstorage_client if max_results is set, "
            "reverting to fetching data with the tabledata.list endpoint.",
            stacklevel=2,
        )
        create_bqstorage_client = False
        bqstorage_client = None

    if pyarrow is not None:
        # If pyarrow is available, calling to_arrow, then converting to a
        # pandas dataframe is about 2x faster. This is because pandas.concat is
        # rarely no-copy, whereas pyarrow.Table.from_batches + to_pandas is
        # usually no-copy.
        record_batch = self.to_arrow(
            progress_bar_type=progress_bar_type,
            bqstorage_client=bqstorage_client,
            create_bqstorage_client=create_bqstorage_client,
        )

../../google/cloud/bigquery/table.py:1690:


self = <google.cloud.bigquery.table.RowIterator object at 0x7f03f01a99b0>
progress_bar_type = None
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
create_bqstorage_client = True

def to_arrow(
    self,
    progress_bar_type=None,
    bqstorage_client=None,
    create_bqstorage_client=True,
):
    """[Beta] Create a class:`pyarrow.Table` by loading all pages of a
    table or query.

    Args:
        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
              No progress bar.
            ``'tqdm'``
              Use the :func:`tqdm.tqdm` function to print a progress bar
              to :data:`sys.stderr`.
            ``'tqdm_notebook'``
              Use the :func:`tqdm.tqdm_notebook` function to display a
              progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
              Use the :func:`tqdm.tqdm_gui` function to display a
              progress bar as a graphical dialog box.
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster BigQuery
            Storage API to fetch rows from BigQuery. This API is a billable API.

            This method requires the ``pyarrow`` and
            ``google-cloud-bigquery-storage`` libraries.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.
        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client using
            the default API settings. The BigQuery Storage API is a faster way
            to fetch rows from BigQuery. See the ``bqstorage_client`` parameter
            for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            ..versionadded:: 1.24.0

    Returns:
        pyarrow.Table
            A :class:`pyarrow.Table` populated with row data and column
            headers from the query results. The column headers are derived
            from the destination table's schema.

    Raises:
        ValueError: If the :mod:`pyarrow` library cannot be imported.

    ..versionadded:: 1.17.0
    """
    if pyarrow is None:
        raise ValueError(_NO_PYARROW_ERROR)

    if (
        bqstorage_client or create_bqstorage_client
    ) and self.max_results is not None:
        warnings.warn(
            "Cannot use bqstorage_client if max_results is set, "
            "reverting to fetching data with the tabledata.list endpoint.",
            stacklevel=2,
        )
        create_bqstorage_client = False
        bqstorage_client = None

    owns_bqstorage_client = False
    if not bqstorage_client and create_bqstorage_client:
        bqstorage_client = self.client._create_bqstorage_client()
        owns_bqstorage_client = bqstorage_client is not None

    try:
        progress_bar = self._get_progress_bar(progress_bar_type)

        record_batches = []
        for record_batch in self._to_arrow_iterable(
            bqstorage_client=bqstorage_client
        ):

../../google/cloud/bigquery/table.py:1508:


self = <google.cloud.bigquery.table.RowIterator object at 0x7f03f01a99b0>
bqstorage_download = functools.partial(<function download_arrow_bqstorage at 0x7f03f2e5fbf8>, 'python-docs-samples-tests', Table(TableRefer...ct at 0x7f03f02a2a58>, preserve_order=<_sre.SRE_Match object; span=(125, 133), match='ORDER BY'>, selected_fields=None)
tabledata_list_download = functools.partial(<function download_arrow_tabledata_list at 0x7f03f2e5f7b8>, <generator object Iterator._page_iter at...eld('year', 'INTEGER', 'NULLABLE', None, (), None), SchemaField('birth_count', 'INTEGER', 'NULLABLE', None, (), None)])
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>

def _to_page_iterable(
    self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
    if bqstorage_client is not None:
        for item in bqstorage_download():

../../google/cloud/bigquery/table.py:1397:


project_id = 'python-docs-samples-tests'
table = Table(TableReference(DatasetReference('python-docs-samples-tests', '_d5c40c0e092f2208d182028dd13910fd3f0ceb53'), 'anon6023c82685ab155c9f7fb4144a0a67f172df90a6'))
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>
preserve_order = <_sre.SRE_Match object; span=(125, 133), match='ORDER BY'>
selected_fields = None
page_to_item = <function _bqstorage_page_to_arrow at 0x7f03f2e5f950>

def _download_table_bqstorage(
    project_id,
    table,
    bqstorage_client,
    preserve_order=False,
    selected_fields=None,
    page_to_item=None,
):
    """Use (faster, but billable) BQ Storage API to construct DataFrame."""

    # Passing a BQ Storage client in implies that the BigQuery Storage library
    # is available and can be imported.
    from google.cloud import bigquery_storage

    if "$" in table.table_id:
        raise ValueError(
            "Reading from a specific partition is not currently supported."
        )
    if "@" in table.table_id:
        raise ValueError("Reading from a specific snapshot is not currently supported.")

    requested_streams = 1 if preserve_order else 0

    requested_session = bigquery_storage.types.ReadSession(
        table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW
    )

E AttributeError: module 'google.cloud.bigquery_storage_v1.types' has no attribute 'DataFormat'

../../google/cloud/bigquery/_pandas_helpers.py:615: AttributeError
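The AttributeError above indicates that the bigquery_storage_v1.types module in the installed google-cloud-bigquery-storage release does not expose DataFormat, which the code under test expects, so the two client libraries in the test session appear to be mismatched versions. A minimal diagnostic sketch for the session follows (the __version__ lookups are assumptions about where each package reports its version, hence the getattr guards):

import google.cloud.bigquery as bigquery
from google.cloud import bigquery_storage_v1

# Report installed versions and whether the attribute the failing code path needs exists.
print("google-cloud-bigquery:", getattr(bigquery, "__version__", "unknown"))
print("google-cloud-bigquery-storage:", getattr(bigquery_storage_v1, "__version__", "unknown"))
print("types.DataFormat present:", hasattr(bigquery_storage_v1.types, "DataFormat"))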

During handling of the above exception, another exception occurred:

ipython = <IPython.terminal.interactiveshell.TerminalInteractiveShell object at 0x7f03f09c7160>

def test_jupyter_tutorial(ipython):
    matplotlib.use("agg")
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")

    sample = """
    # [START bigquery_jupyter_magic_gender_by_year]
    %%bigquery
    SELECT
        source_year AS year,
        COUNT(is_male) AS birth_count
    FROM `bigquery-public-data.samples.natality`
    GROUP BY year
    ORDER BY year DESC
    LIMIT 15
    # [END bigquery_jupyter_magic_gender_by_year]
    """
    result = ip.run_cell(_strip_region_tags(sample))
    result.raise_error()  # Throws an exception if the cell failed.

jupyter_tutorial_test.py:69:


.nox/py-3-6/lib/python3.6/site-packages/IPython/core/interactiveshell.py:331: in raise_error
raise self.error_in_exec
:1: in
get_ipython().run_cell_magic('bigquery', '', 'SELECT\n source_year AS year,\n COUNT(is_male) AS birth_count\nFROM bigquery-public-data.samples.natality\nGROUP BY year\nORDER BY year DESC\nLIMIT 15\n\n')
.nox/py-3-6/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2371: in run_cell_magic
result = fn(*args, **kwargs)
../../google/cloud/bigquery/magics/magics.py:613: in _cell_magic
close_transports()


client = <google.cloud.bigquery.client.Client object at 0x7f03f02a2908>
bqstorage_client = <google.cloud.bigquery_storage_v1.BigQueryReadClient object at 0x7f03f02a2a58>

def _close_transports(client, bqstorage_client):
    """Close the given clients' underlying transport channels.

    Closing the transport is needed to release system resources, namely open
    sockets.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`):
        bqstorage_client
            (Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]):
            A client for the BigQuery Storage API.

    """
    client.close()
    if bqstorage_client is not None:
        bqstorage_client._transport.grpc_channel.close()

E AttributeError: 'BigQueryReadClient' object has no attribute '_transport'

../../google/cloud/bigquery/magics/magics.py:679: AttributeError
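
The second AttributeError comes from the cleanup path: _close_transports assumes BigQueryReadClient stores its gRPC transport on a private _transport attribute, and the installed bigquery-storage release evidently does not. A defensive sketch of that cleanup is below; the alternative "transport" attribute name and the hasattr guards are assumptions rather than confirmed API, and the underlying fix is pinning compatible google-cloud-bigquery and google-cloud-bigquery-storage versions in the sample's test environment.

def _close_transports_defensively(client, bqstorage_client):
    # Sketch only: close the BigQuery client, then close the storage client's
    # gRPC channel if the transport is reachable under an expected attribute.
    client.close()
    if bqstorage_client is None:
        return
    transport = getattr(bqstorage_client, "_transport", None) or getattr(
        bqstorage_client, "transport", None  # alternative attribute name is an assumption
    )
    if transport is not None and hasattr(transport, "grpc_channel"):
        transport.grpc_channel.close()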

Labels

- 🚨 This issue needs some love.
- api: bigquery (issues related to the googleapis/python-bigquery API)
- priority: p1 (important issue which blocks shipping the next release; will be fixed prior to the next release)
- samples (issues that are directly related to samples)
- type: bug (error or flaw in code with unintended results or allowing sub-optimal usage patterns)
