From c497dfb1fa3bf3114d819f08f96662a823916a65 Mon Sep 17 00:00:00 2001
From: Rok
Date: Thu, 21 Jul 2022 19:35:23 +0200
Subject: [PATCH 1/2] Initial commit

---
 docs/source/python/api/filesystems.rst |  1 +
 docs/source/python/filesystems.rst     | 35 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/docs/source/python/api/filesystems.rst b/docs/source/python/api/filesystems.rst
index f84a3e229c8..4b416abd9e2 100644
--- a/docs/source/python/api/filesystems.rst
+++ b/docs/source/python/api/filesystems.rst
@@ -40,6 +40,7 @@ Filesystem Implementations
 
    LocalFileSystem
    S3FileSystem
+   GcsFileSystem
    HadoopFileSystem
    SubTreeFileSystem
 
diff --git a/docs/source/python/filesystems.rst b/docs/source/python/filesystems.rst
index 1ddb4dfa2b3..13958dbb067 100644
--- a/docs/source/python/filesystems.rst
+++ b/docs/source/python/filesystems.rst
@@ -40,6 +40,7 @@ Pyarrow implements natively the following filesystem subclasses:
 * :ref:`filesystem-localfs` (:class:`LocalFileSystem`)
 * :ref:`filesystem-s3` (:class:`S3FileSystem`)
+* :ref:`filesystem-gcs` (:class:`GcsFileSystem`)
 * :ref:`filesystem-hdfs` (:class:`HadoopFileSystem`)
 
 It is also possible to use your own fsspec-compliant filesystem with pyarrow
 functionalities as described in the section :ref:`filesystem-fsspec`.
@@ -183,6 +184,40 @@ Example how you can read contents from a S3 bucket::
    for the different ways to configure the AWS credentials.
 
 
+.. _filesystem-gcs:
+
+Google Cloud Storage File System
+--------------------------------
+
+PyArrow implements natively a Google Cloud Storage (GCS) backed file system
+for GCS storage.
+
+If not running on Google Cloud Platform (GCP), this generally requires the
+environment variable GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file
+containing credentials.
+
+Example how you can read contents from a GCS bucket::
+
+    >>> from datetime import timedelta
+    >>> from pyarrow import fs
+    >>> gcs = fs.GcsFileSystem(anonymous=True, retry_time_limit=timedelta(seconds=15))
+
+    # List all contents in a bucket, recursively
+    >>> uri = "gcp-public-data-landsat/LC08/01/001/003/"
+    >>> file_list = gcs.get_file_info(fs.FileSelector(uri, recursive=True))
+
+    # Open a file for reading and download its contents
+    >>> f = gcs.open_input_stream(file_list[0].path)
+    >>> f.read(64)
+    b'GROUP = FILE_HEADER\n LANDSAT_SCENE_ID = "LC80010032013082LGN03"\n S'
+
+.. seealso::
+
+    The :class:`GcsFileSystem` constructor by default uses the
+    process described in `GCS docs `__
+    to resolve credentials.
+
+
 .. _filesystem-hdfs:
 
 Hadoop Distributed File System (HDFS)

From 74904230b322cee592f54c99adcdb375e4814972 Mon Sep 17 00:00:00 2001
From: Rok Mihevc
Date: Fri, 22 Jul 2022 07:05:29 +0200
Subject: [PATCH 2/2] Apply suggestions from code review

Co-authored-by: Ian Cook
---
 docs/source/python/filesystems.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/python/filesystems.rst b/docs/source/python/filesystems.rst
index 13958dbb067..a34ce88baea 100644
--- a/docs/source/python/filesystems.rst
+++ b/docs/source/python/filesystems.rst
@@ -193,10 +193,10 @@ PyArrow implements natively a Google Cloud Storage (GCS) backed file system
 for GCS storage.
 
 If not running on Google Cloud Platform (GCP), this generally requires the
-environment variable GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file
-containing credentials.
+environment variable ``GOOGLE_APPLICATION_CREDENTIALS`` to point to a
+JSON file containing credentials.
 
-Example how you can read contents from a GCS bucket::
+Example showing how you can read contents from a GCS bucket::
 
     >>> from datetime import timedelta
     >>> from pyarrow import fs
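The doctest in the new section exercises the anonymous-access path against a
public bucket. For completeness, a minimal sketch of the authenticated flow the
section also describes (pointing ``GOOGLE_APPLICATION_CREDENTIALS`` at a
service-account key file before constructing the filesystem) could look like
the following; the bucket name ``my-bucket`` and the key-file path are
hypothetical placeholders, not part of the patch::

    import os

    from pyarrow import fs

    # Outside GCP, point GOOGLE_APPLICATION_CREDENTIALS at a service-account
    # key file *before* constructing the filesystem, so the GCS client can
    # pick it up while resolving default credentials. (Hypothetical path.)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"

    gcs = fs.GcsFileSystem()

    # Write a small object to a (hypothetical) bucket, then read it back.
    with gcs.open_output_stream("my-bucket/hello.txt") as f:
        f.write(b"hello from pyarrow")

    with gcs.open_input_stream("my-bucket/hello.txt") as f:
        print(f.read())  # b'hello from pyarrow'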