From eb643e4cff4122187968da1e952f8a1d3cb7f9ff Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 14:16:10 -0800 Subject: [PATCH 01/25] ARROW-2066 Add documentation for Arrow/Azure/Parquet solution --- python/doc/source/parquet.rst | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index d466ba128cd..a63b596bf42 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -237,3 +237,37 @@ throughput: pq.read_table(where, nthreads=4) pq.ParquetDataset(where).read(nthreads=4) + +Reading a Parquet File from Azure Blob storage +------------------- + +The code below shows how to use Azure's storage sdk along with pyarrow to read +a parquet file into a Pandas dataframe. +This is suitable for executing inside a Jupyter notebook running on a Python 3 +kernel. + +Dependencies: +python 3.6.2 +azure-storage 0.36.0 +pyarrow 0.8.0 + +.. code-block:: python + + import pyarrow.parquet as pq + import io + from azure.storage.blob import BlockBlobService + import tempfile + + account_name = '...' + account_key = '...' + container_name = '...' 
+ parquet_file = 'mysample.parquet' + + block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key) + with tempfile.TemporaryFile() as fp: + block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=fp) + pd = pq.read_table(source=fp).to_pandas() + pd.head(10) + fp.close + + \ No newline at end of file From 6841116ad1ffc2b23ac870056e87f420d18adf6b Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 14:37:50 -0800 Subject: [PATCH 02/25] Polish the formatting --- python/doc/source/parquet.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index a63b596bf42..63512c4ab12 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -246,10 +246,11 @@ a parquet file into a Pandas dataframe. This is suitable for executing inside a Jupyter notebook running on a Python 3 kernel. -Dependencies: -python 3.6.2 -azure-storage 0.36.0 -pyarrow 0.8.0 +Dependencies: + +- python 3.6.2 +- azure-storage 0.36.0 +- pyarrow 0.8.0 .. code-block:: python From 5365a9c861ea65abe48c21357a3aa0afa4cdf486 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 14:55:12 -0800 Subject: [PATCH 03/25] Add helpful notes about Azure properties --- python/doc/source/parquet.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 63512c4ab12..7d443f6c3cd 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -270,5 +270,10 @@ Dependencies: pd = pq.read_table(source=fp).to_pandas() pd.head(10) fp.close - - \ No newline at end of file + +Notes: + +- The `account_key` can be found under `Settings -> Access keys` in the Microsoft Azure portal for a given container. 
+- The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked +- The parquet file was Blob Type = Block blob + From 5fbea897eb0e0d4a5a3b98ab3a030627f78dc54a Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 15:02:14 -0800 Subject: [PATCH 04/25] Add a note about keys and add polish --- python/doc/source/parquet.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 7d443f6c3cd..c53c11f72e5 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,7 +273,8 @@ Dependencies: Notes: -- The `account_key` can be found under `Settings -> Access keys` in the Microsoft Azure portal for a given container. +- The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container +-- Pick either one of the two default keys issued by Microsoft - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From 26a53e4a63852c7fd5ad9f615744cd7e23a8b658 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 15:04:48 -0800 Subject: [PATCH 05/25] Fix formatting --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index c53c11f72e5..e2c1474ff3a 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -274,7 +274,7 @@ Dependencies: Notes: - The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container --- Pick either one of the two default keys issued by Microsoft + Pick either one of the two default keys issued by Microsoft - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob 
From 7bab6401c0c115f96ac4c844212eb7963b5985de Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 15:25:57 -0800 Subject: [PATCH 06/25] Refine indented bullet and fix title underline --- python/doc/source/parquet.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index e2c1474ff3a..232b9a60bbb 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -239,7 +239,7 @@ throughput: pq.ParquetDataset(where).read(nthreads=4) Reading a Parquet File from Azure Blob storage -------------------- +---------------------------------------------- The code below shows how to use Azure's storage sdk along with pyarrow to read a parquet file into a Pandas dataframe. @@ -274,7 +274,7 @@ Dependencies: Notes: - The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container - Pick either one of the two default keys issued by Microsoft + - Pick either one of the two default keys issued by Microsoft - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From 718bd944c907f64df79c96cdc4995b771d6defcb Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 15:42:19 -0800 Subject: [PATCH 07/25] Fix unintended italics --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 232b9a60bbb..862d38e23a9 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,7 +273,7 @@ Dependencies: Notes: -- The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container +- The `account_key` can be found under `Settings -> Access keys` in the Microsoft Azure portal for a given container - Pick either one of the two default keys issued by 
Microsoft - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From f130e04fb89c53a3190d05c48bc6f3e63dc7199e Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 15:58:52 -0800 Subject: [PATCH 08/25] Change wording a bit --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 862d38e23a9..e54b2426f0f 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -274,7 +274,7 @@ Dependencies: Notes: - The `account_key` can be found under `Settings -> Access keys` in the Microsoft Azure portal for a given container - - Pick either one of the two default keys issued by Microsoft + - Choose either one of the two default keys issued by Microsoft, or one you created yourself - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From 83a38c47144eb8f5c1b8ece024f62d53a28bc5e8 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:03:43 -0800 Subject: [PATCH 09/25] Try to fix italics --- python/doc/source/parquet.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index e54b2426f0f..8ac6804596d 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,8 +273,8 @@ Dependencies: Notes: -- The `account_key` can be found under `Settings -> Access keys` in the Microsoft Azure portal for a given container - - Choose either one of the two default keys issued by Microsoft, or one you created yourself - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob +- The `account_key` can be found under `Settings -> Access keys` in 
the Microsoft Azure portal for a given container + - Choose either one of the two default keys issued by Microsoft, or one you created yourself From 6fd9f7093c685f3476ead28a849a6ddc3033c5de Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:04:46 -0800 Subject: [PATCH 10/25] remove inline edits --- python/doc/source/parquet.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 8ac6804596d..f7dccc62cae 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,8 +273,8 @@ Dependencies: Notes: +- The account_key can be found under Settings -> Access keys in the Microsoft Azure portal for a given container + - Choose either one of the two default keys issued by Microsoft, or one you created yourself - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob -- The `account_key` can be found under `Settings -> Access keys` in the Microsoft Azure portal for a given container - - Choose either one of the two default keys issued by Microsoft, or one you created yourself From 34c5a16695c427b26ec4d345c9fb5c6b10733ae8 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:13:44 -0800 Subject: [PATCH 11/25] Fix formatting --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index f7dccc62cae..a2fe0485d20 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,7 +273,7 @@ Dependencies: Notes: -- The account_key can be found under Settings -> Access keys in the Microsoft Azure portal for a given container +- The account key can be found under Settings -> Access keys in the Microsoft Azure portal for a given container - Choose either one of the two default keys issued by Microsoft, or one you created 
yourself - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From 599e04feee5559052b0fb21cdcdc3d31c734dcbc Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:21:22 -0800 Subject: [PATCH 12/25] Fix formatting --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index a2fe0485d20..19db1a0e532 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,7 +273,7 @@ Dependencies: Notes: -- The account key can be found under Settings -> Access keys in the Microsoft Azure portal for a given container +- The ``account_key`` can be found under ``Settings -\> Access keys`` in the Microsoft Azure portal for a given container - Choose either one of the two default keys issued by Microsoft, or one you created yourself - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From 18158168204a5b15ccda2d423cb6e59feda3ce5a Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:26:08 -0800 Subject: [PATCH 13/25] fix formatting --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 19db1a0e532..dd8d8aebefc 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,7 +273,7 @@ Dependencies: Notes: -- The ``account_key`` can be found under ``Settings -\> Access keys`` in the Microsoft Azure portal for a given container +- The ``account_key`` can be found under ``Settings Access keys`` in the Microsoft Azure portal for a given container - Choose either one of the two default keys issued by Microsoft, or one you created yourself - The code above worked for a container with private access, 
Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From a015deb2b40793f5d2fe021c12624e0767d8a0a4 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:27:01 -0800 Subject: [PATCH 14/25] Fix formatting --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index dd8d8aebefc..faa15c931c6 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,7 +273,7 @@ Dependencies: Notes: -- The ``account_key`` can be found under ``Settings Access keys`` in the Microsoft Azure portal for a given container +- The ``account key`` can be found under ``Settings Access keys`` in the Microsoft Azure portal for a given container - Choose either one of the two default keys issued by Microsoft, or one you created yourself - The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked - The parquet file was Blob Type = Block blob From 051b91d3e83d426d698d707841d0d5b5213a2a40 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:33:21 -0800 Subject: [PATCH 15/25] Fix formatting --- python/doc/source/parquet.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index faa15c931c6..91692e61c81 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,8 +273,8 @@ Dependencies: Notes: -- The ``account key`` can be found under ``Settings Access keys`` in the Microsoft Azure portal for a given container +* The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container - Choose either one of the two default keys issued by Microsoft, or one you created yourself -- The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked -- The parquet 
file was Blob Type = Block blob +* The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked +* The parquet file was Blob Type = Block blob From 803cbca9a5b95f7ddcf7400aeea63284d2cf5ead Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:34:24 -0800 Subject: [PATCH 16/25] Use asterisks for list --- python/doc/source/parquet.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 91692e61c81..f56204c9973 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -248,9 +248,9 @@ kernel. Dependencies: -- python 3.6.2 -- azure-storage 0.36.0 -- pyarrow 0.8.0 +* python 3.6.2 +* azure-storage 0.36.0 +* pyarrow 0.8.0 .. code-block:: python From 4c75824c87884b7a14d0abecdaca65954fa58d39 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:39:45 -0800 Subject: [PATCH 17/25] Try moving the bullet to remove italics --- python/doc/source/parquet.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index f56204c9973..370f231fb5c 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,8 +273,8 @@ Dependencies: Notes: -* The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container - - Choose either one of the two default keys issued by Microsoft, or one you created yourself * The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked * The parquet file was Blob Type = Block blob +* The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container + - Choose either one of the two default keys issued by Microsoft, or one you created yourself From 4770de19c2c28aaff927e3a63e9551efad322968 Mon Sep 17 00:00:00 2001 From: rrussell 
Date: Wed, 31 Jan 2018 16:42:11 -0800 Subject: [PATCH 18/25] fix --- python/doc/source/parquet.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 370f231fb5c..0509e6e6dd6 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,8 +273,7 @@ Dependencies: Notes: -* The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked -* The parquet file was Blob Type = Block blob -* The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container - - Choose either one of the two default keys issued by Microsoft, or one you created yourself - +* A +- a +* B +* C From 5d450fc50e1b4895e08bd9b4adff7033792ee862 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:42:54 -0800 Subject: [PATCH 19/25] fix --- python/doc/source/parquet.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 0509e6e6dd6..a620b13a6e3 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -274,6 +274,6 @@ Dependencies: Notes: * A -- a + - a * B * C From 654a6f9550f41956ec4725a96f50dd24fec0162c Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 31 Jan 2018 16:45:04 -0800 Subject: [PATCH 20/25] Add back original Notes bullets --- python/doc/source/parquet.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index a620b13a6e3..9a8be353bf9 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -273,7 +273,6 @@ Dependencies: Notes: -* A - - a -* B -* C +* The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container +* The code above worked for a container with private access, Lease State = Available, 
Lease Status = Unlocked +* The parquet file was Blob Type = Block blob From 36f737896eda8c19bef46d3bc8d09e04042d5edd Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 21 Feb 2018 23:19:38 -0800 Subject: [PATCH 21/25] Replace usage of tempfile buffer with BytesIO stream --- python/doc/source/parquet.rst | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 9a8be353bf9..588c25d3b7f 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -257,7 +257,6 @@ Dependencies: import pyarrow.parquet as pq import io from azure.storage.blob import BlockBlobService - import tempfile account_name = '...' account_key = '...' @@ -265,14 +264,13 @@ Dependencies: parquet_file = 'mysample.parquet' block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key) - with tempfile.TemporaryFile() as fp: - block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=fp) - pd = pq.read_table(source=fp).to_pandas() - pd.head(10) - fp.close + byte_stream = io.BytesIO() + block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=byte_stream) + pd = pq.read_table(source=byte_stream).to_pandas() + pd.head(10) Notes: * The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for a given container -* The code above worked for a container with private access, Lease State = Available, Lease Status = Unlocked +* The code above works for a container with private access, Lease State = Available, Lease Status = Unlocked * The parquet file was Blob Type = Block blob From 1fe9866c9bce6aeb3c86c7c235221eb6eb6b51d4 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 21 Feb 2018 23:43:35 -0800 Subject: [PATCH 22/25] Add try/except/finally blocks to ensure closure of the byte stream --- python/doc/source/parquet.rst | 12 ++++++++---- 1 file changed, 8 
insertions(+), 4 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 588c25d3b7f..aa052bb580c 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -264,10 +264,14 @@ Dependencies: parquet_file = 'mysample.parquet' block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key) - byte_stream = io.BytesIO() - block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=byte_stream) - pd = pq.read_table(source=byte_stream).to_pandas() - pd.head(10) + try: + block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=byte_stream) + pd = pq.read_table(source=byte_stream).to_pandas() + pd.head(10) + except Exception as err: + print("Error: {0}".format(err)) + finally: + byte_stream.close() Notes: From f056888d147af704db6348a72441504416a994a3 Mon Sep 17 00:00:00 2001 From: rrussell Date: Wed, 21 Feb 2018 23:45:29 -0800 Subject: [PATCH 23/25] Clean up white space --- python/doc/source/parquet.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index aa052bb580c..53cfbce2d97 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -266,10 +266,10 @@ Dependencies: block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key) try: block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=byte_stream) - pd = pq.read_table(source=byte_stream).to_pandas() - pd.head(10) + pd = pq.read_table(source=byte_stream).to_pandas() + pd.head(10) except Exception as err: - print("Error: {0}".format(err)) + print("Error: {0}".format(err)) finally: byte_stream.close() From a5addb0b83348aff9a9cdfdf0c75588006a795b6 Mon Sep 17 00:00:00 2001 From: rrussell Date: Thu, 22 Feb 2018 09:05:48 -0800 Subject: [PATCH 24/25] use more common 'df' instead 
of 'pd' for pandas dataframe variable, remove head() call and instead use comment to indicate generic fill-in code, add comment re: stream closure in finally block --- python/doc/source/parquet.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 53cfbce2d97..0edfd57dbd4 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -266,11 +266,10 @@ Dependencies: block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key) try: block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=byte_stream) - pd = pq.read_table(source=byte_stream).to_pandas() - pd.head(10) - except Exception as err: - print("Error: {0}".format(err)) + df = pq.read_table(source=byte_stream).to_pandas() + # Do work on df ... finally: + # Add finally block to ensure closure of the stream byte_stream.close() Notes: From 0d3972c8810782d5a1458cea929b5d8f13f97f50 Mon Sep 17 00:00:00 2001 From: rrussell Date: Thu, 22 Feb 2018 14:08:33 -0800 Subject: [PATCH 25/25] Add missing byte_stream declaration/assignment --- python/doc/source/parquet.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/doc/source/parquet.rst b/python/doc/source/parquet.rst index 0edfd57dbd4..b394f562aba 100644 --- a/python/doc/source/parquet.rst +++ b/python/doc/source/parquet.rst @@ -255,7 +255,7 @@ Dependencies: .. code-block:: python import pyarrow.parquet as pq - import io + from io import BytesIO from azure.storage.blob import BlockBlobService account_name = '...' @@ -263,6 +263,7 @@ Dependencies: container_name = '...' parquet_file = 'mysample.parquet' + byte_stream = BytesIO() block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key) try: block_blob_service.get_blob_to_stream(container_name=container_name, blob_name=parquet_file, stream=byte_stream)