Closed
Changes from all commits (25 commits)
eb643e4
ARROW-2066 Add documentation for Arrow/Azure/Parquet solution
rjrussell77 Jan 31, 2018
6841116
Polish the formatting
rjrussell77 Jan 31, 2018
5365a9c
Add helpful notes about Azure properties
rjrussell77 Jan 31, 2018
5fbea89
Add a note about keys and add polish
rjrussell77 Jan 31, 2018
26a53e4
Fix formatting
rjrussell77 Jan 31, 2018
7bab640
Refine indented bullet and fix title underline
rjrussell77 Jan 31, 2018
718bd94
Fix unintended italics
rjrussell77 Jan 31, 2018
f130e04
Change wording a bit
rjrussell77 Jan 31, 2018
83a38c4
Try to fix italics
rjrussell77 Feb 1, 2018
6fd9f70
remove inline edits
rjrussell77 Feb 1, 2018
34c5a16
Fix formatting
rjrussell77 Feb 1, 2018
599e04f
Fix formatting
rjrussell77 Feb 1, 2018
1815816
fix formatting
rjrussell77 Feb 1, 2018
a015deb
Fix formatting
rjrussell77 Feb 1, 2018
051b91d
Fix formatting
rjrussell77 Feb 1, 2018
803cbca
Use asterisks for list
rjrussell77 Feb 1, 2018
4c75824
Try moving the bullet to remove italics
rjrussell77 Feb 1, 2018
4770de1
fix
rjrussell77 Feb 1, 2018
5d450fc
fix
rjrussell77 Feb 1, 2018
654a6f9
Add back original Notes bullets
rjrussell77 Feb 1, 2018
36f7378
Replace usage of tempfile buffer with BytesIO stream
rjrussell77 Feb 22, 2018
1fe9866
Add try/except/finally blocks to ensure closure of the byte stream
rjrussell77 Feb 22, 2018
f056888
Clean up white space
rjrussell77 Feb 22, 2018
a5addb0
use more common 'df' instead of 'pd' for pandas dataframe variable, r…
rjrussell77 Feb 22, 2018
0d3972c
Add missing byte_stream declaration/assignment
rjrussell77 Feb 22, 2018
41 changes: 41 additions & 0 deletions python/doc/source/parquet.rst
@@ -237,3 +237,44 @@ throughput:

pq.read_table(where, nthreads=4)
pq.ParquetDataset(where).read(nthreads=4)

Reading a Parquet File from Azure Blob storage
----------------------------------------------

The code below shows how to use Azure's storage SDK together with pyarrow to
read a Parquet file into a pandas DataFrame.
It is suitable for running inside a Jupyter notebook on a Python 3 kernel.

Dependencies:

* python 3.6.2
* azure-storage 0.36.0
* pyarrow 0.8.0

.. code-block:: python

   from io import BytesIO

   from azure.storage.blob import BlockBlobService
   import pyarrow.parquet as pq

   account_name = '...'
   account_key = '...'
   container_name = '...'
   parquet_file = 'mysample.parquet'

   # In-memory buffer that will receive the blob's bytes
   byte_stream = BytesIO()
   block_blob_service = BlockBlobService(account_name=account_name,
                                         account_key=account_key)
   try:
       block_blob_service.get_blob_to_stream(container_name=container_name,
                                             blob_name=parquet_file,
                                             stream=byte_stream)
       df = pq.read_table(source=byte_stream).to_pandas()
       # Do work on df ...
   finally:
       # Ensure the stream is closed even if reading fails
       byte_stream.close()
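
If only a subset of the columns is needed, ``pq.read_table`` also accepts a
``columns`` argument; the call below could replace the ``read_table`` call in
the ``try`` block above. The column names here are placeholders for whatever
the file actually contains.

.. code-block:: python

   # Hypothetical column names; replace with columns present in your file
   df = pq.read_table(source=byte_stream,
                      columns=['col_a', 'col_b']).to_pandas()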

rjrussell77 (Contributor, Author) commented:

@xhochy Ok, I've responded to your last set of feedback. How are we looking now?

Notes:

* The ``account_key`` can be found under ``Settings -> Access keys`` in the Microsoft Azure portal for the storage account that holds the container (rather than hard-coding it, the key can also be read from the environment; see the sketch below)
* The code above works for a container with private access, Lease State = Available, and Lease Status = Unlocked
* The Parquet file was stored with Blob Type = Block blob
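
Rather than hard-coding the key in the notebook, the credentials can be read
from the environment at run time. A minimal sketch, assuming the (hypothetical)
environment variables ``AZURE_ACCOUNT_NAME`` and ``AZURE_ACCOUNT_KEY`` have
been set beforehand:

.. code-block:: python

   import os

   # Hypothetical variable names; set these in the shell or notebook environment
   account_name = os.environ['AZURE_ACCOUNT_NAME']
   account_key = os.environ['AZURE_ACCOUNT_KEY']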