Skip to content

Commit 5f716db

Browse files
authored
feat(storage): add opt-in raw download support (#9572)
* deps(bigquery): pin to allow google-resumable-media 0.5.x
* deps(storage): pin to require google-resumable-media >= 0.5.0
1 parent 4af68dc commit 5f716db

File tree

5 files changed

+311
-352
lines changed

5 files changed

+311
-352
lines changed

bigquery/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
dependencies = [
3232
'enum34; python_version < "3.4"',
3333
"google-cloud-core >= 1.0.3, < 2.0dev",
34-
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.5.0dev",
34+
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.6.0dev",
3535
"protobuf >= 3.6.0",
3636
]
3737
extras = {

storage/google/cloud/storage/blob.py

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
from google import resumable_media
4444
from google.resumable_media.requests import ChunkedDownload
4545
from google.resumable_media.requests import Download
46+
from google.resumable_media.requests import RawDownload
47+
from google.resumable_media.requests import RawChunkedDownload
4648
from google.resumable_media.requests import MultipartUpload
4749
from google.resumable_media.requests import ResumableUpload
4850

@@ -591,7 +593,14 @@ def _get_download_url(self):
591593
return _add_query_parameters(base_url, name_value_pairs)
592594

593595
def _do_download(
594-
self, transport, file_obj, download_url, headers, start=None, end=None
596+
self,
597+
transport,
598+
file_obj,
599+
download_url,
600+
headers,
601+
start=None,
602+
end=None,
603+
raw_download=False,
595604
):
596605
"""Perform a download without any error handling.
597606
@@ -617,14 +626,30 @@ def _do_download(
617626
618627
:type end: int
619628
:param end: Optional, The last byte in a range to be downloaded.
629+
630+
:type raw_download: bool
631+
:param raw_download:
632+
Optional, If true, download the object without any expansion.
620633
"""
621634
if self.chunk_size is None:
622-
download = Download(
635+
if raw_download:
636+
klass = RawDownload
637+
else:
638+
klass = Download
639+
640+
download = klass(
623641
download_url, stream=file_obj, headers=headers, start=start, end=end
624642
)
625643
download.consume(transport)
644+
626645
else:
627-
download = ChunkedDownload(
646+
647+
if raw_download:
648+
klass = RawChunkedDownload
649+
else:
650+
klass = ChunkedDownload
651+
652+
download = klass(
628653
download_url,
629654
self.chunk_size,
630655
file_obj,
@@ -636,7 +661,9 @@ def _do_download(
636661
while not download.finished:
637662
download.consume_next_chunk(transport)
638663

639-
def download_to_file(self, file_obj, client=None, start=None, end=None):
664+
def download_to_file(
665+
self, file_obj, client=None, start=None, end=None, raw_download=False
666+
):
640667
"""Download the contents of this blob into a file-like object.
641668
642669
.. note::
@@ -676,6 +703,10 @@ def download_to_file(self, file_obj, client=None, start=None, end=None):
676703
:type end: int
677704
:param end: Optional, The last byte in a range to be downloaded.
678705
706+
:type raw_download: bool
707+
:param raw_download:
708+
Optional, If true, download the object without any expansion.
709+
679710
:raises: :class:`google.cloud.exceptions.NotFound`
680711
"""
681712
download_url = self._get_download_url()
@@ -684,11 +715,15 @@ def download_to_file(self, file_obj, client=None, start=None, end=None):
684715

685716
transport = self._get_transport(client)
686717
try:
687-
self._do_download(transport, file_obj, download_url, headers, start, end)
718+
self._do_download(
719+
transport, file_obj, download_url, headers, start, end, raw_download
720+
)
688721
except resumable_media.InvalidResponse as exc:
689722
_raise_from_invalid_response(exc)
690723

691-
def download_to_filename(self, filename, client=None, start=None, end=None):
724+
def download_to_filename(
725+
self, filename, client=None, start=None, end=None, raw_download=False
726+
):
692727
"""Download the contents of this blob into a named file.
693728
694729
If :attr:`user_project` is set on the bucket, bills the API request
@@ -708,11 +743,21 @@ def download_to_filename(self, filename, client=None, start=None, end=None):
708743
:type end: int
709744
:param end: Optional, The last byte in a range to be downloaded.
710745
746+
:type raw_download: bool
747+
:param raw_download:
748+
Optional, If true, download the object without any expansion.
749+
711750
:raises: :class:`google.cloud.exceptions.NotFound`
712751
"""
713752
try:
714753
with open(filename, "wb") as file_obj:
715-
self.download_to_file(file_obj, client=client, start=start, end=end)
754+
self.download_to_file(
755+
file_obj,
756+
client=client,
757+
start=start,
758+
end=end,
759+
raw_download=raw_download,
760+
)
716761
except resumable_media.DataCorruption:
717762
# Delete the corrupt downloaded file.
718763
os.remove(filename)
@@ -723,7 +768,7 @@ def download_to_filename(self, filename, client=None, start=None, end=None):
723768
mtime = time.mktime(updated.timetuple())
724769
os.utime(file_obj.name, (mtime, mtime))
725770

726-
def download_as_string(self, client=None, start=None, end=None):
771+
def download_as_string(self, client=None, start=None, end=None, raw_download=False):
727772
"""Download the contents of this blob as a bytes object.
728773
729774
If :attr:`user_project` is set on the bucket, bills the API request
@@ -740,12 +785,22 @@ def download_as_string(self, client=None, start=None, end=None):
740785
:type end: int
741786
:param end: Optional, The last byte in a range to be downloaded.
742787
788+
:type raw_download: bool
789+
:param raw_download:
790+
Optional, If true, download the object without any expansion.
791+
743792
:rtype: bytes
744793
:returns: The data stored in this blob.
745794
:raises: :class:`google.cloud.exceptions.NotFound`
746795
"""
747796
string_buffer = BytesIO()
748-
self.download_to_file(string_buffer, client=client, start=start, end=end)
797+
self.download_to_file(
798+
string_buffer,
799+
client=client,
800+
start=start,
801+
end=end,
802+
raw_download=raw_download,
803+
)
749804
return string_buffer.getvalue()
750805

751806
def _get_content_type(self, content_type, filename=None):

storage/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
dependencies = [
3232
"google-auth >= 1.2.0",
3333
"google-cloud-core >= 1.0.3, < 2.0dev",
34-
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.5dev",
34+
"google-resumable-media >= 0.5.0, < 0.6dev",
3535
]
3636
extras = {}
3737

storage/tests/system.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414

1515
import base64
1616
import datetime
17+
import gzip
1718
import hashlib
19+
import io
1820
import os
1921
import re
2022
import tempfile
@@ -620,6 +622,23 @@ def test_download_blob_w_uri(self):
620622

621623
self.assertEqual(file_contents, stored_contents)
622624

625+
def test_upload_gzip_encoded_download_raw(self):
626+
payload = b"DEADBEEF" * 1000
627+
raw_stream = io.BytesIO()
628+
with gzip.GzipFile(fileobj=raw_stream, mode="wb") as gzip_stream:
629+
gzip_stream.write(payload)
630+
zipped = raw_stream.getvalue()
631+
632+
blob = self.bucket.blob("test_gzipped.gz")
633+
blob.content_encoding = "gzip"
634+
blob.upload_from_file(raw_stream, rewind=True)
635+
636+
expanded = blob.download_as_string()
637+
self.assertEqual(expanded, payload)
638+
639+
raw = blob.download_as_string(raw_download=True)
640+
self.assertEqual(raw, zipped)
641+
623642

624643
class TestUnicode(unittest.TestCase):
625644
@unittest.skipIf(RUNNING_IN_VPCSC, "Test is not VPCSC compatible.")

0 commit comments

Comments
 (0)