From 862a41e08b89aba4bf1927bda2e3c0d092b2030d Mon Sep 17 00:00:00 2001 From: Ulas Can Cengiz <721050+ulsc@users.noreply.github.com> Date: Fri, 11 Mar 2022 13:26:44 +0100 Subject: [PATCH 1/3] Support Uploading Bigger Files to Google Drive Adding the `resumable=True` parameter to `MediaFileUpload` makes it possible to upload files bigger than 5MB to Google Drive. --- airflow/providers/google/suite/hooks/drive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/providers/google/suite/hooks/drive.py b/airflow/providers/google/suite/hooks/drive.py index 5c14bfdb6c14d..8b13ca872e4ec 100644 --- a/airflow/providers/google/suite/hooks/drive.py +++ b/airflow/providers/google/suite/hooks/drive.py @@ -197,7 +197,7 @@ def upload_file(self, local_location: str, remote_location: str) -> str: parent = "root" file_metadata = {"name": file_name, "parents": [parent]} - media = MediaFileUpload(local_location) + media = MediaFileUpload(local_location, resumable=True) file = ( service.files() .create(body=file_metadata, media_body=media, fields="id", supportsAllDrives=True) From a49908660b9af59a59e1fa76cf6375f6d1a4c5db Mon Sep 17 00:00:00 2001 From: Ulas Can Cengiz <721050+ulsc@users.noreply.github.com> Date: Sat, 12 Mar 2022 13:56:07 +0100 Subject: [PATCH 2/3] Add `chunk_size` & `resumable` as parameters to `upload_file` method --- airflow/providers/google/suite/hooks/drive.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/airflow/providers/google/suite/hooks/drive.py b/airflow/providers/google/suite/hooks/drive.py index 8b13ca872e4ec..7c6fa78cf988e 100644 --- a/airflow/providers/google/suite/hooks/drive.py +++ b/airflow/providers/google/suite/hooks/drive.py @@ -180,12 +180,25 @@ def get_file_id(self, folder_id: str, file_name: str, drive_id: Optional[str] = file_metadata = {"id": files['files'][0]['id'], "mime_type": files['files'][0]['mimeType']} return file_metadata - def upload_file(self, local_location: str, 
remote_location: str) -> str: + def upload_file( + self, + local_location: str, + remote_location: str, + chunk_size: int = 104857600, + resumable: bool = False, + ) -> str: """ Uploads a file that is available locally to a Google Drive service. :param local_location: The path where the file is available. :param remote_location: The path where the file will be send + :param chunk_size: File will be uploaded in chunks of this many bytes. Only + used if resumable=True. Pass in a value of -1 if the file is to be + uploaded as a single chunk. Note that Google App Engine has a 5MB limit + on request size, so you should never set your chunksize larger than 5MB, + or to -1. + :param resumable: True if this is a resumable upload. False means upload + in a single request. :return: File ID :rtype: str """ @@ -197,7 +210,7 @@ def upload_file(self, local_location: str, remote_location: str) -> str: parent = "root" file_metadata = {"name": file_name, "parents": [parent]} - media = MediaFileUpload(local_location, resumable=True) + media = MediaFileUpload(local_location, chunksize=chunk_size, resumable=resumable) file = ( service.files() .create(body=file_metadata, media_body=media, fields="id", supportsAllDrives=True) From 5f440f0ebe015175f625b1cc93ae384ce82c0f6a Mon Sep 17 00:00:00 2001 From: Ulas Can Cengiz <721050+ulsc@users.noreply.github.com> Date: Sun, 13 Mar 2022 14:15:52 +0100 Subject: [PATCH 3/3] Change the default `chunk_size` to a clear representation & fix documentation typo --- airflow/providers/google/suite/hooks/drive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/providers/google/suite/hooks/drive.py b/airflow/providers/google/suite/hooks/drive.py index 7c6fa78cf988e..10144a13fe111 100644 --- a/airflow/providers/google/suite/hooks/drive.py +++ b/airflow/providers/google/suite/hooks/drive.py @@ -184,7 +184,7 @@ def upload_file( self, local_location: str, remote_location: str, - chunk_size: int = 104857600, + chunk_size: int = 100 
* 1024 * 1024, resumable: bool = False, ) -> str: """ @@ -195,7 +195,7 @@ def upload_file( :param chunk_size: File will be uploaded in chunks of this many bytes. Only used if resumable=True. Pass in a value of -1 if the file is to be uploaded as a single chunk. Note that Google App Engine has a 5MB limit - on request size, so you should never set your chunksize larger than 5MB, + on request size, so you should never set your chunk size larger than 5MB, or to -1. :param resumable: True if this is a resumable upload. False means upload in a single request.