From 57b68c63c51fd42e8f9a67f33d50306194a6e905 Mon Sep 17 00:00:00 2001
From: toddn
Date: Thu, 26 Aug 2021 15:53:57 -0500
Subject: [PATCH 1/7] FOR NOW adding try/catch. I can't think of any cases where we should try to download a dataset with no files, but since it's in there, I'm doing this for now until I find out more.

---
 pyclowder/connectors.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/pyclowder/connectors.py b/pyclowder/connectors.py
index 3e0fb9b..e79d224 100644
--- a/pyclowder/connectors.py
+++ b/pyclowder/connectors.py
@@ -293,6 +293,9 @@ def _download_file_metadata(self, host, secret_key, fileid, filepath):
         return (md_dir, md_file)
 
     def _prepare_dataset(self, host, secret_key, resource):
+
+        logger = logging.getLogger(__name__)
+
         located_files = []
         missing_files = []
         tmp_files_created = []
@@ -354,10 +357,14 @@ def _prepare_dataset(self, host, secret_key, resource):
 
         # If we didn't find any files locally, download dataset .zip as normal
         else:
-            inputzip = pyclowder.datasets.download(self, host, secret_key, resource["id"])
-            file_paths = pyclowder.utils.extract_zip_contents(inputzip)
-            tmp_files_created += file_paths
-            tmp_files_created.append(inputzip)
+            try:
+                inputzip = pyclowder.datasets.download(self, host, secret_key, resource["id"])
+                file_paths = pyclowder.utils.extract_zip_contents(inputzip)
+                tmp_files_created += file_paths
+                tmp_files_created.append(inputzip)
+            except Exception as e:
+                logger.debug("No files found and download failed")
+                logger.debug(e)
 
         return (file_paths, tmp_files_created, tmp_dirs_created)
 
@@ -370,7 +377,7 @@ def _process_message(self, body):
         file should be downloaded. Finally it will call the actual
         process_message function.
         """
-        logger = logging.getLogger(__name__)
+        d
         emailaddrlist = None
         if body.get('notifies'):
             emailaddrlist = body.get('notifies')

From 0bf8c6a2e1f5467b6dcd16dac1b945f07f0f18f4 Mon Sep 17 00:00:00 2001
From: toddn
Date: Thu, 26 Aug 2021 16:22:55 -0500
Subject: [PATCH 2/7] declaring variables as empty lists in case none are found.

---
 pyclowder/connectors.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pyclowder/connectors.py b/pyclowder/connectors.py
index e79d224..ff012ac 100644
--- a/pyclowder/connectors.py
+++ b/pyclowder/connectors.py
@@ -357,6 +357,10 @@ def _prepare_dataset(self, host, secret_key, resource):
 
         # If we didn't find any files locally, download dataset .zip as normal
         else:
+            file_paths = []
+            tmp_files_created = []
+            tmp_dirs_created = []
+            logger.info("we found no files")
             try:
                 inputzip = pyclowder.datasets.download(self, host, secret_key, resource["id"])
                 file_paths = pyclowder.utils.extract_zip_contents(inputzip)
                 tmp_files_created += file_paths
                 tmp_files_created.append(inputzip)
@@ -377,7 +381,7 @@ def _process_message(self, body):
         file should be downloaded. Finally it will call the actual
         process_message function.
         """
-        d
+        logger = logging.getLogger(__name__)
         emailaddrlist = None
         if body.get('notifies'):
             emailaddrlist = body.get('notifies')

From 72cc128eda08ddb17c320c625c5de13abb9fb87c Mon Sep 17 00:00:00 2001
From: toddn
Date: Fri, 27 Aug 2021 11:16:37 -0500
Subject: [PATCH 3/7] variables do not need to be declared twice

---
 pyclowder/connectors.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pyclowder/connectors.py b/pyclowder/connectors.py
index ff012ac..93d189a 100644
--- a/pyclowder/connectors.py
+++ b/pyclowder/connectors.py
@@ -357,9 +357,6 @@ def _prepare_dataset(self, host, secret_key, resource):
 
         # If we didn't find any files locally, download dataset .zip as normal
         else:
-            file_paths = []
-            tmp_files_created = []
-            tmp_dirs_created = []
             logger.info("we found no files")
             try:
                 inputzip = pyclowder.datasets.download(self, host, secret_key, resource["id"])

From e1f08dc343121e7e901e0887529145d495398f6b Mon Sep 17 00:00:00 2001
From: toddn
Date: Fri, 27 Aug 2021 13:47:46 -0500
Subject: [PATCH 4/7] only declaring file_paths - other return values already defined so will be in scope for return

---
 pyclowder/connectors.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyclowder/connectors.py b/pyclowder/connectors.py
index 93d189a..78db903 100644
--- a/pyclowder/connectors.py
+++ b/pyclowder/connectors.py
@@ -296,6 +296,8 @@ def _prepare_dataset(self, host, secret_key, resource):
 
         logger = logging.getLogger(__name__)
 
+        file_paths = []
+
         located_files = []
         missing_files = []
         tmp_files_created = []

From 1f1254a5d36eef8923d81dd4793fc19f32095231 Mon Sep 17 00:00:00 2001
From: Rob Kooper
Date: Tue, 22 Feb 2022 17:02:19 -0600
Subject: [PATCH 5/7] remove some empty lines

log exception when downloading dataset
---
 pyclowder/connectors.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pyclowder/connectors.py b/pyclowder/connectors.py
index 78db903..7d95a26 100644
--- a/pyclowder/connectors.py
+++ b/pyclowder/connectors.py
@@ -293,11 +293,9 @@ def _download_file_metadata(self, host, secret_key, fileid, filepath):
         return (md_dir, md_file)
 
     def _prepare_dataset(self, host, secret_key, resource):
-
         logger = logging.getLogger(__name__)
 
         file_paths = []
-
         located_files = []
         missing_files = []
         tmp_files_created = []
@@ -359,15 +357,13 @@ def _prepare_dataset(self, host, secret_key, resource):
 
         # If we didn't find any files locally, download dataset .zip as normal
         else:
-            logger.info("we found no files")
             try:
                 inputzip = pyclowder.datasets.download(self, host, secret_key, resource["id"])
                 file_paths = pyclowder.utils.extract_zip_contents(inputzip)
                 tmp_files_created += file_paths
                 tmp_files_created.append(inputzip)
             except Exception as e:
-                logger.debug("No files found and download failed")
-                logger.debug(e)
+                logger.exception("No files found and download failed")
 
         return (file_paths, tmp_files_created, tmp_dirs_created)

From c9356916a32598c97c7091843875f050817b3478 Mon Sep 17 00:00:00 2001
From: toddn
Date: Tue, 22 Feb 2022 17:05:15 -0600
Subject: [PATCH 6/7] update changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 627ae1a..73811d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## 2.4.1 - 2021-07-21
 
+### Fixed
+- extractor would fail on empty dataset download [#36](https://github.com/clowder-framework/pyclowder/issues/36)
+
 ### Added
 - Add `--max_retry` CLI flag and `CLOWDER_MAX_RETRY` environment variable.

From 0fc97a03a6c237efbe6539a59a383ca838c11888 Mon Sep 17 00:00:00 2001
From: toddn
Date: Tue, 22 Feb 2022 17:07:09 -0600
Subject: [PATCH 7/7] update changelog

---
 CHANGELOG.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 73811d6..2ad17d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+### Fixed
+- extractor would fail on empty dataset download [#36](https://github.com/clowder-framework/pyclowder/issues/36)
+
 ### Added
 - ability to set the heartbeat for an extractractor [#42](https://github.com/clowder-framework/pyclowder/issues/42)
@@ -15,9 +18,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## 2.4.1 - 2021-07-21
 
-### Fixed
-- extractor would fail on empty dataset download [#36](https://github.com/clowder-framework/pyclowder/issues/36)
-
 ### Added
 - Add `--max_retry` CLI flag and `CLOWDER_MAX_RETRY` environment variable.
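
Net effect of the series on _prepare_dataset: file_paths is initialized before the download branch (so the final return is always in scope), the dataset .zip download is wrapped in try/except so an empty dataset no longer crashes the extractor, and the failure is logged once with logger.exception instead of two logger.debug calls. The sketch below is a minimal, self-contained illustration of that pattern only, not the pyclowder source; fetch_dataset_zip and extract_zip_contents are hypothetical stand-ins for pyclowder.datasets.download and pyclowder.utils.extract_zip_contents.

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)


    def fetch_dataset_zip(dataset_id):
        """Hypothetical stand-in for pyclowder.datasets.download();
        an empty dataset is simulated as a download that raises."""
        raise RuntimeError("dataset %s has no files to download" % dataset_id)


    def extract_zip_contents(zip_path):
        """Hypothetical stand-in for pyclowder.utils.extract_zip_contents()."""
        return [zip_path + "/file1.txt"]


    def prepare_dataset(dataset_id):
        # Declared before the download attempt so the return below never hits
        # a NameError, even when the download fails (the point of PATCH 4/7).
        file_paths = []
        tmp_files_created = []
        tmp_dirs_created = []

        try:
            # Download and unpack the dataset .zip (PATCH 1/7 adds this guard).
            inputzip = fetch_dataset_zip(dataset_id)
            file_paths = extract_zip_contents(inputzip)
            tmp_files_created += file_paths
            tmp_files_created.append(inputzip)
        except Exception:
            # PATCH 5/7: one logger.exception call records message + traceback.
            logger.exception("No files found and download failed")

        return file_paths, tmp_files_created, tmp_dirs_created


    print(prepare_dataset("empty-dataset-id"))  # -> ([], [], [])

With the real pyclowder calls in place, the same structure means an extractor handed an empty dataset gets back empty lists instead of an unhandled exception.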