Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion monai/apps/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from __future__ import annotations

import hashlib
import json
import logging
import os
import shutil
Expand All @@ -24,7 +25,7 @@
from typing import TYPE_CHECKING, Any
from urllib.error import ContentTooShortError, HTTPError, URLError
from urllib.parse import urlparse
from urllib.request import urlretrieve
from urllib.request import urlopen, urlretrieve

from monai.config.type_definitions import PathLike
from monai.utils import look_up_option, min_version, optional_import
Expand Down Expand Up @@ -203,6 +204,17 @@ def download_url(
if not has_gdown:
raise RuntimeError("To download files from Google Drive, please install the gdown dependency.")
gdown.download(url, f"{tmp_name}", quiet=not progress, **gdown_kwargs)
elif urlparse(url).netloc == "cloud-api.yandex.net":
with urlopen(url) as response:
code = response.getcode()
if code == 200:
download_url = json.loads(response.read())["href"]
_download_with_progress(download_url, tmp_name, progress=progress)
else:
raise RuntimeError(
f"Download of file from {download_url}, received from {url} "
+ f" to {filepath} failed due to network issue or denied permission."
)
else:
_download_with_progress(url, tmp_name, progress=progress)
if not tmp_name.exists():
Expand Down
43 changes: 43 additions & 0 deletions tests/test_download_url_yandex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import os
import tempfile
import unittest
from urllib.error import HTTPError

from monai.apps.utils import download_url

YANDEX_MODEL_URL = (
"https://cloud-api.yandex.net/v1/disk/public/resources/download?"
"public_key=https%3A%2F%2Fdisk.yandex.ru%2Fd%2Fxs0gzlj2_irgWA"
Copy link
Contributor

@wyli wyli Jun 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test will be run regularly by the testing pipelines. Do you think this URL and the file will be available all the time for those pipelines? @blacky-i (currently removed 7cb71a0)

Copy link
Contributor Author

@blacky-i blacky-i Jun 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two risks with this URL:

  1. API deprecation (which is very unlikely)
  2. This little file is on my personal yandex disk, I will not delete it.

Copy link
Contributor

@wyli wyli Jun 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, it seems that when multiple jobs download concurrently, the file becomes unavailable, for example:

https://github.com/Project-MONAI/MONAI/actions/runs/5403919298/jobs/9817414158

======================================================================
ERROR: test_verify (tests.test_download_url_yandex.TestDownloadUrlYandex)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/runner/work/MONAI/MONAI/tests/test_download_url_yandex.py", line 34, in test_verify
    download_url(url=YANDEX_MODEL_URL, filepath=os.path.join(tempdir, "model.pt"))
  File "/home/runner/work/MONAI/MONAI/monai/apps/utils.py", line 208, in download_url
    with urlopen(url) as response:
  File "/opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/hostedtoolcache/Python/3.8.17/x64/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: NOT FOUND

----------------------------------------------------------------------

Anyway, I think it's now skipped and is not a critical issue, because it won't impact the other download capabilities.

)
# Same Yandex Disk download endpoint, but with "-url-with-error" appended to the
# public key; used by the error-path test, which expects the request to fail.
YANDEX_MODEL_FLAWED_URL = (
"https://cloud-api.yandex.net/v1/disk/public/resources/download?"
"public_key=https%3A%2F%2Fdisk.yandex.ru%2Fd%2Fxs0gzlj2_irgWA-url-with-error"
)


class TestDownloadUrlYandex(unittest.TestCase):
    """Exercise ``download_url`` against the Yandex Disk public-download API.

    Both cases perform real network requests and write into a throwaway
    temporary directory that is removed when the test finishes.
    """

    def test_verify(self):
        """A valid public link downloads to ``model.pt`` without raising."""
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "model.pt")
            download_url(url=YANDEX_MODEL_URL, filepath=target)

    def test_verify_error(self):
        """A link with a malformed public key is expected to raise ``HTTPError``."""
        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "model.pt")
            with self.assertRaises(HTTPError):
                download_url(url=YANDEX_MODEL_FLAWED_URL, filepath=target)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()