From 798feb4f8f807b312eb2e1232f67da5d2860fbfe Mon Sep 17 00:00:00 2001 From: Robert Van Wesep Date: Sat, 15 May 2021 16:31:28 -0700 Subject: [PATCH 1/2] config, remote: Made S3 CA bundle customizable botocore allows a path to a custom CA bundle either by passing a path to the CA bundle file into the verify argument of boto3.session.Session.client or passing None (the default) which will fall back to the AWS config. Previously, the DVC config only accepted a boolean into the ssl_verify option in the remote S3 config. This changes the DVC config to accept both string and None in addition to boolean and defaults to None. I also changed the default for ssl_verfiy to None in BaseS3FileSystem. Thus, if ssl_verify is not provided, botocore will fall back to the AWS config. Testing Unit tests to cover the changes to the config schema and addition ssl_verify types that will be passed into S3FileSystem. Also, ran dvc push -r object-store data/cifar-10-python.tar.gz in my work environment that has a private S3 endpoint that requires a custom CA bundle, both with and without ssl_verify specified in the config. This was successful, showing that communication could be established. And I ran dvc remote modify object-store ssl_verify "$HOME/.aws/cabundle.pem" and confirmed that the custom CA bundle path was added to the config. Fixes #6012 --- dvc/config_schema.py | 2 +- dvc/fs/s3.py | 2 +- tests/unit/fs/test_s3.py | 33 ++++++++++++++++++++++++++++++++- tests/unit/test_config.py | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/dvc/config_schema.py b/dvc/config_schema.py index 19bec8d1d7..dcafe5c5cf 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -145,7 +145,7 @@ class RelPath(str): "session_token": str, Optional("listobjects", default=False): Bool, # obsoleted Optional("use_ssl", default=True): Bool, - Optional("ssl_verify", default=True): Bool, + Optional("ssl_verify", default=None): Any(Bool, str, None), "sse": str, "sse_kms_key_id": str, "acl": str, diff --git a/dvc/fs/s3.py b/dvc/fs/s3.py index 0a6b6165cd..b14f750802 100644 --- a/dvc/fs/s3.py +++ b/dvc/fs/s3.py @@ -102,7 +102,7 @@ def _prepare_credentials(self, **config): client = login_info["client_kwargs"] client["region_name"] = config.get("region") client["endpoint_url"] = config.get("endpointurl") - client["verify"] = config.get("ssl_verify", True) + client["verify"] = config.get("ssl_verify") # encryptions additional = login_info["s3_additional_kwargs"] diff --git a/tests/unit/fs/test_s3.py b/tests/unit/fs/test_s3.py index ec323753bf..cccdfc1bf0 100644 --- a/tests/unit/fs/test_s3.py +++ b/tests/unit/fs/test_s3.py @@ -39,7 +39,15 @@ def test_verify_ssl_default_param(dvc): } fs = S3FileSystem(**config) - assert fs.fs_args["client_kwargs"]["verify"] + assert "client_kwargs" not in fs.fs_args + + config = { + "url": url, + "endpointurl": "https://my.custom.s3:1234", + } + fs = S3FileSystem(**config) + + assert "verify" not in fs.fs_args["client_kwargs"] def test_s3_config_credentialpath(dvc, monkeypatch): @@ -74,6 +82,29 @@ def test_ssl_verify_bool_param(dvc): assert fs.fs_args["client_kwargs"]["verify"] == config["ssl_verify"] +def test_ssl_verify_path_param(dvc): + config = {"url": url, "ssl_verify": "/path/to/custom/cabundle.pem"} + fs = S3FileSystem(**config) + + assert fs.fs_args["client_kwargs"]["verify"] == config["ssl_verify"] + + +def test_ssl_verify_none_param(dvc): + config = {"url": url, "ssl_verify": None} + fs = S3FileSystem(**config) + + assert "client_kwargs" not in fs.fs_args + + config = { + "url": url, + "endpointurl": "https://my.custom.s3:1234", + "ssl_verify": None, + } + fs = S3FileSystem(**config) + + assert "verify" not in fs.fs_args["client_kwargs"] + + def test_grants(dvc): config = { "url": url, diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 4e8164f4ba..dbd77f1a1c 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,4 +1,5 @@ import os +import textwrap import pytest @@ -33,3 +34,39 @@ def test_get_fs(tmp_dir, scm): assert config._get_fs("local") == config.wfs assert config._get_fs("global") == config.wfs assert config._get_fs("system") == config.wfs + + +def test_s3_ssl_verify(tmp_dir, dvc): + config = Config(validate=False) + with config.edit() as conf: + conf["remote"]["remote-name"] = {"url": "s3://bucket/dvc"} + + assert config["remote"]["remote-name"]["ssl_verify"] is None + + with config.edit() as conf: + section = conf["remote"]["remote-name"] + section["ssl_verify"] = False + + assert (tmp_dir / ".dvc" / "config").read_text() == textwrap.dedent( + """\ + [core] + no_scm = True + ['remote "remote-name"'] + url = s3://bucket/dvc + ssl_verify = False + """ + ) + + with config.edit() as conf: + section = conf["remote"]["remote-name"] + section["ssl_verify"] = "/path/to/custom/cabundle.pem" + + assert (tmp_dir / ".dvc" / "config").read_text() == textwrap.dedent( + """\ + [core] + no_scm = True + ['remote "remote-name"'] + url = s3://bucket/dvc + ssl_verify = /path/to/custom/cabundle.pem + """ + ) From ccd4461bab3c770b25817493bb2ea37debc8e650 Mon Sep 17 00:00:00 2001 From: Robert Van Wesep Date: Wed, 19 May 2021 18:14:44 -0700 Subject: [PATCH 2/2] Removed default None on ssl_verify Responding to PR comment, removed the Optional, default None on ssl_verify since the config keys are optional by default. Rather than a missing ssl_verify producing a None that eventually gets filtered, it doesn't appear in the parsed config in the first place. --- dvc/config_schema.py | 2 +- tests/unit/test_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/config_schema.py b/dvc/config_schema.py index dcafe5c5cf..abfd7c34c4 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -145,7 +145,7 @@ class RelPath(str): "session_token": str, Optional("listobjects", default=False): Bool, # obsoleted Optional("use_ssl", default=True): Bool, - Optional("ssl_verify", default=None): Any(Bool, str, None), + "ssl_verify": Any(Bool, str), "sse": str, "sse_kms_key_id": str, "acl": str, diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index dbd77f1a1c..a12a9992e8 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -41,7 +41,7 @@ def test_s3_ssl_verify(tmp_dir, dvc): with config.edit() as conf: conf["remote"]["remote-name"] = {"url": "s3://bucket/dvc"} - assert config["remote"]["remote-name"]["ssl_verify"] is None + assert "ssl_verify" not in config["remote"]["remote-name"] with config.edit() as conf: section = conf["remote"]["remote-name"]