From 1f6714b025dc9720692a05975a068f8ca3e3ff51 Mon Sep 17 00:00:00 2001 From: Shashwat Arghode Date: Wed, 15 Aug 2018 12:52:45 -0700 Subject: [PATCH 1/6] Adding kerberos support for Presto --- dev_requirements.txt | 1 + pyhive/presto.py | 52 +++++++++++++++++++++++++++++++++++--------- setup.py | 3 ++- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 73c419be..0bf6d8a7 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -10,6 +10,7 @@ pytest-timeout==1.2.0 # actual dependencies: let things break if a package changes requests>=1.0.0 +requests_kerberos>=0.12.0 sasl>=0.2.1 thrift>=0.10.0 #thrift_sasl>=0.1.0 diff --git a/pyhive/presto.py b/pyhive/presto.py index e90d5bfa..38908319 100644 --- a/pyhive/presto.py +++ b/pyhive/presto.py @@ -18,6 +18,8 @@ import logging import requests from requests.auth import HTTPBasicAuth +from requests_kerberos import HTTPKerberosAuth, OPTIONAL +import os try: # Python 3 import urllib.parse as urlparse @@ -80,7 +82,10 @@ class Cursor(common.DBAPICursor): def __init__(self, host, port='8080', username=None, catalog='hive', schema='default', poll_interval=1, source='pyhive', session_props=None, - protocol='http', password=None, requests_session=None, requests_kwargs=None): + protocol='http', password=None, KerberosRemoteServiceName=None, + KerberosPrincipal=None, KerberosConfigPath=None, KerberosKeytabPath=None, + KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None, + requests_session=None, requests_kwargs=None): """ :param host: hostname to connect to, e.g. ``presto.example.com`` :param port: int -- port, defaults to 8080 @@ -96,6 +101,15 @@ def __init__(self, host, port='8080', username=None, catalog='hive', Using BasicAuth, requires ``https``. Prefer ``requests_kwargs={'auth': HTTPBasicAuth(username, password)}``. May not be specified with ``requests_kwargs['auth']``. 
+ :param KerberosRemoteServiceName: string -- Presto coordinator Kerberos service name. + This parameter is required for Kerberos authentiation. + :param KerberosPrincipal: string -- The principal to use when authenticating to the Presto coordinator. + :param KerberosConfigPath: string -- Kerberos configuration file. (default: /etc/krb5.conf) + :param KerberosKeytabPath: string -- Kerberos keytab file. + :param KerberosCredentialCachePath: string -- Kerberos credential cache. + :param KerberosUseCanonicalHostname: boolean -- Use the canonical hostname of the Presto coordinator for the + Kerberos service principal by first resolving the hostname to an IP address and then doing a reverse DNS + lookup for that IP address. This is enabled by default. :param requests_session: a ``requests.Session`` object for advanced usage. If absent, this class will use the default requests behavior of making a new session per HTTP request. Caller is responsible for closing session. @@ -120,15 +134,33 @@ class will use the default requests behavior of making a new session per HTTP re self._requests_session = requests_session or requests requests_kwargs = dict(requests_kwargs) if requests_kwargs is not None else {} - if password is not None and 'auth' in requests_kwargs: - raise ValueError("Cannot use both password and requests_kwargs authentication") - for k in ('method', 'url', 'data', 'headers'): - if k in requests_kwargs: - raise ValueError("Cannot override requests argument {}".format(k)) - if password is not None: - requests_kwargs['auth'] = HTTPBasicAuth(username, password) - if protocol != 'https': - raise ValueError("Protocol must be https when passing a password") + + if KerberosRemoteServiceName is not None: + hostname_override = None + if KerberosUseCanonicalHostname is not None and KerberosUseCanonicalHostname.lower() == 'false': + hostname_override = host + if KerberosConfigPath is not None: + os.environ['KRB5_CONFIG'] = KerberosConfigPath + if KerberosKeytabPath is not 
None: + os.environ['KRB5_CLIENT_KTNAME'] = KerberosKeytabPath + if KerberosCredentialCachePath is not None: + os.environ['KRB5CCNAME'] = KerberosCredentialCachePath + + requests_kwargs['auth'] = HTTPKerberosAuth(mutual_authentication=OPTIONAL, + principal=KerberosPrincipal, + service=KerberosRemoteServiceName, + hostname_override=hostname_override) + + else: + if password is not None and 'auth' in requests_kwargs: + raise ValueError("Cannot use both password and requests_kwargs authentication") + for k in ('method', 'url', 'data', 'headers'): + if k in requests_kwargs: + raise ValueError("Cannot override requests argument {}".format(k)) + if password is not None: + requests_kwargs['auth'] = HTTPBasicAuth(username, password) + if protocol != 'https': + raise ValueError("Protocol must be https when passing a password") self._requests_kwargs = requests_kwargs self._reset_state() diff --git a/setup.py b/setup.py index e4e6b1cc..0bf173bb 100755 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ def run_tests(self): 'python-dateutil', ], extras_require={ - 'presto': ['requests>=1.0.0'], + 'presto': ['requests>=1.0.0', 'requests_kerberos>=0.12.0'], 'hive': ['sasl>=0.2.1', 'thrift>=0.10.0', 'thrift_sasl>=0.1.0'], 'sqlalchemy': ['sqlalchemy>=0.8.7'], }, @@ -52,6 +52,7 @@ def run_tests(self): 'pytest', 'pytest-cov', 'requests>=1.0.0', + 'requests_kerberos>=0.12.0', 'sasl>=0.2.1', 'sqlalchemy>=0.12.0', 'thrift>=0.10.0', From dc6c8bf29dd7d0f9effb832a7a00404e0f5f9ef2 Mon Sep 17 00:00:00 2001 From: Shashwat Arghode Date: Wed, 15 Aug 2018 13:43:40 -0700 Subject: [PATCH 2/6] Improving formatting of presto.py --- pyhive/presto.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pyhive/presto.py b/pyhive/presto.py index 38908319..e95f61f4 100644 --- a/pyhive/presto.py +++ b/pyhive/presto.py @@ -103,13 +103,16 @@ def __init__(self, host, port='8080', username=None, catalog='hive', May not be specified with ``requests_kwargs['auth']``. 
:param KerberosRemoteServiceName: string -- Presto coordinator Kerberos service name. This parameter is required for Kerberos authentiation. - :param KerberosPrincipal: string -- The principal to use when authenticating to the Presto coordinator. - :param KerberosConfigPath: string -- Kerberos configuration file. (default: /etc/krb5.conf) + :param KerberosPrincipal: string -- The principal to use when authenticating to + the Presto coordinator. + :param KerberosConfigPath: string -- Kerberos configuration file. + (default: /etc/krb5.conf) :param KerberosKeytabPath: string -- Kerberos keytab file. :param KerberosCredentialCachePath: string -- Kerberos credential cache. - :param KerberosUseCanonicalHostname: boolean -- Use the canonical hostname of the Presto coordinator for the - Kerberos service principal by first resolving the hostname to an IP address and then doing a reverse DNS - lookup for that IP address. This is enabled by default. + :param KerberosUseCanonicalHostname: boolean -- Use the canonical hostname of the + Presto coordinator for the Kerberos service principal by first resolving the + hostname to an IP address and then doing a reverse DNS lookup for that IP address. + This is enabled by default. :param requests_session: a ``requests.Session`` object for advanced usage. If absent, this class will use the default requests behavior of making a new session per HTTP request. Caller is responsible for closing session. 
@@ -137,7 +140,8 @@ class will use the default requests behavior of making a new session per HTTP re if KerberosRemoteServiceName is not None: hostname_override = None - if KerberosUseCanonicalHostname is not None and KerberosUseCanonicalHostname.lower() == 'false': + if KerberosUseCanonicalHostname is not None \ + and KerberosUseCanonicalHostname.lower() == 'false': hostname_override = host if KerberosConfigPath is not None: os.environ['KRB5_CONFIG'] = KerberosConfigPath From e210e39b8ef707d2a890e205fab43a008c079f6d Mon Sep 17 00:00:00 2001 From: Shashwat Arghode Date: Mon, 4 Mar 2019 14:09:31 -0800 Subject: [PATCH 3/6] making it backward compatible by incorporating review comments and some minor changes --- pyhive/presto.py | 17 +++++++++-------- setup.py | 3 ++- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pyhive/presto.py b/pyhive/presto.py index e95f61f4..40cacd99 100644 --- a/pyhive/presto.py +++ b/pyhive/presto.py @@ -82,10 +82,11 @@ class Cursor(common.DBAPICursor): def __init__(self, host, port='8080', username=None, catalog='hive', schema='default', poll_interval=1, source='pyhive', session_props=None, - protocol='http', password=None, KerberosRemoteServiceName=None, - KerberosPrincipal=None, KerberosConfigPath=None, KerberosKeytabPath=None, - KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None, - requests_session=None, requests_kwargs=None): + protocol='http', password=None, requests_session=None, requests_kwargs=None, + KerberosRemoteServiceName=None, KerberosPrincipal=None, + KerberosConfigPath=None, KerberosKeytabPath=None, + KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None + ): """ :param host: hostname to connect to, e.g. ``presto.example.com`` :param port: int -- port, defaults to 8080 @@ -101,6 +102,10 @@ def __init__(self, host, port='8080', username=None, catalog='hive', Using BasicAuth, requires ``https``. Prefer ``requests_kwargs={'auth': HTTPBasicAuth(username, password)}``. 
May not be specified with ``requests_kwargs['auth']``. + :param requests_session: a ``requests.Session`` object for advanced usage. If absent, this + class will use the default requests behavior of making a new session per HTTP request. + Caller is responsible for closing session. + :param requests_kwargs: Additional ``**kwargs`` to pass to requests :param KerberosRemoteServiceName: string -- Presto coordinator Kerberos service name. This parameter is required for Kerberos authentiation. :param KerberosPrincipal: string -- The principal to use when authenticating to @@ -113,10 +118,6 @@ def __init__(self, host, port='8080', username=None, catalog='hive', Presto coordinator for the Kerberos service principal by first resolving the hostname to an IP address and then doing a reverse DNS lookup for that IP address. This is enabled by default. - :param requests_session: a ``requests.Session`` object for advanced usage. If absent, this - class will use the default requests behavior of making a new session per HTTP request. - Caller is responsible for closing session. 
- :param requests_kwargs: Additional ``**kwargs`` to pass to requests """ super(Cursor, self).__init__(poll_interval) # Config diff --git a/setup.py b/setup.py index 0bf173bb..f9c78895 100755 --- a/setup.py +++ b/setup.py @@ -43,9 +43,10 @@ def run_tests(self): 'python-dateutil', ], extras_require={ - 'presto': ['requests>=1.0.0', 'requests_kerberos>=0.12.0'], + 'presto': ['requests>=1.0.0'], 'hive': ['sasl>=0.2.1', 'thrift>=0.10.0', 'thrift_sasl>=0.1.0'], 'sqlalchemy': ['sqlalchemy>=0.8.7'], + 'kerberos': ['requests_kerberos>=0.12.0'], }, tests_require=[ 'mock>=1.0.0', From e54045fc8bf3f5fbb729e0db52c6d9ffb05ee5d6 Mon Sep 17 00:00:00 2001 From: Shashwat Arghode Date: Mon, 4 Mar 2019 14:57:42 -0800 Subject: [PATCH 4/6] moving requests_kwargs to the end --- pyhive/presto.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyhive/presto.py b/pyhive/presto.py index 40cacd99..49f71d8d 100644 --- a/pyhive/presto.py +++ b/pyhive/presto.py @@ -82,10 +82,11 @@ class Cursor(common.DBAPICursor): def __init__(self, host, port='8080', username=None, catalog='hive', schema='default', poll_interval=1, source='pyhive', session_props=None, - protocol='http', password=None, requests_session=None, requests_kwargs=None, + protocol='http', password=None, requests_session=None, KerberosRemoteServiceName=None, KerberosPrincipal=None, KerberosConfigPath=None, KerberosKeytabPath=None, - KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None + KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None, + requests_kwargs=None ): """ :param host: hostname to connect to, e.g. ``presto.example.com`` @@ -105,7 +106,6 @@ def __init__(self, host, port='8080', username=None, catalog='hive', :param requests_session: a ``requests.Session`` object for advanced usage. If absent, this class will use the default requests behavior of making a new session per HTTP request. Caller is responsible for closing session. 
- :param requests_kwargs: Additional ``**kwargs`` to pass to requests :param KerberosRemoteServiceName: string -- Presto coordinator Kerberos service name. This parameter is required for Kerberos authentiation. :param KerberosPrincipal: string -- The principal to use when authenticating to @@ -118,6 +118,7 @@ class will use the default requests behavior of making a new session per HTTP re Presto coordinator for the Kerberos service principal by first resolving the hostname to an IP address and then doing a reverse DNS lookup for that IP address. This is enabled by default. + :param requests_kwargs: Additional ``**kwargs`` to pass to requests """ super(Cursor, self).__init__(poll_interval) # Config From 5453412c2663c3baf74bca6c6d7d5ecb0871c146 Mon Sep 17 00:00:00 2001 From: Shashwat Arghode Date: Mon, 4 Mar 2019 16:10:53 -0800 Subject: [PATCH 5/6] Fixing missing argument error --- pyhive/presto.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pyhive/presto.py b/pyhive/presto.py index 49f71d8d..e45d6572 100644 --- a/pyhive/presto.py +++ b/pyhive/presto.py @@ -82,12 +82,10 @@ class Cursor(common.DBAPICursor): def __init__(self, host, port='8080', username=None, catalog='hive', schema='default', poll_interval=1, source='pyhive', session_props=None, - protocol='http', password=None, requests_session=None, + protocol='http', password=None, requests_session=None,requests_kwargs=None, KerberosRemoteServiceName=None, KerberosPrincipal=None, KerberosConfigPath=None, KerberosKeytabPath=None, - KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None, - requests_kwargs=None - ): + KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None): """ :param host: hostname to connect to, e.g. ``presto.example.com`` :param port: int -- port, defaults to 8080 @@ -106,6 +104,7 @@ def __init__(self, host, port='8080', username=None, catalog='hive', :param requests_session: a ``requests.Session`` object for advanced usage. 
If absent, this class will use the default requests behavior of making a new session per HTTP request. Caller is responsible for closing session. + :param requests_kwargs: Additional ``**kwargs`` to pass to requests :param KerberosRemoteServiceName: string -- Presto coordinator Kerberos service name. This parameter is required for Kerberos authentiation. :param KerberosPrincipal: string -- The principal to use when authenticating to @@ -118,7 +117,6 @@ class will use the default requests behavior of making a new session per HTTP re Presto coordinator for the Kerberos service principal by first resolving the hostname to an IP address and then doing a reverse DNS lookup for that IP address. This is enabled by default. - :param requests_kwargs: Additional ``**kwargs`` to pass to requests """ super(Cursor, self).__init__(poll_interval) # Config From c2bde77efc7881be100e949a360a46466d34f18c Mon Sep 17 00:00:00 2001 From: Shashwat Arghode Date: Mon, 4 Mar 2019 16:11:13 -0800 Subject: [PATCH 6/6] Fixing missing argument error --- pyhive/presto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyhive/presto.py b/pyhive/presto.py index e45d6572..cc84bf80 100644 --- a/pyhive/presto.py +++ b/pyhive/presto.py @@ -82,7 +82,7 @@ class Cursor(common.DBAPICursor): def __init__(self, host, port='8080', username=None, catalog='hive', schema='default', poll_interval=1, source='pyhive', session_props=None, - protocol='http', password=None, requests_session=None,requests_kwargs=None, + protocol='http', password=None, requests_session=None, requests_kwargs=None, KerberosRemoteServiceName=None, KerberosPrincipal=None, KerberosConfigPath=None, KerberosKeytabPath=None, KerberosCredentialCachePath=None, KerberosUseCanonicalHostname=None):