From d40a3d504e21c816ee9e9a617acb826b873fa71b Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 11:48:44 +0100 Subject: [PATCH 01/13] Update MongoDB connection related code to handle various SSL related connection errors better. Without specifying a lower serverSelectionTimeoutMs value, the client would wait up to 30 seconds (default value) in case there are SSL errors (e.g. handshake failed or similar). With lower timeout we ensure a faster failure on fatal errors. --- st2common/st2common/models/db/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/st2common/st2common/models/db/__init__.py b/st2common/st2common/models/db/__init__.py index 31c4abc1d4..09b3310600 100644 --- a/st2common/st2common/models/db/__init__.py +++ b/st2common/st2common/models/db/__init__.py @@ -25,6 +25,7 @@ from pymongo import uri_parser from pymongo.errors import OperationFailure from pymongo.errors import ConnectionFailure +from pymongo.errors import ServerSelectionTimeoutError from st2common import log as logging from st2common.util import isotime @@ -122,9 +123,13 @@ def _db_connect(db_name, db_host, db_port, username=None, password=None, authentication_mechanism=authentication_mechanism, ssl_match_hostname=ssl_match_hostname) + # NOTE: We intentionally set "serverSelectionTimeoutMS" to 5 seconds. By default it's set to + # 30 seconds, which means it will block up to 30 seconds and fail if there are any SSL related + # errors connection = mongoengine.connection.connect(db_name, host=db_host, port=db_port, tz_aware=True, username=username, password=password, + serverSelectionTimeoutMS=5000, **ssl_kwargs) # NOTE: Since pymongo 3.0, connect() method is lazy and not blocking (always returns success) @@ -134,7 +139,10 @@ def _db_connect(db_name, db_host, db_port, username=None, password=None, try: # The ismaster command is cheap and does not require auth connection.admin.command('ismaster') - except ConnectionFailure as e: + except (ConnectionFailure, ServerSelectionTimeoutError) as e: + # NOTE: ServerSelectionTimeoutError can also be thrown if SSLHandShake fails in the server + # Sadly the client doesn't include more information about the error so in such scenarios + # user needs to check MongoDB server log LOG.error('Failed to connect to database "%s" @ "%s" as user "%s": %s' % (db_name, host_string, str(username_string), six.text_type(e))) raise e From b652442d7b4cff2e71cce08cbb702c28660c8603 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 12:04:35 +0100 Subject: [PATCH 02/13] Fix a bug in st2api and st2auth and make sure we perform monkey patching as early as possible. This important, because if we don't do it early enough and "ssl" module is imported before monkey patching is performed, SSL support for MongoDB won't work. Fixes issue reported in https://github.com/StackStorm/st2/issues/4832. --- st2api/st2api/app.py | 11 ++++------- st2api/st2api/cmd/api.py | 10 +++++++--- st2api/st2api/wsgi.py | 14 ++++++++++++++ st2auth/st2auth/app.py | 11 ++++------- st2auth/st2auth/wsgi.py | 10 ++++++++++ 5 files changed, 39 insertions(+), 17 deletions(-) diff --git a/st2api/st2api/app.py b/st2api/st2api/app.py index ff6bd37f37..2ad943d5c3 100644 --- a/st2api/st2api/app.py +++ b/st2api/st2api/app.py @@ -24,7 +24,6 @@ from st2common.middleware.instrumentation import RequestInstrumentationMiddleware from st2common.middleware.instrumentation import ResponseInstrumentationMiddleware from st2common.router import Router -from st2common.util.monkey_patch import monkey_patch from st2common.constants.system import VERSION_STRING from st2common.service_setup import setup as common_setup from st2common.util import spec_loader @@ -33,16 +32,14 @@ LOG = logging.getLogger(__name__) -def setup_app(config={}): +def setup_app(config=None): + config = config or {} + LOG.info('Creating st2api: %s as OpenAPI app.', VERSION_STRING) is_gunicorn = config.get('is_gunicorn', False) if is_gunicorn: - # Note: We need to perform monkey patching in the worker. If we do it in - # the master process (gunicorn_config.py), it breaks tons of things - # including shutdown - monkey_patch() - + # NOTE: We only want to perform this logic in the WSGI worker st2api_config.register_opts() capabilities = { 'name': 'api', diff --git a/st2api/st2api/cmd/api.py b/st2api/st2api/cmd/api.py index 15f2f3499a..dcdf3e3396 100644 --- a/st2api/st2api/cmd/api.py +++ b/st2api/st2api/cmd/api.py @@ -15,6 +15,13 @@ import os import sys +# NOTE: It's important that we perform monkey patch as early as possible before any other modules +# are important, otherwise SSL support for MongoDB won't work. +# See https://github.com/StackStorm/st2/issues/4832 and https://github.com/gevent/gevent/issues/1016 +# for details. +from st2common.util.monkey_patch import monkey_patch +monkey_patch() + import eventlet from oslo_config import cfg from eventlet import wsgi @@ -22,7 +29,6 @@ from st2common import log as logging from st2common.service_setup import setup as common_setup from st2common.service_setup import teardown as common_teardown -from st2common.util.monkey_patch import monkey_patch from st2api import config config.register_opts() from st2api import app @@ -33,8 +39,6 @@ 'main' ] -monkey_patch() - LOG = logging.getLogger(__name__) # How much time to give to the request in progress to finish in seconds before killing them diff --git a/st2api/st2api/wsgi.py b/st2api/st2api/wsgi.py index 3492f3fd95..faa5afc60a 100644 --- a/st2api/st2api/wsgi.py +++ b/st2api/st2api/wsgi.py @@ -12,8 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +WSGI entry point used by gunicorn. +""" + import os +from st2common.util.monkey_patch import monkey_patch +# Note: We need to perform monkey patching in the worker. If we do it in +# the master process (gunicorn_config.py), it breaks tons of things +# including shutdown +# NOTE: It's important that we perform monkey patch as early as possible before any other modules +# are important, otherwise SSL support for MongoDB won't work. +# See https://github.com/StackStorm/st2/issues/4832 and https://github.com/gevent/gevent/issues/1016 +# for details. +monkey_patch() + from st2api import app config = { diff --git a/st2auth/st2auth/app.py b/st2auth/st2auth/app.py index a8858e6ae3..6f3f81803e 100644 --- a/st2auth/st2auth/app.py +++ b/st2auth/st2auth/app.py @@ -22,7 +22,6 @@ from st2common.middleware.instrumentation import RequestInstrumentationMiddleware from st2common.middleware.instrumentation import ResponseInstrumentationMiddleware from st2common.router import Router -from st2common.util.monkey_patch import monkey_patch from st2common.constants.system import VERSION_STRING from st2common.service_setup import setup as common_setup from st2common.util import spec_loader @@ -32,16 +31,14 @@ LOG = logging.getLogger(__name__) -def setup_app(config={}): +def setup_app(config=None): + config = config or {} + LOG.info('Creating st2auth: %s as OpenAPI app.', VERSION_STRING) is_gunicorn = config.get('is_gunicorn', False) if is_gunicorn: - # Note: We need to perform monkey patching in the worker. If we do it in - # the master process (gunicorn_config.py), it breaks tons of things - # including shutdown - monkey_patch() - + # NOTE: We only want to perform this logic in the WSGI worker st2auth_config.register_opts() capabilities = { 'name': 'auth', diff --git a/st2auth/st2auth/wsgi.py b/st2auth/st2auth/wsgi.py index 0a1914fb42..614a1c4ce7 100644 --- a/st2auth/st2auth/wsgi.py +++ b/st2auth/st2auth/wsgi.py @@ -14,6 +14,16 @@ import os +from st2common.util.monkey_patch import monkey_patch +# Note: We need to perform monkey patching in the worker. If we do it in +# the master process (gunicorn_config.py), it breaks tons of things +# including shutdown +# NOTE: It's important that we perform monkey patch as early as possible before any other modules +# are important, otherwise SSL support for MongoDB won't work. +# See https://github.com/StackStorm/st2/issues/4832 and https://github.com/gevent/gevent/issues/1016 +# for details. +monkey_patch() + from st2auth import app config = { From 3becebf98427f8301c1abd16d3f4dd8c102b707f Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 12:22:42 +0100 Subject: [PATCH 03/13] Add changelog entry. --- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 639d48c58f..b19b739fab 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -91,6 +91,11 @@ Fixed * Update all the various rule criteria comparison operators which also work with strings (equals, icontains, nequals, etc.) to work correctly on Python 3 deployments if one of the operators is of a type bytes and the other is of a type unicode / string. (bug fix) #4831 +* Fix SSL connection support for MongoDB and RabbitMQ which wouldn't work under Python 3 and would + result in cryptic "maximum recursion depth exceeded while calling a Python object" error on + connection failure. (bug fix) #4382 #4384 + + Reported by @alexku7. 3.1.0 - June 27, 2019 --------------------- From 1afecade3f681f32025d798ff7cbb9af4d8f4cc0 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 12:36:40 +0100 Subject: [PATCH 04/13] Add changelog entry. --- CHANGELOG.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b19b739fab..53b8c438fd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -47,6 +47,10 @@ Changed function must be called separately. (improvement) * Update various internal dependencies to latest stable versions (cryptography, jinja2, requests, apscheduler, eventlet, amqp, kombu, semver, six) #4819 (improvement) +* Improve MongoDB connection timeout related code. Connection and server selection timeout is now + set to 3 seconds. Previously a default value of 30 seconds was used which means that for many + connection related errors, our code would first wait for this timeout to be reached (30 seconds) + before returning error to the end user. #4384 Fixed ~~~~~ From e97954981634ab35c063b6d811580d6e8bd01559 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 12:54:07 +0100 Subject: [PATCH 05/13] Make it configurable via st2.conf config option. --- st2common/st2common/config.py | 3 +++ st2common/st2common/models/db/__init__.py | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/st2common/st2common/config.py b/st2common/st2common/config.py index 4b89c08650..e0cbb92278 100644 --- a/st2common/st2common/config.py +++ b/st2common/st2common/config.py @@ -176,6 +176,9 @@ def register_opts(ignore_errors=False): cfg.StrOpt( 'password', help='password for db login'), + cfg.IntOpt( + 'connection_timeout', default=3 * 1000, + help='Connection and server selection timeout (in ms).'), cfg.IntOpt( 'connection_retry_max_delay_m', default=3, help='Connection retry total time (minutes).'), diff --git a/st2common/st2common/models/db/__init__.py b/st2common/st2common/models/db/__init__.py index 09b3310600..9137978c7b 100644 --- a/st2common/st2common/models/db/__init__.py +++ b/st2common/st2common/models/db/__init__.py @@ -20,6 +20,7 @@ import ssl as ssl_lib import six +from oslo_config import cfg import mongoengine from mongoengine.queryset import visitor from pymongo import uri_parser @@ -123,13 +124,14 @@ def _db_connect(db_name, db_host, db_port, username=None, password=None, authentication_mechanism=authentication_mechanism, ssl_match_hostname=ssl_match_hostname) - # NOTE: We intentionally set "serverSelectionTimeoutMS" to 5 seconds. By default it's set to + # NOTE: We intentionally set "serverSelectionTimeoutMS" to 3 seconds. By default it's set to # 30 seconds, which means it will block up to 30 seconds and fail if there are any SSL related - # errors + # or other errors + connection_timeout = cfg.CONF.database.connection_timeout connection = mongoengine.connection.connect(db_name, host=db_host, port=db_port, tz_aware=True, username=username, password=password, - serverSelectionTimeoutMS=5000, + serverSelectionTimeoutMS=connection_timeout, **ssl_kwargs) # NOTE: Since pymongo 3.0, connect() method is lazy and not blocking (always returns success) From 69f04b1fbada61b29b3af52d424844a381979529 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 12:54:38 +0100 Subject: [PATCH 06/13] Add a test case for it. --- st2common/tests/unit/test_db.py | 45 ++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/st2common/tests/unit/test_db.py b/st2common/tests/unit/test_db.py index 462ab8fa3e..638558e3e8 100644 --- a/st2common/tests/unit/test_db.py +++ b/st2common/tests/unit/test_db.py @@ -15,12 +15,16 @@ from __future__ import absolute_import import ssl +import time import jsonschema import mock import mongoengine.connection +from mongoengine.connection import disconnect from oslo_config import cfg from pymongo.errors import ConnectionFailure +from pymongo.errors import ServerSelectionTimeoutError + from st2common.constants.triggers import TRIGGER_INSTANCE_PROCESSED from st2common.models.system.common import ResourceReference from st2common.transport.publishers import PoolPublisher @@ -86,6 +90,13 @@ def test_index_name_length(self): class DbConnectionTestCase(DbTestCase): + def setUp(self): + # NOTE: It's important we re-establish a connection on each setUp + self.setUpClass() + + def tearDown(self): + # NOTE: It's important we disconnect here otherwise tests will fail + disconnect() def test_check_connect(self): """ @@ -180,7 +191,8 @@ def test_db_setup(self, mock_mongoengine): 'tz_aware': True, 'authentication_mechanism': 'MONGODB-X509', 'ssl': True, - 'ssl_match_hostname': True + 'ssl_match_hostname': True, + 'serverSelectionTimeoutMS': 3000 }) @mock.patch('st2common.models.db.mongoengine') @@ -299,6 +311,37 @@ def test_db_setup_connecting_info_logging(self, mock_log, mock_mongoengine): actual_message = mock_log.error.call_args_list[0][0][0] self.assertEqual(expected_message, actual_message) + def test_db_connect_server_selection_timeout_ssl_on_non_ssl_listener(self): + # Verify that the we wait connection_timeout ms (server selection timeout ms) before failing + # and propagating the error + disconnect() + + db_name = 'st2' + db_host = 'localhost' + db_port = 27017 + + cfg.CONF.set_override(name='connection_timeout', group='database', override=1000) + + start = time.time() + self.assertRaises(ServerSelectionTimeoutError, db_setup, db_name=db_name, db_host=db_host, + db_port=db_port, ssl=True) + end = time.time() + diff = (end - start) + + self.assertTrue(diff >= 1) + + disconnect() + + cfg.CONF.set_override(name='connection_timeout', group='database', override=400) + + start = time.time() + self.assertRaises(ServerSelectionTimeoutError, db_setup, db_name=db_name, db_host=db_host, + db_port=db_port, ssl=True) + end = time.time() + diff = (end - start) + + self.assertTrue(diff >= 0.4) + class DbCleanupTestCase(DbTestCase): ensure_indexes = True From 68b9e613e49be7ebff19835898ecc6e31f0a8268 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 12:55:00 +0100 Subject: [PATCH 07/13] Re-generate the sample config. --- conf/st2.conf.sample | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/conf/st2.conf.sample b/conf/st2.conf.sample index 50e8042baf..967ed611fa 100644 --- a/conf/st2.conf.sample +++ b/conf/st2.conf.sample @@ -119,6 +119,8 @@ ssl_ca_certs = None ssl_certfile = None # Connection retry backoff max (seconds). connection_retry_backoff_max_s = 10 +# If True and `ssl_cert_reqs` is not None, enables hostname verification +ssl_match_hostname = True # Specifies whether a certificate is required from the other side of the connection, and whether it will be validated if provided ssl_cert_reqs = None # Create the connection to mongodb using SSL @@ -133,8 +135,8 @@ connection_retry_backoff_mul = 1 authentication_mechanism = None # Private keyfile used to identify the local connection against MongoDB. ssl_keyfile = None -# If True and `ssl_cert_reqs` is not None, enables hostname verification -ssl_match_hostname = True +# Connection and server selection timeout (in ms). +connection_timeout = 3000 # password for db login password = None # port of db server From 437350790da195414c533e6e13f160ddba7aa8c3 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 15:03:21 +0100 Subject: [PATCH 08/13] Also pass connectTimeoutMs argument which matches the server selection timeout. --- st2common/st2common/models/db/__init__.py | 1 + st2common/tests/unit/test_db.py | 1 + 2 files changed, 2 insertions(+) diff --git a/st2common/st2common/models/db/__init__.py b/st2common/st2common/models/db/__init__.py index 9137978c7b..d8f6f48257 100644 --- a/st2common/st2common/models/db/__init__.py +++ b/st2common/st2common/models/db/__init__.py @@ -131,6 +131,7 @@ def _db_connect(db_name, db_host, db_port, username=None, password=None, connection = mongoengine.connection.connect(db_name, host=db_host, port=db_port, tz_aware=True, username=username, password=password, + connectTimeoutMS=connection_timeout, serverSelectionTimeoutMS=connection_timeout, **ssl_kwargs) diff --git a/st2common/tests/unit/test_db.py b/st2common/tests/unit/test_db.py index 638558e3e8..af64a915ff 100644 --- a/st2common/tests/unit/test_db.py +++ b/st2common/tests/unit/test_db.py @@ -192,6 +192,7 @@ def test_db_setup(self, mock_mongoengine): 'authentication_mechanism': 'MONGODB-X509', 'ssl': True, 'ssl_match_hostname': True, + 'connectTimeoutMS': 3000, 'serverSelectionTimeoutMS': 3000 }) From 4d12795e6bd864068f584603810d2f834a864fbf Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 15:37:57 +0100 Subject: [PATCH 09/13] Make sure we perform monkey patch before importing ssl module otherwise tests will fail under Python 3. --- st2common/tests/unit/test_db.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/st2common/tests/unit/test_db.py b/st2common/tests/unit/test_db.py index af64a915ff..ae5e781d08 100644 --- a/st2common/tests/unit/test_db.py +++ b/st2common/tests/unit/test_db.py @@ -14,6 +14,11 @@ from __future__ import absolute_import +# NOTE: We need to perform monkeypatch before importing ssl module otherwise tests will fail. +# See https://github.com/StackStorm/st2/pull/4834 for details +from st2common.util.monkey_patch import monkey_patch +monkey_patch() + import ssl import time From a0929d8a5e62bdd739627ec2386d8efa59f5c4f1 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 15:38:50 +0100 Subject: [PATCH 10/13] Update changelog. --- CHANGELOG.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 53b8c438fd..67fc0fff42 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -97,7 +97,9 @@ Fixed of a type bytes and the other is of a type unicode / string. (bug fix) #4831 * Fix SSL connection support for MongoDB and RabbitMQ which wouldn't work under Python 3 and would result in cryptic "maximum recursion depth exceeded while calling a Python object" error on - connection failure. (bug fix) #4382 #4384 + connection failure. + + NOTE: This issue only affected installations using Python 3. (bug fix) #4382 #4384 Reported by @alexku7. From bd013f7367756674f882a015ab0cec7f651a8c9a Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 16:28:30 +0100 Subject: [PATCH 11/13] Add a workaround for a test failure. Sadly due to how tests run, we need to add monkey patch to another file which is imported before the actual affected test file (nose imports all the tests in the same process and relies on the ordering). --- st2tests/st2tests/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/st2tests/st2tests/base.py b/st2tests/st2tests/base.py index 58d7fd8501..76ced0dae9 100644 --- a/st2tests/st2tests/base.py +++ b/st2tests/st2tests/base.py @@ -15,6 +15,11 @@ from __future__ import absolute_import from __future__ import print_function +# NOTE: We need to perform monkeypatch before importing ssl module otherwise tests will fail. +# See https://github.com/StackStorm/st2/pull/4834 for details +from st2common.util.monkey_patch import monkey_patch +monkey_patch() + try: import simplejson as json except ImportError: From fa9509ea44240e42673663e3de8ad7f9f910d7f3 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 16 Dec 2019 19:05:08 +0100 Subject: [PATCH 12/13] Update CHANGELOG.rst Co-Authored-By: Eugen C. --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 67fc0fff42..80375b340a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -50,7 +50,7 @@ Changed * Improve MongoDB connection timeout related code. Connection and server selection timeout is now set to 3 seconds. Previously a default value of 30 seconds was used which means that for many connection related errors, our code would first wait for this timeout to be reached (30 seconds) - before returning error to the end user. #4384 + before returning error to the end user. #4834 Fixed ~~~~~ From 6ece0c2528f1217dec4efa3c8b89cb2f78a114eb Mon Sep 17 00:00:00 2001 From: Eugen C Date: Mon, 16 Dec 2019 19:30:17 +0000 Subject: [PATCH 13/13] Update CHANGELOG.rst --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 80375b340a..66013b9fa9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -99,7 +99,7 @@ Fixed result in cryptic "maximum recursion depth exceeded while calling a Python object" error on connection failure. - NOTE: This issue only affected installations using Python 3. (bug fix) #4382 #4384 + NOTE: This issue only affected installations using Python 3. (bug fix) #4832 #4834 Reported by @alexku7.