From 128d0823a12b3930a9d293a1da00db4ece5e6565 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 7 Mar 2021 11:11:16 +0100 Subject: [PATCH 01/10] Update service setup code so also print used locale and fs encoding. This will make it easier to troubleshoot locale / encoding related issue. Also make sure we print those version related messages under INFO log level instead of DEBUG since they may be material when troubleshooting various issues so we should use INFO. --- st2common/st2common/service_setup.py | 30 +++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/st2common/st2common/service_setup.py b/st2common/st2common/service_setup.py index bd01d205e7..03f9bcdfa7 100644 --- a/st2common/st2common/service_setup.py +++ b/st2common/st2common/service_setup.py @@ -22,6 +22,7 @@ import os import sys import traceback +import locale import logging as stdlib_logging import six @@ -111,17 +112,40 @@ def setup( sys.version_info[1], sys.version_info[2], ) - LOG.debug("Using Python: %s (%s)" % (version, sys.executable)) + + # We print locale related info to make it easier to troubleshoot issues where locale is not + # set correctly (e.g. using C / ascii, but services are trying to work with unicode data + # would result in things blowing up) + + fs_encoding = sys.getfilesystemencoding() + default_encoding = sys.getdefaultencoding() + lang_env = os.environ.get("LANG", "unknown") + + try: + language_code, encoding = locale.getdefaultlocale() + + if language_code and encoding: + used_locale = ".".join([language_code, encoding]) + else: + used_locale = "unable to retrieve locale" + except Exception as e: + used_locale = "unable to retrieve locale: %s " % (str(e)) + + LOG.info("Using Python: %s (%s)" % (version, sys.executable)) + LOG.info( + "Using fs encoding: %s, default encoding: %s, LANG env variable: %s, locale: %s" + % (fs_encoding, default_encoding, lang_env, used_locale) + ) config_file_paths = cfg.CONF.config_file config_file_paths = [os.path.abspath(path) for path in config_file_paths] - LOG.debug("Using config files: %s", ",".join(config_file_paths)) + LOG.info("Using config files: %s", ",".join(config_file_paths)) # Setup logging. logging_config_path = config.get_logging_config_path() logging_config_path = os.path.abspath(logging_config_path) - LOG.debug("Using logging config: %s", logging_config_path) + LOG.info("Using logging config: %s", logging_config_path) is_debug_enabled = cfg.CONF.debug or cfg.CONF.system.debug From 68fbfc6bc97d56997f34a05fffb89060fee2fbac Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 7 Mar 2021 12:02:29 +0100 Subject: [PATCH 02/10] Update affected tests, add test for verifying service startup messages. --- .../test_service_setup_log_level_filtering.py | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/st2common/tests/integration/test_service_setup_log_level_filtering.py b/st2common/tests/integration/test_service_setup_log_level_filtering.py index a03e90688a..67479fc8e3 100644 --- a/st2common/tests/integration/test_service_setup_log_level_filtering.py +++ b/st2common/tests/integration/test_service_setup_log_level_filtering.py @@ -65,6 +65,21 @@ class ServiceSetupLogLevelFilteringTestCase(IntegrationTestCase): def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): + # 0. Verify INFO level service start up messages + process = self._start_process(config_path=ST2_CONFIG_INFO_LL_PATH) + self.add_process(process=process) + + # Give it some time to start up + eventlet.sleep(3) + process.send_signal(signal.SIGKILL) + + # Verify first 4 environment related log messages + stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[:4]) + self.assertIn("INFO [-] Using Python:", stdout) + self.assertIn("INFO [-] Using fs encoding:", stdout) + self.assertIn("INFO [-] Using config files:", stdout) + self.assertIn("INFO [-] Using logging config:", stdout) + # 1. INFO log level - audit messages should not be included process = self._start_process(config_path=ST2_CONFIG_INFO_LL_PATH) self.add_process(process=process) @@ -73,8 +88,8 @@ def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): eventlet.sleep(3) process.send_signal(signal.SIGKILL) - # First 3 log lines are debug messages about the environment which are always logged - stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[3:]) + # First 4 log lines are debug messages about the environment which are always logged + stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[4:]) self.assertIn("INFO [-]", stdout) self.assertNotIn("DEBUG [-]", stdout) @@ -88,8 +103,8 @@ def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): eventlet.sleep(5) process.send_signal(signal.SIGKILL) - # First 3 log lines are debug messages about the environment which are always logged - stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[3:]) + # First 4 log lines are debug messages about the environment which are always logged + stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[4:]) self.assertIn("INFO [-]", stdout) self.assertIn("DEBUG [-]", stdout) @@ -103,8 +118,8 @@ def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): eventlet.sleep(5) process.send_signal(signal.SIGKILL) - # First 3 log lines are debug messages about the environment which are always logged - stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[3:]) + # First 4 log lines are debug messages about the environment which are always logged + stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[4:]) self.assertNotIn("INFO [-]", stdout) self.assertNotIn("DEBUG [-]", stdout) @@ -118,8 +133,8 @@ def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): eventlet.sleep(5) process.send_signal(signal.SIGKILL) - # First 3 log lines are debug messages about the environment which are always logged - stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[3:]) + # First 4 log lines are debug messages about the environment which are always logged + stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[4:]) self.assertIn("INFO [-]", stdout) self.assertIn("DEBUG [-]", stdout) From a461311d5bb71ae58a1c7b44751db4f0d7195d2d Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 7 Mar 2021 16:42:14 +0100 Subject: [PATCH 03/10] Update service bootstrap / start up code to log a warning on non-utf-8 locale. --- st2common/st2common/service_setup.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/st2common/st2common/service_setup.py b/st2common/st2common/service_setup.py index 03f9bcdfa7..c3d74fa96c 100644 --- a/st2common/st2common/service_setup.py +++ b/st2common/st2common/service_setup.py @@ -61,6 +61,19 @@ "register_service_in_service_registry", ] +# Message which is logged if non utf-8 locale is detected on startup. +NON_UTF8_LOCALE_WARNING_MSG = """ +Detected a non utf-8 locale / encoding (fs encoding: %s, default encoding: %s, locale: %s). +Using non utf-8 locale while working with unicode data will result in exceptions and undefined +behavior. +You are strongly encouraged to configure all the StackStorm services to use utf-8 encoding (e.g. +LANG=en_US.UTF-8). +""".strip().replace( + "\n", " " +) + +VALID_UTF8_ENCODINGS = ["utf8", "utf-8"] + LOG = logging.getLogger(__name__) @@ -129,6 +142,7 @@ def setup( else: used_locale = "unable to retrieve locale" except Exception as e: + language_code, encoding = "unknown", "unknown" used_locale = "unable to retrieve locale: %s " % (str(e)) LOG.info("Using Python: %s (%s)" % (version, sys.executable)) @@ -147,6 +161,17 @@ def setup( LOG.info("Using logging config: %s", logging_config_path) + # Warn on non utf-8 locale which could cause issues when running under Python 3 and working + # with unicode data + if ( + fs_encoding.lower() not in VALID_UTF8_ENCODINGS + or encoding.lower() not in VALID_UTF8_ENCODINGS + ): + LOG.warning( + NON_UTF8_LOCALE_WARNING_MSG + % (fs_encoding, default_encoding, used_locale.strip()) + ) + is_debug_enabled = cfg.CONF.debug or cfg.CONF.system.debug try: From b8c2c89958622ae2a9b0219da83c4866aa80dbfb Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 7 Mar 2021 16:48:14 +0100 Subject: [PATCH 04/10] Add a test case for it. --- .../test_service_setup_log_level_filtering.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/st2common/tests/integration/test_service_setup_log_level_filtering.py b/st2common/tests/integration/test_service_setup_log_level_filtering.py index 67479fc8e3..6aa83a9931 100644 --- a/st2common/tests/integration/test_service_setup_log_level_filtering.py +++ b/st2common/tests/integration/test_service_setup_log_level_filtering.py @@ -64,8 +64,8 @@ class ServiceSetupLogLevelFilteringTestCase(IntegrationTestCase): - def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): - # 0. Verify INFO level service start up messages + def test_system_info_is_logged_on_startup(self): + # Verify INFO level service start up messages process = self._start_process(config_path=ST2_CONFIG_INFO_LL_PATH) self.add_process(process=process) @@ -80,6 +80,23 @@ def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): self.assertIn("INFO [-] Using config files:", stdout) self.assertIn("INFO [-] Using logging config:", stdout) + def test_warning_is_emitted_on_non_utf8_encoding(self): + env = os.environ.copy() + env["LC_ALL"] = "invalid" + process = self._start_process(config_path=ST2_CONFIG_INFO_LL_PATH, env=env) + self.add_process(process=process) + + # Give it some time to start up + eventlet.sleep(3) + process.send_signal(signal.SIGKILL) + + # Verify first 4 environment related log messages + stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")) + self.assertIn("WARNING [-] Detected a non utf-8 locale / encoding", stdout) + self.assertIn("fs encoding: ascii", stdout) + self.assertIn("unknown locale: invalid", stdout) + + def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): # 1. INFO log level - audit messages should not be included process = self._start_process(config_path=ST2_CONFIG_INFO_LL_PATH) self.add_process(process=process) @@ -163,12 +180,13 @@ def test_kombu_heartbeat_tick_log_messages_are_excluded(self): stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")) self.assertNotIn("heartbeat_tick", stdout) - def _start_process(self, config_path): + def _start_process(self, config_path, env=None): cmd = CMD + [config_path] cwd = os.path.abspath(os.path.join(BASE_DIR, "../../../")) cwd = os.path.abspath(cwd) process = subprocess.Popen( cmd, + env=env or os.environ.copy(), cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, From b55682f881c795412697a33d8b9aeaa218034747 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 7 Mar 2021 16:57:03 +0100 Subject: [PATCH 05/10] Add changelog entry. --- CHANGELOG.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0fa0a562b8..35b4347bbc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,15 @@ Changed Contributed by @Kami. +* Service start up code has been updated to log a warning if a non-utf-8 encoding / locale is + detected. + + Using non-utf-8 locale while working with unicode data will result in various issues so users + are strongly recommended to ensure encoding for all the StackStorm service is + set to ``utf-8``. (#5182) + + Contributed by @Kami. + 3.4.0 - March 02, 2021 ---------------------- From 697a50aeabbaa1604a7c6162af5658dde21cf676 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 7 Mar 2021 23:29:31 +0100 Subject: [PATCH 06/10] Add bug fix for LoggingStream stderr wrapper which would result in infinite loop when trying to format the message field when StackStorm process was not using utf-8 encoding and when unicode sequence was logged. --- st2common/st2common/log.py | 49 ++++ st2common/st2common/service_setup.py | 17 +- .../tests/integration/log_unicode_data.py | 86 ++++++ st2common/tests/integration/test_logging.py | 258 ++++++++++++++++++ 4 files changed, 406 insertions(+), 4 deletions(-) create mode 100644 st2common/tests/integration/log_unicode_data.py create mode 100644 st2common/tests/integration/test_logging.py diff --git a/st2common/st2common/log.py b/st2common/st2common/log.py index fbf6205bb9..cee5253244 100644 --- a/st2common/st2common/log.py +++ b/st2common/st2common/log.py @@ -61,10 +61,24 @@ "audit", ] +# True if sys.stdout should be patched and re-opened with utf-8 encoding in situations where +# utf-8 encoding is not used (if we don't do that and a logger tries to log a unicode string, +# log format would go in an infinite loop). +# We only expose this variable for testing purposes +PATCH_STDOUT = os.environ.get("ST2_LOG_PATCH_STDOUT", "true").lower() in [ + "true", + "1", + "yes", +] + +LOG = logging.getLogger(__name__) + # Note: This attribute is used by "find_caller" so it can correctly exclude this file when looking # for the logger method caller frame. _srcfile = get_normalized_file_path(__file__) +_original_stdstderr = sys.stderr + def find_caller(stack_info=False, stacklevel=1): """ @@ -193,6 +207,13 @@ def __init__(self, name, level=logging.ERROR): self._level = level def write(self, message): + # Work around for infinite loop issue - ensure we don't log unicode data. + # If message contains unicode sequences and process locale is not set to utf-8, it would + # result in infinite lop in _log on formatting the message. + # This is because of the differences between Python 2.7 and Python 3 with log error + # handlers. + message = message.encode("utf-8", "replace").decode("ascii", "ignore") + self._logger._log(self._level, message, None) def flush(self): @@ -218,6 +239,33 @@ def _redirect_stderr(): sys.stderr = LoggingStream("STDERR") +def _patch_stdout(): + """ + This function re-opens sys.stdout using utf-8 encoding. + + It's to be used in situations where process encoding / locale is not set to utf-8. In such + situations when unicode sequence is logged, it would cause logging formatter to go in an infite + loop on formatting a record. + + This function works around that by ensuring sys.stdout is always opened in utf-8 mode. + """ + + stdout_encoding = getattr(sys.stdout, "encoding", "none") + + if stdout_encoding not in ["utf8", "utf-8"] and PATCH_STDOUT: + LOG.info( + "Patching sys.stdout and re-opening it with utf-8 encoding (originally opened " + "with %s encoding)..." % (stdout_encoding) + ) + sys.stdout = open( + sys.stdout.fileno(), + mode="w", + encoding="utf-8", + errors="replace", + buffering=1, + ) + + def setup( config_file, redirect_stderr=True, @@ -249,6 +297,7 @@ def setup( _add_exclusion_filters(handlers=handlers, excludes=excludes) if redirect_stderr: _redirect_stderr() + _patch_stdout() except Exception as exc: exc_cls = type(exc) tb_msg = traceback.format_exc() diff --git a/st2common/st2common/service_setup.py b/st2common/st2common/service_setup.py index c3d74fa96c..0fca2be6e8 100644 --- a/st2common/st2common/service_setup.py +++ b/st2common/st2common/service_setup.py @@ -132,7 +132,8 @@ def setup( fs_encoding = sys.getfilesystemencoding() default_encoding = sys.getdefaultencoding() - lang_env = os.environ.get("LANG", "unknown") + lang_env = os.environ.get("LANG", "notset") + pythonipencoding_env = os.environ.get("PYTHONIOENCODING", "notset") try: language_code, encoding = locale.getdefaultlocale() @@ -147,8 +148,9 @@ def setup( LOG.info("Using Python: %s (%s)" % (version, sys.executable)) LOG.info( - "Using fs encoding: %s, default encoding: %s, LANG env variable: %s, locale: %s" - % (fs_encoding, default_encoding, lang_env, used_locale) + "Using fs encoding: %s, default encoding: %s, locale: %s, LANG env variable: %s, " + "PYTHONIOENCODING env variable: %s" + % (fs_encoding, default_encoding, lang_env, used_locale, pythonipencoding_env) ) config_file_paths = cfg.CONF.config_file @@ -204,8 +206,15 @@ def setup( and handler.level < stdlib_logging.AUDIT ) if not is_debug_enabled and ignore_audit_log_messages: + try: + handler_repr = str(handler) + except TypeError: + # In case handler doesn't have name assigned, repr would throw + handler_repr = "unknown" + LOG.debug( - 'Excluding log messages with level "AUDIT" for handler "%s"' % (handler) + 'Excluding log messages with level "AUDIT" for handler "%s"' + % (handler_repr) ) handler.addFilter(LogLevelFilter(log_levels=exclude_log_levels)) diff --git a/st2common/tests/integration/log_unicode_data.py b/st2common/tests/integration/log_unicode_data.py new file mode 100644 index 0000000000..a4a30f664f --- /dev/null +++ b/st2common/tests/integration/log_unicode_data.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The StackStorm Authors. +# Copyright 2019 Extreme Networks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This file is used to test edge case with logging unicode data. +""" + +from __future__ import absolute_import + +import os +import sys + +from oslo_config import cfg + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +ST2ACTIONS_PATH = os.path.join(BASE_DIR, "../../../st2actions") +ST2COMMON_PATH = os.path.join(BASE_DIR, "../../") +ST2TESTS_PATH = os.path.join(BASE_DIR, "../../../st2tests") + +# Ensure st2actions is in PYTHONPATH. +# This is needed since this script is spawned from inside integration tests +sys.path.append(ST2ACTIONS_PATH) +sys.path.append(ST2COMMON_PATH) +sys.path.append(ST2TESTS_PATH) + +from st2actions.notifier import config +from st2common import log as logging +from st2common.service_setup import setup as common_setup + +FIXTURES_DIR = os.path.join(ST2TESTS_PATH, "st2tests/fixtures") +ST2_CONFIG_DEBUG_LL_PATH = os.path.join( + FIXTURES_DIR, "conf/st2.tests.api.debug_log_level.conf" +) + +LOG = logging.getLogger(__name__) + + +def main(): + cfg.CONF.set_override("debug", True) + common_setup( + service="test", + config=config, + setup_db=False, + run_migrations=False, + register_runners=False, + register_internal_trigger_types=False, + register_mq_exchanges=False, + register_signal_handlers=False, + service_registry=False, + config_args=["--config-file", ST2_CONFIG_DEBUG_LL_PATH], + ) + + LOG.info("Test info message 1") + LOG.debug("Test debug message 1") + + # 1. Actual unicode sequence + LOG.info("Test info message with unicode 1 - 好好好") + LOG.debug("Test debug message with unicode 1 - 好好好") + + # 2. Ascii escape sequence + LOG.info( + "Test info message with unicode 1 - " + + "好好好".encode("ascii", "backslashreplace").decode("ascii", "backslashreplace") + ) + LOG.debug( + "Test debug message with unicode 1 - " + + "好好好".encode("ascii", "backslashreplace").decode("ascii", "backslashreplace") + ) + + +if __name__ == "__main__": + main() diff --git a/st2common/tests/integration/test_logging.py b/st2common/tests/integration/test_logging.py new file mode 100644 index 0000000000..41206f25c8 --- /dev/null +++ b/st2common/tests/integration/test_logging.py @@ -0,0 +1,258 @@ +# Copyright 2020 The StackStorm Authors. +# Copyright 2019 Extreme Networks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import os +import sys +import signal + +import eventlet +from eventlet.green import subprocess + +from st2tests.base import IntegrationTestCase + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +TEST_FILE_PATH = os.path.join(BASE_DIR, "log_unicode_data.py") + + +class LogFormattingAndEncodingTestCase(IntegrationTestCase): + def test_formatting_with_unicode_data_works_no_stdout_patching(self): + return + # Ensure that process doesn't end up in an infinite loop if non-utf8 locale / encoding is + # used and a unicode sequence is logged. + + # 1. Process is using a utf-8 encoding + process = self._start_process( + env={ + "LC_ALL": "en_US.UTF-8", + "ST2_LOG_PATCH_STDOUT": "false", + "PYTHONIOENCODING": "utf-8", + } + ) + self.add_process(process=process) + + # Give it some time to start up and run for a while + eventlet.sleep(2) + process.send_signal(signal.SIGKILL) + + stdout = process.stdout.read().decode("utf-8").strip() + stderr = process.stderr.read().decode("utf-8").strip() + stdout_lines = stdout.split("\n") + + self.assertEqual(stderr, "") + self.assertTrue(len(stdout_lines) < 20) + + self.assertIn("INFO [-] Test info message 1", stdout) + self.assertIn("Test debug message 1", stdout) + self.assertIn("INFO [-] Test info message with unicode 1 - 好好好", stdout) + self.assertIn("DEBUG [-] Test debug message with unicode 1 - 好好好", stdout) + self.assertIn( + "INFO [-] Test info message with unicode 1 - \u597d\u597d\u597d", stdout + ) + self.assertIn( + "DEBUG [-] Test debug message with unicode 1 - \u597d\u597d\u597d", stdout + ) + + # 2. Process is not using utf-8 encoding - LC_ALL set to invalid locale - should result in + # single exception being logged, but not infinite loop + process = self._start_process( + env={ + "LC_ALL": "invalid", + "ST2_LOG_PATCH_STDOUT": "false", + "PYTHONIOENCODING": "utf-8", + } + ) + self.add_process(process=process) + + # Give it some time to start up and run for a while + eventlet.sleep(2) + process.send_signal(signal.SIGKILL) + + stdout = process.stdout.read().decode("utf-8") + stderr = process.stderr.read().decode("utf-8") + stdout_lines = stdout.split("\n") + + self.assertEqual(stderr, "") + + self.assertIn("ERROR [-] ", stdout) + self.assertIn("can't encode", stdout) + self.assertIn("'ascii' codec can't encode", stdout) + + self.assertTrue(len(stdout_lines) >= 50) + + self.assertIn("INFO [-] Test info message 1", stdout) + self.assertIn("Test debug message 1", stdout) + self.assertIn("INFO [-] Test info message with unicode 1 - 好好好", stdout) + self.assertIn("DEBUG [-] Test debug message with unicode 1 - 好好好", stdout) + self.assertIn( + "INFO [-] Test info message with unicode 1 - \u597d\u597d\u597d", stdout + ) + self.assertIn( + "DEBUG [-] Test debug message with unicode 1 - \u597d\u597d\u597d", stdout + ) + + # 3. Process is not using utf-8 encoding - PYTHONIOENCODING set to ascii - should result in + # single exception being logged, but not infinite loop + process = self._start_process( + env={ + "LC_ALL": "en_US.UTF-8", + "ST2_LOG_PATCH_STDOUT": "false", + "PYTHONIOENCODING": "ascii", + } + ) + self.add_process(process=process) + + # Give it some time to start up and run for a while + eventlet.sleep(2) + process.send_signal(signal.SIGKILL) + + stdout = process.stdout.read().decode("utf-8") + stderr = process.stderr.read().decode("utf-8") + stdout_lines = stdout.split("\n") + + self.assertEqual(stderr, "") + + self.assertIn("ERROR [-] ", stdout) + self.assertIn("can't encode", stdout) + self.assertIn("'ascii' codec can't encode", stdout) + + self.assertTrue(len(stdout_lines) >= 50) + + self.assertIn("INFO [-] Test info message 1", stdout) + self.assertIn("Test debug message 1", stdout) + self.assertNotIn("INFO [-] Test info message with unicode 1 - 好好好", stdout) + self.assertNotIn("DEBUG [-] Test debug message with unicode 1 - 好好好", stdout) + self.assertNotIn( + "INFO [-] Test info message with unicode 1 - \u597d\u597d\u597d", stdout + ) + self.assertNotIn( + "DEBUG [-] Test debug message with unicode 1 - \u597d\u597d\u597d", stdout + ) + + def test_formatting_with_unicode_data_works_with_stdout_patching(self): + # Test a scenario where patching is enabled which means it should never result in infinite + # loop + # 1. Process is using a utf-8 encoding + process = self._start_process( + env={ + "LC_ALL": "en_US.UTF-8", + "ST2_LOG_PATCH_STDOUT": "true", + "PYTHONIOENCODING": "utf-8", + } + ) + self.add_process(process=process) + + # Give it some time to start up and run for a while + eventlet.sleep(2) + process.send_signal(signal.SIGKILL) + + stdout = process.stdout.read().decode("utf-8") + stderr = process.stderr.read().decode("utf-8") + stdout_lines = stdout.split("\n") + print(stderr) + + self.assertEqual(stderr, "") + self.assertTrue(len(stdout_lines) < 20) + + self.assertIn("INFO [-] Test info message 1", stdout) + self.assertIn("Test debug message 1", stdout) + self.assertIn("INFO [-] Test info message with unicode 1 - 好好好", stdout) + self.assertIn("DEBUG [-] Test debug message with unicode 1 - 好好好", stdout) + self.assertIn( + "INFO [-] Test info message with unicode 1 - \u597d\u597d\u597d", stdout + ) + self.assertIn( + "DEBUG [-] Test debug message with unicode 1 - \u597d\u597d\u597d", stdout + ) + + # 2. Process is not using utf-8 encoding + process = self._start_process( + env={ + "LC_ALL": "invalid", + "ST2_LOG_PATCH_STDOUT": "true", + "PYTHONIOENCODING": "utf-8", + } + ) + self.add_process(process=process) + + # Give it some time to start up and run for a while + eventlet.sleep(2) + process.send_signal(signal.SIGKILL) + + stdout = process.stdout.read().decode("utf-8") + stderr = process.stderr.read().decode("utf-8") + + self.assertEqual(stderr, "") + self.assertTrue(len(stdout_lines) < 50) + + self.assertIn("INFO [-] Test info message 1", stdout) + self.assertIn("Test debug message 1", stdout) + self.assertIn("INFO [-] Test info message with unicode 1 - 好好好", stdout) + self.assertIn("DEBUG [-] Test debug message with unicode 1 - 好好好", stdout) + self.assertIn( + "INFO [-] Test info message with unicode 1 - \u597d\u597d\u597d", stdout + ) + self.assertIn( + "DEBUG [-] Test debug message with unicode 1 - \u597d\u597d\u597d", stdout + ) + + # 3. Process is not using utf-8 encoding - PYTHONIOENCODING set to ascii + process = self._start_process( + env={ + "LC_ALL": "en_US.UTF-8", + "ST2_LOG_PATCH_STDOUT": "true", + "PYTHONIOENCODING": "ascii", + } + ) + self.add_process(process=process) + + # Give it some time to start up and run for a while + eventlet.sleep(2) + process.send_signal(signal.SIGKILL) + + stdout = process.stdout.read().decode("utf-8") + stderr = process.stderr.read().decode("utf-8") + stdout_lines = stdout.split("\n") + + self.assertEqual(stderr, "") + + self.assertTrue(len(stdout_lines) < 20) + + self.assertIn("Patching sys.stdout", stdout) + self.assertIn("INFO [-] Test info message 1", stdout) + self.assertIn("Test debug message 1", stdout) + self.assertIn("INFO [-] Test info message with unicode 1 - 好好好", stdout) + self.assertIn("DEBUG [-] Test debug message with unicode 1 - 好好好", stdout) + self.assertIn( + "INFO [-] Test info message with unicode 1 - \u597d\u597d\u597d", stdout + ) + self.assertIn( + "DEBUG [-] Test debug message with unicode 1 - \u597d\u597d\u597d", stdout + ) + + def _start_process(self, env=None): + cmd = [sys.executable, TEST_FILE_PATH] + process = subprocess.Popen( + cmd, + env=env or os.environ.copy(), + cwd=os.getcwd(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=False, + preexec_fn=os.setsid, + ) + return process From 99ae8b081eba4c70d0ee14a7322e248f28855384 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 8 Mar 2021 00:04:39 +0100 Subject: [PATCH 07/10] Update affected tests. --- .../integration/test_service_setup_log_level_filtering.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/st2common/tests/integration/test_service_setup_log_level_filtering.py b/st2common/tests/integration/test_service_setup_log_level_filtering.py index 6aa83a9931..a7a79d20c4 100644 --- a/st2common/tests/integration/test_service_setup_log_level_filtering.py +++ b/st2common/tests/integration/test_service_setup_log_level_filtering.py @@ -74,7 +74,7 @@ def test_system_info_is_logged_on_startup(self): process.send_signal(signal.SIGKILL) # Verify first 4 environment related log messages - stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[:4]) + stdout = process.stdout.read().decode("utf-8") self.assertIn("INFO [-] Using Python:", stdout) self.assertIn("INFO [-] Using fs encoding:", stdout) self.assertIn("INFO [-] Using config files:", stdout) @@ -138,7 +138,7 @@ def test_audit_log_level_is_filtered_if_log_level_is_not_debug_or_audit(self): # First 4 log lines are debug messages about the environment which are always logged stdout = "\n".join(process.stdout.read().decode("utf-8").split("\n")[4:]) - self.assertNotIn("INFO [-]", stdout) + self.assertIn("INFO [-]", stdout) self.assertNotIn("DEBUG [-]", stdout) self.assertIn("AUDIT [-]", stdout) From 84ad5faa9da830ede9bdf3b37a9d25b7479557ac Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 8 Mar 2021 12:22:11 +0100 Subject: [PATCH 08/10] Apply suggestions from code review Co-authored-by: Jacob Floyd --- st2common/st2common/service_setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/st2common/st2common/service_setup.py b/st2common/st2common/service_setup.py index 0fca2be6e8..9d4f4601d8 100644 --- a/st2common/st2common/service_setup.py +++ b/st2common/st2common/service_setup.py @@ -133,7 +133,7 @@ def setup( fs_encoding = sys.getfilesystemencoding() default_encoding = sys.getdefaultencoding() lang_env = os.environ.get("LANG", "notset") - pythonipencoding_env = os.environ.get("PYTHONIOENCODING", "notset") + pythonioencoding_env = os.environ.get("PYTHONIOENCODING", "notset") try: language_code, encoding = locale.getdefaultlocale() @@ -150,7 +150,7 @@ def setup( LOG.info( "Using fs encoding: %s, default encoding: %s, locale: %s, LANG env variable: %s, " "PYTHONIOENCODING env variable: %s" - % (fs_encoding, default_encoding, lang_env, used_locale, pythonipencoding_env) + % (fs_encoding, default_encoding, lang_env, used_locale, pythonioencoding_env) ) config_file_paths = cfg.CONF.config_file From 12473d50d8005b42b95f2e26bf8d6930ab490f76 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 8 Mar 2021 12:42:38 +0100 Subject: [PATCH 09/10] Uncomment out the test, add additional asserts. --- st2common/tests/integration/test_logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/st2common/tests/integration/test_logging.py b/st2common/tests/integration/test_logging.py index 41206f25c8..af4eab3bf1 100644 --- a/st2common/tests/integration/test_logging.py +++ b/st2common/tests/integration/test_logging.py @@ -31,7 +31,6 @@ class LogFormattingAndEncodingTestCase(IntegrationTestCase): def test_formatting_with_unicode_data_works_no_stdout_patching(self): - return # Ensure that process doesn't end up in an infinite loop if non-utf8 locale / encoding is # used and a unicode sequence is logged. @@ -93,6 +92,7 @@ def test_formatting_with_unicode_data_works_no_stdout_patching(self): self.assertIn("'ascii' codec can't encode", stdout) self.assertTrue(len(stdout_lines) >= 50) + self.assertTrue(len(stdout_lines) < 100) self.assertIn("INFO [-] Test info message 1", stdout) self.assertIn("Test debug message 1", stdout) @@ -131,6 +131,7 @@ def test_formatting_with_unicode_data_works_no_stdout_patching(self): self.assertIn("'ascii' codec can't encode", stdout) self.assertTrue(len(stdout_lines) >= 50) + self.assertTrue(len(stdout_lines) < 100) self.assertIn("INFO [-] Test info message 1", stdout) self.assertIn("Test debug message 1", stdout) From b431d740a52bae19a300538cb95bc5f6761ac93b Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 5 Apr 2021 11:19:23 +0200 Subject: [PATCH 10/10] Cherry pick changelog entries from #5192 and sync it up. --- CHANGELOG.rst | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6d39a5f6e0..86cc9f72f0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -257,6 +257,50 @@ Fixed Contributed by @Kami. +3.4.1 - March 14, 2021 +---------------------- + +Added +~~~~~ + + +* Service start up code has been updated to log a warning if a non-utf-8 encoding / locale is + detected. + + Using non-utf-8 locale while working with unicode data will result in various issues so users + are strongly recommended to ensure encoding for all the StackStorm service is + set to ``utf-8``. (#5182) + + Contributed by @Kami. + +Changed +~~~~~~~ + +* Use `sudo -E` to fix GitHub Actions tests #5187 + + Contributed by @cognifloyd + +Fixed +~~~~~ + +* Properly handle unicode strings in logs #5184 + + Fix a logging loop when attempting to encode Unicode characters in locales that do not support + Unicode characters - CVE-2021-28667. + + See https://stackstorm.com/2021/03/10/stackstorm-v3-4-1-security-fix/ for more information. + + Contributed by @Kami + +* Fix SensorTypeAPI schema to use class_name instead of name since documentation for pack + development uses class_name and registrar used to load sensor to database assign class_name + to name in the database model. (bug fix) + +* Updated paramiko version to 2.7.2, to go with updated cryptography to prevent problems + with ssh keys on remote actions. #5201 + + Contributed by Amanda McGuinness (@amanda11 Ammeon Solutions) + 3.4.0 - March 02, 2021 ----------------------