From 0245a18198babafe2ab0abde31791cf526435234 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 12:54:47 +0200 Subject: [PATCH 01/38] Fix typo. --- st2reactor/st2reactor/cmd/timersengine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/st2reactor/st2reactor/cmd/timersengine.py b/st2reactor/st2reactor/cmd/timersengine.py index 43ced30930..acf1e6932e 100644 --- a/st2reactor/st2reactor/cmd/timersengine.py +++ b/st2reactor/st2reactor/cmd/timersengine.py @@ -82,7 +82,7 @@ def main(): except SystemExit as exit_code: sys.exit(exit_code) except: - LOG.exception('(PID=%s) RulesEngine quit due to exception.', os.getpid()) + LOG.exception('(PID=%s) TimerEngine quit due to exception.', os.getpid()) return 1 finally: _teardown() From 0a975aabfaaf6d219879c52b308a8265566cfa47 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 13:13:07 +0200 Subject: [PATCH 02/38] Add missing __all__, instrument rules engine with additional metrics. Now we also track: 1. Number of rules processed by rules engine (counter + timer) 2. How long it took to process rule for each unique trigger instance. --- st2common/st2common/metrics/base.py | 3 ++- st2reactor/st2reactor/rules/engine.py | 5 ++++- st2reactor/st2reactor/rules/worker.py | 9 ++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index 66f932d095..49f5b842e9 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -58,7 +58,8 @@ def _format_metrics_key_for_liveaction_db(liveaction_db): def format_metrics_key(action_db=None, liveaction_db=None, key=None): - """Return a string for usage as metrics key. + """ + Return a string for usage as metrics key. """ assert (action_db or key or liveaction_db), """Must supply one of key, action_db, or liveaction_db""" diff --git a/st2reactor/st2reactor/rules/engine.py b/st2reactor/st2reactor/rules/engine.py index bf6c6f24d1..ac8b6e190a 100644 --- a/st2reactor/st2reactor/rules/engine.py +++ b/st2reactor/st2reactor/rules/engine.py @@ -23,6 +23,10 @@ LOG = logging.getLogger('st2reactor.rules.RulesEngine') +__all__ = [ + 'RulesEngine' +] + class RulesEngine(object): def handle_trigger_instance(self, trigger_instance): @@ -72,7 +76,6 @@ def create_rule_enforcers(self, trigger_instance, matching_rules): """ enforcers = [] for matching_rule in matching_rules: - get_driver().inc_counter( format_metrics_key( key='rule.%s' % matching_rule diff --git a/st2reactor/st2reactor/rules/worker.py b/st2reactor/st2reactor/rules/worker.py index 78e0de9d41..1d4071b8f0 100644 --- a/st2reactor/st2reactor/rules/worker.py +++ b/st2reactor/st2reactor/rules/worker.py @@ -14,6 +14,7 @@ # limitations under the License. from __future__ import absolute_import + from kombu import Connection from st2common import log as logging @@ -26,6 +27,8 @@ import st2reactor.container.utils as container_utils from st2reactor.rules.engine import RulesEngine from st2common.transport.queues import RULESENGINE_WORK_QUEUE +from st2common.metrics.base import CounterWithTimer +from st2common.metrics.base import Timer from st2common.metrics.base import format_metrics_key, get_driver @@ -87,7 +90,11 @@ def process(self, pre_ack_response): container_utils.update_trigger_instance_status( trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING) - self.rules_engine.handle_trigger_instance(trigger_instance) + + with CounterWithTimer(key="st2.rules.processing"): + with Timer(key='st2.rules.processing.trigger_instance.%s' % (trigger_instance.id)): + self.rules_engine.handle_trigger_instance(trigger_instance) + container_utils.update_trigger_instance_status( trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSED) except: From 9b5a5af8b09fe1f8d256560cdd13b101588e03d8 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 13:17:35 +0200 Subject: [PATCH 03/38] Use consistent metric name. --- st2reactor/st2reactor/rules/worker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/st2reactor/st2reactor/rules/worker.py b/st2reactor/st2reactor/rules/worker.py index 1d4071b8f0..72da3aad3f 100644 --- a/st2reactor/st2reactor/rules/worker.py +++ b/st2reactor/st2reactor/rules/worker.py @@ -91,8 +91,8 @@ def process(self, pre_ack_response): container_utils.update_trigger_instance_status( trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING) - with CounterWithTimer(key="st2.rules.processing"): - with Timer(key='st2.rules.processing.trigger_instance.%s' % (trigger_instance.id)): + with CounterWithTimer(key="st2.rule.processed"): + with Timer(key='st2.rule.processed.trigger_instance.%s' % (trigger_instance.id)): self.rules_engine.handle_trigger_instance(trigger_instance) container_utils.update_trigger_instance_status( From 4d1b8f79b4e314e825da6f6089de7ad06d2b2957 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 13:33:53 +0200 Subject: [PATCH 04/38] Add missing __all__. --- st2common/st2common/metrics/base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index 49f5b842e9..b6114b25f9 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -26,6 +26,18 @@ from st2common.util.date import get_datetime_utc_now from st2common.exceptions.plugins import PluginLoadError +__all__ = [ + 'BaseMetricsDriver', + + 'Timer', + 'Counter', + 'CounterWithTimer', + + 'metrics_initialize', + 'get_driver', + 'check_key' +] + if not hasattr(cfg.CONF, 'metrics'): from st2common.config import register_opts register_opts() From 20134d8acbfc74bf253c3d0efd3d5001b27c8ed2 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 14:00:32 +0200 Subject: [PATCH 05/38] Add support for "Gauge" metric type to our metrics drivers and code. NOTE: Prometheus driver conflats counter and gauge atm a bit, we should eventually sort this out so using different drivers won't result in using different metrics type and different representation and visualization. --- st2common/st2common/metrics/base.py | 37 +++++++++++--- .../metrics/drivers/prometheus_driver.py | 48 +++++++++++++++++-- .../metrics/drivers/statsd_driver.py | 44 +++++++++++++++-- 3 files changed, 117 insertions(+), 12 deletions(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index b6114b25f9..d2cb02d9f0 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -96,20 +96,43 @@ def format_metrics_key(action_db=None, liveaction_db=None, key=None): class BaseMetricsDriver(object): - """ Base class for driver implementations for metric collection """ + Base class for driver implementations for metric collection + """ + def time(self, key, time): - """ Timer metric + """ + Timer metric """ pass def inc_counter(self, key, amount=1): - """ Increment counter + """ + Increment counter """ pass def dec_counter(self, key, amount=1): - """ Decrement metric + """ + Decrement metric + """ + pass + + def set_gauge(self, key, value): + """ + Set gauge value. + """ + pass + + def incr_gauge(self, key, amount=1): + """ + Increment gauge value. + """ + pass + + def decr_gauge(self, key, amount=1): + """ + Decrement gauge value. """ pass @@ -122,7 +145,8 @@ def check_key(key): class Timer(object): - """ Timer context manager for easily sending timer statistics. + """ + Timer context manager for easily sending timer statistics. """ def __init__(self, key, include_parameter=False): check_key(key) @@ -165,7 +189,8 @@ def wrapper(*args, **kw): class Counter(object): - """ Timer context manager for easily sending timer statistics. + """ + Counter context manager for easily sending counter statistics. """ def __init__(self, key): check_key(key) diff --git a/st2common/st2common/metrics/drivers/prometheus_driver.py b/st2common/st2common/metrics/drivers/prometheus_driver.py index 8a85ef8c20..3078bfc38c 100644 --- a/st2common/st2common/metrics/drivers/prometheus_driver.py +++ b/st2common/st2common/metrics/drivers/prometheus_driver.py @@ -12,9 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from numbers import Number + from prometheus_client import Histogram, Gauge from st2common.metrics.base import BaseMetricsDriver +from st2common.metrics.base import check_key + +__all__ = [ + 'PrometheusDriver' +] class PrometheusDriver(BaseMetricsDriver): @@ -24,7 +32,8 @@ def __init__(self): pass def time(self, key, time): - """ Timer metric + """ + Timer metric """ prometheus_histogram = Histogram( # pylint: disable=no-value-for-parameter key @@ -32,7 +41,8 @@ def time(self, key, time): prometheus_histogram.observe(time) def inc_counter(self, key, amount=1): - """ Increment counter + """ + Increment counter """ prometheus_counter = Gauge( # pylint: disable=no-value-for-parameter key @@ -40,9 +50,41 @@ def inc_counter(self, key, amount=1): prometheus_counter.inc(amount) def dec_counter(self, key, amount=1): - """ Decrement metric + """ + Decrement metric """ prometheus_counter = Gauge( # pylint: disable=no-value-for-parameter key ) prometheus_counter.dec(amount) + + + def set_gauge(self, key, value): + """ + Set gauge value. + """ + check_key(key) + assert isinstance(value, Number) + + gauge = Gauge(key) + gauge.set(value) + + def incr_gauge(self, key, amount=1): + """ + Increment gauge value. + """ + check_key(key) + assert isinstance(amount, Number) + + gauge = Gauge(key) + gauge.incr(amount) + + def decr_gauge(self, key, amount=1): + """ + Decrement gauge value. + """ + check_key(key) + assert isinstance(amount, Number) + + gauge = Gauge(key) + gauge.decr(amount) diff --git a/st2common/st2common/metrics/drivers/statsd_driver.py b/st2common/st2common/metrics/drivers/statsd_driver.py index 7646c1d2dd..47c5322544 100644 --- a/st2common/st2common/metrics/drivers/statsd_driver.py +++ b/st2common/st2common/metrics/drivers/statsd_driver.py @@ -12,15 +12,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from numbers import Number -from oslo_config import cfg + import statsd +from oslo_config import cfg + +from st2common.metrics.base import BaseMetricsDriver +from st2common.metrics.base import check_key -from st2common.metrics.base import BaseMetricsDriver, check_key +__all__ = [ + 'StatsdDriver' +] class StatsdDriver(BaseMetricsDriver): - """ StatsD Implementation of the metrics driver + """ + StatsD Implementation of the metrics driver """ def __init__(self): statsd.Connection.set_defaults(host=cfg.CONF.metrics.host, port=cfg.CONF.metrics.port) @@ -49,3 +57,33 @@ def dec_counter(self, key, amount=1): assert isinstance(amount, Number) self._counters[key] = self._counters.get(key, statsd.Counter(key)) self._counters[key] -= amount + + def set_gauge(self, key, value): + """ + Set gauge value. + """ + check_key(key) + assert isinstance(value, Number) + + gauge = statsd.Gauge(key) + gauge.send(None, value) + + def incr_gauge(self, key, amount=1): + """ + Increment gauge value. + """ + check_key(key) + assert isinstance(amount, Number) + + gauge = statsd.Gauge(key) + gauge.increment(None, amount) + + def decr_gauge(self, key, amount=1): + """ + Decrement gauge value. + """ + check_key(key) + assert isinstance(amount, Number) + + gauge = statsd.Gauge(key) + gauge.decrement(None, amount) From 0b4a4e5867562cc78f070afa3c7fa4a6d7764b16 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 14:17:50 +0200 Subject: [PATCH 06/38] Add new instrumentation middleware which allows us to instrument our API services with various metrics. --- .../st2common/middleware/instrumentation.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 st2common/st2common/middleware/instrumentation.py diff --git a/st2common/st2common/middleware/instrumentation.py b/st2common/st2common/middleware/instrumentation.py new file mode 100644 index 0000000000..8259ec9cac --- /dev/null +++ b/st2common/st2common/middleware/instrumentation.py @@ -0,0 +1,81 @@ +# Licensed to the StackStorm, Inc ('StackStorm') under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = [ + 'RequestInstrumentationMiddleware', + 'ResponseInstrumentationMiddleware' +] + +from st2common.router import Request +from st2common.metrics.base import CounterWithTimer +from st2common.metrics.base import get_driver + + +class RequestInstrumentationMiddleware(object): + """ + Instrumentation middleware which records various request related metrics. + """ + + def __init__(self, app, service_name): + """ + :param service_name: Service name (e.g. api, stream, auth). + :type service_name: ``str`` + """ + self.app = app + self._service_name = service_name + + def __call__(self, environ, start_response): + request = Request(environ) + + metrics_driver = get_driver() + + key = 'st2.%s.requests.method.%s' % (self._service_name, request.method) + metrics_driver.inc_counter(key) + + path = request.path.replace('/', '_') + key = 'st2.%s.requests.path.%s' % (self._service_name, path) + metrics_driver.inc_counter(key) + + # Track and time current number of processing requests + key = 'st2.%s.requests' % (self._service_name) + with CounterWithTimer(key=key): + return self.app(environ, start_response) + + +class ResponseInstrumentationMiddleware(object): + """ + Instrumentation middleware which records various response related metrics. + """ + + def __init__(self, app, service_name): + """ + :param service_name: Service name (e.g. api, stream, auth). + :type service_name: ``str`` + """ + self.app = app + self._service_name = service_name + + def __call__(self, environ, start_response): + # Track and time current number of processing requests + def custom_start_response(status, headers, exc_info=None): + status_code = int(status.split(' ')[0]) + + metrics_driver = get_driver() + metrics_driver.incr_gauge('st2.%s.responses.status.%s' % (self._service_name, + status_code)) + + return start_response(status, headers, exc_info) + + return self.app(environ, custom_start_response) From 5d78d6777c7db5cffd22884c6c7b6eedaf128df0 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 14:26:15 +0200 Subject: [PATCH 07/38] Add new instrumentation middleware to all the API services. --- st2api/st2api/app.py | 4 ++++ st2auth/st2auth/app.py | 4 ++++ st2stream/st2stream/app.py | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/st2api/st2api/app.py b/st2api/st2api/app.py index 5fd756ca53..4526e0212d 100644 --- a/st2api/st2api/app.py +++ b/st2api/st2api/app.py @@ -22,6 +22,8 @@ from st2common.middleware.cors import CorsMiddleware from st2common.middleware.request_id import RequestIDMiddleware from st2common.middleware.logging import LoggingMiddleware +from st2common.middleware.instrumentation import RequestInstrumentationMiddleware +from st2common.middleware.instrumentation import ResponseInstrumentationMiddleware from st2common.router import Router from st2common.util.monkey_patch import monkey_patch from st2common.constants.system import VERSION_STRING @@ -75,6 +77,8 @@ def setup_app(config={}): app = ErrorHandlingMiddleware(app) app = CorsMiddleware(app) app = LoggingMiddleware(app, router) + app = ResponseInstrumentationMiddleware(app, service_name='api') app = RequestIDMiddleware(app) + app = RequestInstrumentationMiddleware(app, service_name='api') return app diff --git a/st2auth/st2auth/app.py b/st2auth/st2auth/app.py index e7050ffb6f..2bbf725ab5 100644 --- a/st2auth/st2auth/app.py +++ b/st2auth/st2auth/app.py @@ -20,6 +20,8 @@ from st2common.middleware.cors import CorsMiddleware from st2common.middleware.request_id import RequestIDMiddleware from st2common.middleware.logging import LoggingMiddleware +from st2common.middleware.instrumentation import RequestInstrumentationMiddleware +from st2common.middleware.instrumentation import ResponseInstrumentationMiddleware from st2common.router import Router from st2common.util.monkey_patch import monkey_patch from st2common.constants.system import VERSION_STRING @@ -69,6 +71,8 @@ def setup_app(config={}): app = ErrorHandlingMiddleware(app) app = CorsMiddleware(app) app = LoggingMiddleware(app, router) + app = ResponseInstrumentationMiddleware(app, service_name='auth') app = RequestIDMiddleware(app) + app = RequestInstrumentationMiddleware(app, service_name='auth') return app diff --git a/st2stream/st2stream/app.py b/st2stream/st2stream/app.py index 75ff60c636..5b106ca90d 100644 --- a/st2stream/st2stream/app.py +++ b/st2stream/st2stream/app.py @@ -31,6 +31,8 @@ from st2common.middleware.cors import CorsMiddleware from st2common.middleware.request_id import RequestIDMiddleware from st2common.middleware.logging import LoggingMiddleware +from st2common.middleware.instrumentation import RequestInstrumentationMiddleware +from st2common.middleware.instrumentation import ResponseInstrumentationMiddleware from st2common.router import Router from st2common.util.monkey_patch import monkey_patch from st2common.constants.system import VERSION_STRING @@ -77,6 +79,8 @@ def setup_app(config={}): app = ErrorHandlingMiddleware(app) app = CorsMiddleware(app) app = LoggingMiddleware(app, router) + app = ResponseInstrumentationMiddleware(app, service_name='stream') app = RequestIDMiddleware(app) + app = RequestInstrumentationMiddleware(app, service_name='stream') return app From b483f7549a8e22c54c6789d32df16305797c9562 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 14:42:48 +0200 Subject: [PATCH 08/38] Add new echo metrics driver which prints out metric calls and use it in development environment. --- st2common/setup.py | 1 + .../st2common/metrics/drivers/echo_driver.py | 47 +++++++++++++++++++ .../st2common/metrics/drivers/noop_driver.py | 9 +++- 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 st2common/st2common/metrics/drivers/echo_driver.py diff --git a/st2common/setup.py b/st2common/setup.py index 283fa5ddb9..473ed83c8a 100644 --- a/st2common/setup.py +++ b/st2common/setup.py @@ -68,6 +68,7 @@ 'st2common.metrics.driver': [ 'statsd = st2common.metrics.drivers.statsd_driver:StatsdDriver', 'noop = st2common.metrics.drivers.noop_driver:NoopDriver', + 'echo = st2common.metrics.drivers.echo_driver:EchoDriver', ], } ) diff --git a/st2common/st2common/metrics/drivers/echo_driver.py b/st2common/st2common/metrics/drivers/echo_driver.py new file mode 100644 index 0000000000..7d9faa2bd9 --- /dev/null +++ b/st2common/st2common/metrics/drivers/echo_driver.py @@ -0,0 +1,47 @@ +# Licensed to the StackStorm, Inc ('StackStorm') under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from st2common import log as logging +from st2common.metrics.base import BaseMetricsDriver + +__all__ = [ + 'EchoDriver' +] + +LOG = logging.getLogger(__name__) + + +class EchoDriver(BaseMetricsDriver): + """ + Driver which logs / LOG.debugs out each metrics operation which would have been performed. + """ + + def time(self, key, time): + LOG.debug('[metrics] time(key=%s, time=%s)' % (key, time)) + + def inc_counter(self, key, amount=1): + LOG.debug('[metrics] counter.incr(%s, %s)' % (key, amount)) + + def decr_counter(self, key, amount=1): + LOG.debug('[metrics] counter.decr(%s, %s)' % (key, amount)) + + def set_gauge(self, key, value): + LOG.debug('[metrics] set_gauge(%s, %s)' % (key, value)) + + def inc_gauge(self, key, amount=1): + LOG.debug('[metrics] gauge.incr(%s, %s)' % (key, amount)) + + def decr_gauge(self, key, amount=1): + LOG.debug('[metrics] gauge.decr(%s, %s)' % (key, amount)) diff --git a/st2common/st2common/metrics/drivers/noop_driver.py b/st2common/st2common/metrics/drivers/noop_driver.py index 08ca257bb5..32c52277b4 100644 --- a/st2common/st2common/metrics/drivers/noop_driver.py +++ b/st2common/st2common/metrics/drivers/noop_driver.py @@ -12,11 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from st2common.metrics.base import BaseMetricsDriver +__all__ = [ + 'NoopDriver' +] + class NoopDriver(BaseMetricsDriver): - """ Dummy implementation of BaseMetricsDriver """ + Dummy implementation of BaseMetricsDriver + """ + def __init__(self, *_args, **_kwargs): pass From 24f5d1ada8b4cc8040497ea8ba1f9e437c17f3aa Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 16:23:20 +0200 Subject: [PATCH 09/38] Also track total number of the incoming requests. --- st2common/st2common/middleware/instrumentation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/st2common/st2common/middleware/instrumentation.py b/st2common/st2common/middleware/instrumentation.py index 8259ec9cac..9f57ddffca 100644 --- a/st2common/st2common/middleware/instrumentation.py +++ b/st2common/st2common/middleware/instrumentation.py @@ -41,6 +41,9 @@ def __call__(self, environ, start_response): metrics_driver = get_driver() + key = 'st2.%s.requests.total' % (self._service_name) + metrics_driver.inc_counter(key) + key = 'st2.%s.requests.method.%s' % (self._service_name, request.method) metrics_driver.inc_counter(key) From b16fb32e704a14c9fe924f7b2885f2df387046c1 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 17:03:31 +0200 Subject: [PATCH 10/38] Fix lint. --- st2common/st2common/metrics/drivers/prometheus_driver.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/st2common/st2common/metrics/drivers/prometheus_driver.py b/st2common/st2common/metrics/drivers/prometheus_driver.py index 3078bfc38c..ea67449ded 100644 --- a/st2common/st2common/metrics/drivers/prometheus_driver.py +++ b/st2common/st2common/metrics/drivers/prometheus_driver.py @@ -26,8 +26,10 @@ class PrometheusDriver(BaseMetricsDriver): - """ Base class for driver implementations for metric collection """ + Base class for driver implementations for metric collection + """ + def __init__(self): pass @@ -58,7 +60,6 @@ def dec_counter(self, key, amount=1): ) prometheus_counter.dec(amount) - def set_gauge(self, key, value): """ Set gauge value. From c759b63c30066e9f5b808b09a3b694c3571f95ba Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 17:11:49 +0200 Subject: [PATCH 11/38] Add tests for new gauge methods. --- st2common/tests/unit/test_metrics.py | 42 +++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/st2common/tests/unit/test_metrics.py b/st2common/tests/unit/test_metrics.py index 75977340ef..835d20523f 100644 --- a/st2common/tests/unit/test_metrics.py +++ b/st2common/tests/unit/test_metrics.py @@ -25,7 +25,11 @@ from st2common.util.date import get_datetime_utc_now __all__ = [ - 'TestBaseMetricsDriver' + 'TestBaseMetricsDriver', + 'TestStatsDMetricsDriver', + 'TestCounterContextManager', + 'TestTimerContextManager', + 'TestCounterWithTimerContextManager' ] cfg.CONF.set_override('driver', 'noop', group='metrics') @@ -137,6 +141,42 @@ def test_dec_timer_with_invalid_amount(self): with self.assertRaises(AssertionError): self._driver.dec_counter(*params) + @patch('st2common.metrics.drivers.statsd_driver.statsd') + def test_set_gauge_success(self, statsd): + params = ('key', 100) + mock_gauge = MagicMock() + statsd.Gauge().send.side_effect = mock_gauge + self._driver.set_gauge(*params) + mock_gauge.assert_called_once_with(None, params[1]) + + @patch('st2common.metrics.drivers.statsd_driver.statsd') + def test_incr_gauge_success(self, statsd): + params = ('key1',) + mock_gauge = MagicMock() + statsd.Gauge().increment.side_effect = mock_gauge + self._driver.incr_gauge(*params) + mock_gauge.assert_called_once_with(None, 1) + + params = ('key2', 100) + mock_gauge = MagicMock() + statsd.Gauge().increment.side_effect = mock_gauge + self._driver.incr_gauge(*params) + mock_gauge.assert_called_once_with(None, params[1]) + + @patch('st2common.metrics.drivers.statsd_driver.statsd') + def test_decr_gauge_success(self, statsd): + params = ('key1',) + mock_gauge = MagicMock() + statsd.Gauge().decrement.side_effect = mock_gauge + self._driver.incr_gauge(*params) + mock_gauge.assert_called_once_with(None, 1) + + params = ('key2', 100) + mock_gauge = MagicMock() + statsd.Gauge().decrement.side_effect = mock_gauge + self._driver.decr_gauge(*params) + mock_gauge.assert_called_once_with(None, params[1]) + class TestCounterContextManager(unittest2.TestCase): @patch('st2common.metrics.base.METRICS') From 179768ee68ed9d82bb8008d4e384cbb30eed2cd2 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 17 Aug 2018 17:11:56 +0200 Subject: [PATCH 12/38] Use echo driver by default in dev environments. --- conf/st2.dev.conf | 2 +- st2common/tests/unit/test_metrics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/st2.dev.conf b/conf/st2.dev.conf index e98322121e..21c5afc984 100644 --- a/conf/st2.dev.conf +++ b/conf/st2.dev.conf @@ -119,6 +119,6 @@ jitter_interval = 0 enable_common_libs = True [metrics] -driver = noop +driver = echo host = 127.0.0.1 port = 8125 diff --git a/st2common/tests/unit/test_metrics.py b/st2common/tests/unit/test_metrics.py index 835d20523f..d9c9ed427d 100644 --- a/st2common/tests/unit/test_metrics.py +++ b/st2common/tests/unit/test_metrics.py @@ -168,7 +168,7 @@ def test_decr_gauge_success(self, statsd): params = ('key1',) mock_gauge = MagicMock() statsd.Gauge().decrement.side_effect = mock_gauge - self._driver.incr_gauge(*params) + self._driver.decr_gauge(*params) mock_gauge.assert_called_once_with(None, 1) params = ('key2', 100) From 8d207ea06f0a6650d3739cdee43fd24a00be4590 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 20 Aug 2018 15:52:27 +0200 Subject: [PATCH 13/38] Use consistent metric names, add some additional instrumentation. --- st2actions/st2actions/container/base.py | 8 +++++--- st2common/st2common/middleware/instrumentation.py | 12 ++++++------ st2common/st2common/services/action.py | 9 +++------ st2common/st2common/util/action_db.py | 15 +++------------ st2reactor/st2reactor/rules/engine.py | 9 ++++----- st2reactor/st2reactor/rules/worker.py | 7 +++---- 6 files changed, 24 insertions(+), 36 deletions(-) diff --git a/st2actions/st2actions/container/base.py b/st2actions/st2actions/container/base.py index 9559dc0553..6667fa8a77 100644 --- a/st2actions/st2actions/container/base.py +++ b/st2actions/st2actions/container/base.py @@ -122,9 +122,11 @@ def _do_run(self, runner): extra = {'runner': runner, 'parameters': resolved_action_params} LOG.debug('Performing run for runner: %s' % (runner.runner_id), extra=extra) - with CounterWithTimer(key=format_metrics_key(action_db=runner.action, key='action')): - (status, result, context) = runner.run(action_params) - result = jsonify.try_loads(result) + with CounterWithTimer(key=format_metrics_key('action.executions')): + with CounterWithTimer(key=format_metrics_key('action.%s.executions' % + (runner.action.ref))): + (status, result, context) = runner.run(action_params) + result = jsonify.try_loads(result) action_completed = status in action_constants.LIVEACTION_COMPLETED_STATES diff --git a/st2common/st2common/middleware/instrumentation.py b/st2common/st2common/middleware/instrumentation.py index 9f57ddffca..39905e37f4 100644 --- a/st2common/st2common/middleware/instrumentation.py +++ b/st2common/st2common/middleware/instrumentation.py @@ -41,18 +41,18 @@ def __call__(self, environ, start_response): metrics_driver = get_driver() - key = 'st2.%s.requests.total' % (self._service_name) + key = 'st2.%s.request.total' % (self._service_name) metrics_driver.inc_counter(key) - key = 'st2.%s.requests.method.%s' % (self._service_name, request.method) + key = 'st2.%s.request.method.%s' % (self._service_name, request.method) metrics_driver.inc_counter(key) path = request.path.replace('/', '_') - key = 'st2.%s.requests.path.%s' % (self._service_name, path) + key = 'st2.%s.request.path.%s' % (self._service_name, path) metrics_driver.inc_counter(key) # Track and time current number of processing requests - key = 'st2.%s.requests' % (self._service_name) + key = 'st2.%s.request' % (self._service_name) with CounterWithTimer(key=key): return self.app(environ, start_response) @@ -76,8 +76,8 @@ def custom_start_response(status, headers, exc_info=None): status_code = int(status.split(' ')[0]) metrics_driver = get_driver() - metrics_driver.incr_gauge('st2.%s.responses.status.%s' % (self._service_name, - status_code)) + metrics_driver.incr_gauge('st2.%s.response.status.%s' % (self._service_name, + status_code)) return start_response(status, headers, exc_info) diff --git a/st2common/st2common/services/action.py b/st2common/st2common/services/action.py index a7cb27238a..1cbdb42bf9 100644 --- a/st2common/st2common/services/action.py +++ b/st2common/st2common/services/action.py @@ -64,6 +64,7 @@ def create_request(liveaction): # We import this here to avoid conflicts w/ runners that might import this # file since the runners don't have the config context by default. from st2common.metrics.base import get_driver, format_metrics_key + # Use the user context from the parent action execution. Subtasks in a workflow # action can be invoked by a system user and so we want to use the user context # from the original workflow action. @@ -135,12 +136,8 @@ def create_request(liveaction): trace_service.get_trace_component_for_action_execution(execution, liveaction) ]) - get_driver().inc_counter( - format_metrics_key( - action_db=action_db, - key='action.%s' % (liveaction.status) - ) - ) + get_driver().inc_counter(format_metrics_key('action.executions.%s' % (liveaction.status))) + return liveaction, execution diff --git a/st2common/st2common/util/action_db.py b/st2common/st2common/util/action_db.py index 5c3bbf915e..043a69d719 100644 --- a/st2common/st2common/util/action_db.py +++ b/st2common/st2common/util/action_db.py @@ -197,22 +197,13 @@ def update_liveaction_status(status=None, result=None, context=None, end_timesta # If liveaction_db status is set then we need to decrement the counter # because it is transitioning to a new state if liveaction_db.status: - get_driver().dec_counter( - format_metrics_key( - liveaction_db=liveaction_db, - key='action.%s' % liveaction_db.status - ) - ) + get_driver().dec_counter(format_metrics_key('action.executions.%s' % + (liveaction_db.status))) # If status is provided then we need to increment the timer because the action # is transitioning into this new state if status: - get_driver().inc_counter( - format_metrics_key( - liveaction_db=liveaction_db, - key='action.%s' % status - ) - ) + get_driver().dec_counter(format_metrics_key('action.executions.%s' % (status))) extra = {'liveaction_db': liveaction_db} LOG.debug('Updating ActionExection: "%s" with status="%s"', liveaction_db.id, status, diff --git a/st2reactor/st2reactor/rules/engine.py b/st2reactor/st2reactor/rules/engine.py index ac8b6e190a..d11e8a7b38 100644 --- a/st2reactor/st2reactor/rules/engine.py +++ b/st2reactor/st2reactor/rules/engine.py @@ -74,13 +74,12 @@ def create_rule_enforcers(self, trigger_instance, matching_rules): This method is trigger_instance specific therefore if creation of 1 RuleEnforcer fails it is likely that all wil be broken. """ + metrics_driver = get_driver() + enforcers = [] for matching_rule in matching_rules: - get_driver().inc_counter( - format_metrics_key( - key='rule.%s' % matching_rule - ) - ) + driver.inc_counter(format_metrics_key('rule.matched')) + driver.inc_counter(format_metrics_key(key='rule.%s.matched' % (matching_rule.ref))) enforcers.append(RuleEnforcer(trigger_instance, matching_rule)) return enforcers diff --git a/st2reactor/st2reactor/rules/worker.py b/st2reactor/st2reactor/rules/worker.py index 72da3aad3f..00295ccdc9 100644 --- a/st2reactor/st2reactor/rules/worker.py +++ b/st2reactor/st2reactor/rules/worker.py @@ -61,14 +61,13 @@ def pre_ack_process(self, message): return self._compose_pre_ack_process_response(trigger_instance, message) def process(self, pre_ack_response): - trigger_instance, message = self._decompose_pre_ack_process_response(pre_ack_response) if not trigger_instance: raise ValueError('No trigger_instance provided for processing.') get_driver().inc_counter( format_metrics_key( - key='trigger.%s' % (trigger_instance.trigger) + key='trigger.%s.processed' % (trigger_instance.trigger) ) ) @@ -91,8 +90,8 @@ def process(self, pre_ack_response): container_utils.update_trigger_instance_status( trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING) - with CounterWithTimer(key="st2.rule.processed"): - with Timer(key='st2.rule.processed.trigger_instance.%s' % (trigger_instance.id)): + with CounterWithTimer(key=format_metrics_key('rule.processed')): + with Timer(key='st2.trigger_instance.%s.processed' % (trigger_instance.id)): self.rules_engine.handle_trigger_instance(trigger_instance) container_utils.update_trigger_instance_status( From 9f8cb89c37cf1b13f290401871f2144a7c257c6e Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 20 Aug 2018 15:54:18 +0200 Subject: [PATCH 14/38] Use consistent method names. --- st2common/st2common/metrics/base.py | 4 ++-- .../st2common/metrics/drivers/prometheus_driver.py | 4 ++-- st2common/st2common/metrics/drivers/statsd_driver.py | 4 ++-- st2common/st2common/middleware/instrumentation.py | 4 ++-- st2common/tests/unit/test_metrics.py | 12 ++++++------ 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index d2cb02d9f0..bc656a5211 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -124,13 +124,13 @@ def set_gauge(self, key, value): """ pass - def incr_gauge(self, key, amount=1): + def inc_gauge(self, key, amount=1): """ Increment gauge value. """ pass - def decr_gauge(self, key, amount=1): + def dec_gauge(self, key, amount=1): """ Decrement gauge value. """ diff --git a/st2common/st2common/metrics/drivers/prometheus_driver.py b/st2common/st2common/metrics/drivers/prometheus_driver.py index ea67449ded..baabe8ef25 100644 --- a/st2common/st2common/metrics/drivers/prometheus_driver.py +++ b/st2common/st2common/metrics/drivers/prometheus_driver.py @@ -70,7 +70,7 @@ def set_gauge(self, key, value): gauge = Gauge(key) gauge.set(value) - def incr_gauge(self, key, amount=1): + def inc_gauge(self, key, amount=1): """ Increment gauge value. """ @@ -80,7 +80,7 @@ def incr_gauge(self, key, amount=1): gauge = Gauge(key) gauge.incr(amount) - def decr_gauge(self, key, amount=1): + def dec_gauge(self, key, amount=1): """ Decrement gauge value. """ diff --git a/st2common/st2common/metrics/drivers/statsd_driver.py b/st2common/st2common/metrics/drivers/statsd_driver.py index 47c5322544..56927f6b14 100644 --- a/st2common/st2common/metrics/drivers/statsd_driver.py +++ b/st2common/st2common/metrics/drivers/statsd_driver.py @@ -68,7 +68,7 @@ def set_gauge(self, key, value): gauge = statsd.Gauge(key) gauge.send(None, value) - def incr_gauge(self, key, amount=1): + def inc_gauge(self, key, amount=1): """ Increment gauge value. """ @@ -78,7 +78,7 @@ def incr_gauge(self, key, amount=1): gauge = statsd.Gauge(key) gauge.increment(None, amount) - def decr_gauge(self, key, amount=1): + def dec_gauge(self, key, amount=1): """ Decrement gauge value. """ diff --git a/st2common/st2common/middleware/instrumentation.py b/st2common/st2common/middleware/instrumentation.py index 39905e37f4..241b5f34eb 100644 --- a/st2common/st2common/middleware/instrumentation.py +++ b/st2common/st2common/middleware/instrumentation.py @@ -76,8 +76,8 @@ def custom_start_response(status, headers, exc_info=None): status_code = int(status.split(' ')[0]) metrics_driver = get_driver() - metrics_driver.incr_gauge('st2.%s.response.status.%s' % (self._service_name, - status_code)) + metrics_driver.inc_counter('st2.%s.response.status.%s' % (self._service_name, + status_code)) return start_response(status, headers, exc_info) diff --git a/st2common/tests/unit/test_metrics.py b/st2common/tests/unit/test_metrics.py index d9c9ed427d..783f759761 100644 --- a/st2common/tests/unit/test_metrics.py +++ b/st2common/tests/unit/test_metrics.py @@ -150,31 +150,31 @@ def test_set_gauge_success(self, statsd): mock_gauge.assert_called_once_with(None, params[1]) @patch('st2common.metrics.drivers.statsd_driver.statsd') - def test_incr_gauge_success(self, statsd): + def test_inc_gauge_success(self, statsd): params = ('key1',) mock_gauge = MagicMock() statsd.Gauge().increment.side_effect = mock_gauge - self._driver.incr_gauge(*params) + self._driver.inc_gauge(*params) mock_gauge.assert_called_once_with(None, 1) params = ('key2', 100) mock_gauge = MagicMock() statsd.Gauge().increment.side_effect = mock_gauge - self._driver.incr_gauge(*params) + self._driver.inc_gauge(*params) mock_gauge.assert_called_once_with(None, params[1]) @patch('st2common.metrics.drivers.statsd_driver.statsd') - def test_decr_gauge_success(self, statsd): + def test_dec_gauge_success(self, statsd): params = ('key1',) mock_gauge = MagicMock() statsd.Gauge().decrement.side_effect = mock_gauge - self._driver.decr_gauge(*params) + self._driver.dec_gauge(*params) mock_gauge.assert_called_once_with(None, 1) params = ('key2', 100) mock_gauge = MagicMock() statsd.Gauge().decrement.side_effect = mock_gauge - self._driver.decr_gauge(*params) + self._driver.dec_gauge(*params) mock_gauge.assert_called_once_with(None, params[1]) From 27e85dcfb8a9f7ab130558c56adcbf1e56d0a70c Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 20 Aug 2018 16:05:41 +0200 Subject: [PATCH 15/38] Fix method arguments. --- st2common/st2common/services/action.py | 2 +- st2common/st2common/util/action_db.py | 4 ++-- st2reactor/st2reactor/rules/engine.py | 4 ++-- st2reactor/st2reactor/rules/worker.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/st2common/st2common/services/action.py b/st2common/st2common/services/action.py index 1cbdb42bf9..d0443df11c 100644 --- a/st2common/st2common/services/action.py +++ b/st2common/st2common/services/action.py @@ -136,7 +136,7 @@ def create_request(liveaction): trace_service.get_trace_component_for_action_execution(execution, liveaction) ]) - get_driver().inc_counter(format_metrics_key('action.executions.%s' % (liveaction.status))) + get_driver().inc_counter(format_metrics_key(key='action.executions.%s' % (liveaction.status))) return liveaction, execution diff --git a/st2common/st2common/util/action_db.py b/st2common/st2common/util/action_db.py index 043a69d719..2d1d2dadcd 100644 --- a/st2common/st2common/util/action_db.py +++ b/st2common/st2common/util/action_db.py @@ -197,13 +197,13 @@ def update_liveaction_status(status=None, result=None, context=None, end_timesta # If liveaction_db status is set then we need to decrement the counter # because it is transitioning to a new state if liveaction_db.status: - get_driver().dec_counter(format_metrics_key('action.executions.%s' % + get_driver().dec_counter(format_metrics_key(key='action.executions.%s' % (liveaction_db.status))) # If status is provided then we need to increment the timer because the action # is transitioning into this new state if status: - get_driver().dec_counter(format_metrics_key('action.executions.%s' % (status))) + get_driver().dec_counter(format_metrics_key(key='action.executions.%s' % (status))) extra = {'liveaction_db': liveaction_db} LOG.debug('Updating ActionExection: "%s" with status="%s"', liveaction_db.id, status, diff --git a/st2reactor/st2reactor/rules/engine.py b/st2reactor/st2reactor/rules/engine.py index d11e8a7b38..ef734685ce 100644 --- a/st2reactor/st2reactor/rules/engine.py +++ b/st2reactor/st2reactor/rules/engine.py @@ -78,8 +78,8 @@ def create_rule_enforcers(self, trigger_instance, matching_rules): enforcers = [] for matching_rule in matching_rules: - driver.inc_counter(format_metrics_key('rule.matched')) - driver.inc_counter(format_metrics_key(key='rule.%s.matched' % (matching_rule.ref))) + metrics_driver.inc_counter(format_metrics_key(key='rule.matched')) + metrics_driver.inc_counter(format_metrics_key(key='rule.%s.matched' % (matching_rule.ref))) enforcers.append(RuleEnforcer(trigger_instance, matching_rule)) return enforcers diff --git a/st2reactor/st2reactor/rules/worker.py b/st2reactor/st2reactor/rules/worker.py index 00295ccdc9..33e3386075 100644 --- a/st2reactor/st2reactor/rules/worker.py +++ b/st2reactor/st2reactor/rules/worker.py @@ -90,7 +90,7 @@ def process(self, pre_ack_response): container_utils.update_trigger_instance_status( trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING) - with CounterWithTimer(key=format_metrics_key('rule.processed')): + with CounterWithTimer(key=format_metrics_key(key='rule.processed')): with Timer(key='st2.trigger_instance.%s.processed' % (trigger_instance.id)): self.rules_engine.handle_trigger_instance(trigger_instance) From 478744e94f28d4a317d12d39d3d4a00ce5efc5ca Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 20 Aug 2018 16:11:04 +0200 Subject: [PATCH 16/38] Get rid of format_metric_key() function calls which provide no value and just cause additional confusion and harder to actually find out the actual metric name. --- st2actions/st2actions/container/base.py | 7 +++---- st2common/st2common/services/action.py | 4 ++-- st2common/st2common/util/action_db.py | 7 +++---- st2reactor/st2reactor/rules/engine.py | 6 +++--- st2reactor/st2reactor/rules/worker.py | 10 +++------- 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/st2actions/st2actions/container/base.py b/st2actions/st2actions/container/base.py index 6667fa8a77..a40e42ba14 100644 --- a/st2actions/st2actions/container/base.py +++ b/st2actions/st2actions/container/base.py @@ -33,7 +33,7 @@ from st2common.util.action_db import (update_liveaction_status, get_liveaction_by_id) from st2common.util import param as param_utils from st2common.util.config_loader import ContentPackConfigLoader -from st2common.metrics.base import CounterWithTimer, format_metrics_key +from st2common.metrics.base import CounterWithTimer from st2common.util import jsonify from st2common.runners.base import get_runner @@ -122,9 +122,8 @@ def _do_run(self, runner): extra = {'runner': runner, 'parameters': resolved_action_params} LOG.debug('Performing run for runner: %s' % (runner.runner_id), extra=extra) - with CounterWithTimer(key=format_metrics_key('action.executions')): - with CounterWithTimer(key=format_metrics_key('action.%s.executions' % - (runner.action.ref))): + with CounterWithTimer(key=('st2.action.executions')): + with CounterWithTimer(key=('st2.action.%s.executions' % (runner.action.ref))): (status, result, context) = runner.run(action_params) result = jsonify.try_loads(result) diff --git a/st2common/st2common/services/action.py b/st2common/st2common/services/action.py index d0443df11c..11961514d7 100644 --- a/st2common/st2common/services/action.py +++ b/st2common/st2common/services/action.py @@ -63,7 +63,7 @@ def create_request(liveaction): """ # We import this here to avoid conflicts w/ runners that might import this # file since the runners don't have the config context by default. - from st2common.metrics.base import get_driver, format_metrics_key + from st2common.metrics.base import get_driver # Use the user context from the parent action execution. Subtasks in a workflow # action can be invoked by a system user and so we want to use the user context @@ -136,7 +136,7 @@ def create_request(liveaction): trace_service.get_trace_component_for_action_execution(execution, liveaction) ]) - get_driver().inc_counter(format_metrics_key(key='action.executions.%s' % (liveaction.status))) + get_driver().inc_counter('st2.action.executions.%s' % (liveaction.status)) return liveaction, execution diff --git a/st2common/st2common/util/action_db.py b/st2common/st2common/util/action_db.py index 2d1d2dadcd..b41aefef77 100644 --- a/st2common/st2common/util/action_db.py +++ b/st2common/st2common/util/action_db.py @@ -30,7 +30,7 @@ from st2common.persistence.action import Action from st2common.persistence.liveaction import LiveAction from st2common.persistence.runner import RunnerType -from st2common.metrics.base import format_metrics_key, get_driver +from st2common.metrics.base import get_driver LOG = logging.getLogger(__name__) @@ -197,13 +197,12 @@ def update_liveaction_status(status=None, result=None, context=None, end_timesta # If liveaction_db status is set then we need to decrement the counter # because it is transitioning to a new state if liveaction_db.status: - get_driver().dec_counter(format_metrics_key(key='action.executions.%s' % - (liveaction_db.status))) + get_driver().dec_counter('st2.action.executions.%s' % (liveaction_db.status)) # If status is provided then we need to increment the timer because the action # is transitioning into this new state if status: - get_driver().dec_counter(format_metrics_key(key='action.executions.%s' % (status))) + get_driver().dec_counter('st2.action.executions.%s' % (status)) extra = {'liveaction_db': liveaction_db} LOG.debug('Updating ActionExection: "%s" with status="%s"', liveaction_db.id, status, diff --git a/st2reactor/st2reactor/rules/engine.py b/st2reactor/st2reactor/rules/engine.py index ef734685ce..dbf98f839d 100644 --- a/st2reactor/st2reactor/rules/engine.py +++ b/st2reactor/st2reactor/rules/engine.py @@ -19,7 +19,7 @@ from st2common.services.triggers import get_trigger_db_by_ref from st2reactor.rules.enforcer import RuleEnforcer from st2reactor.rules.matcher import RulesMatcher -from st2common.metrics.base import format_metrics_key, get_driver +from st2common.metrics.base import get_driver LOG = logging.getLogger('st2reactor.rules.RulesEngine') @@ -78,8 +78,8 @@ def create_rule_enforcers(self, trigger_instance, matching_rules): enforcers = [] for matching_rule in matching_rules: - metrics_driver.inc_counter(format_metrics_key(key='rule.matched')) - metrics_driver.inc_counter(format_metrics_key(key='rule.%s.matched' % (matching_rule.ref))) + metrics_driver.inc_counter('st2.rule.matched') + metrics_driver.inc_counter('st2.rule.%s.matched' % (matching_rule.ref)) enforcers.append(RuleEnforcer(trigger_instance, matching_rule)) return enforcers diff --git a/st2reactor/st2reactor/rules/worker.py b/st2reactor/st2reactor/rules/worker.py index 33e3386075..d81a04a1f5 100644 --- a/st2reactor/st2reactor/rules/worker.py +++ b/st2reactor/st2reactor/rules/worker.py @@ -29,7 +29,7 @@ from st2common.transport.queues import RULESENGINE_WORK_QUEUE from st2common.metrics.base import CounterWithTimer from st2common.metrics.base import Timer -from st2common.metrics.base import format_metrics_key, get_driver +from st2common.metrics.base import get_driver LOG = logging.getLogger(__name__) @@ -65,11 +65,7 @@ def process(self, pre_ack_response): if not trigger_instance: raise ValueError('No trigger_instance provided for processing.') - get_driver().inc_counter( - format_metrics_key( - key='trigger.%s.processed' % (trigger_instance.trigger) - ) - ) + get_driver().inc_counter('st2.trigger.%s.processed' % (trigger_instance.trigger)) try: # Use trace_context from the message and if not found create a new context @@ -90,7 +86,7 @@ def process(self, pre_ack_response): container_utils.update_trigger_instance_status( trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING) - with CounterWithTimer(key=format_metrics_key(key='rule.processed')): + with CounterWithTimer(key='st2.rule.processed'): with Timer(key='st2.trigger_instance.%s.processed' % (trigger_instance.id)): self.rules_engine.handle_trigger_instance(trigger_instance) From 4086b5c235015aa2e0b41d245cafc0148e1fa953 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Mon, 20 Aug 2018 16:15:58 +0200 Subject: [PATCH 17/38] Fix typo. --- st2common/st2common/metrics/drivers/echo_driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/st2common/st2common/metrics/drivers/echo_driver.py b/st2common/st2common/metrics/drivers/echo_driver.py index 7d9faa2bd9..55878e9b9a 100644 --- a/st2common/st2common/metrics/drivers/echo_driver.py +++ b/st2common/st2common/metrics/drivers/echo_driver.py @@ -34,7 +34,7 @@ def time(self, key, time): def inc_counter(self, key, amount=1): LOG.debug('[metrics] counter.incr(%s, %s)' % (key, amount)) - def decr_counter(self, key, amount=1): + def dec_counter(self, key, amount=1): LOG.debug('[metrics] counter.decr(%s, %s)' % (key, amount)) def set_gauge(self, key, value): @@ -43,5 +43,5 @@ def set_gauge(self, key, value): def inc_gauge(self, key, amount=1): LOG.debug('[metrics] gauge.incr(%s, %s)' % (key, amount)) - def decr_gauge(self, key, amount=1): + def dec_gauge(self, key, amount=1): LOG.debug('[metrics] gauge.decr(%s, %s)' % (key, amount)) From 55b16f94a18c2d041f046b76f4f67d18745757a3 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 11:28:03 +0200 Subject: [PATCH 18/38] Reduce code duplication. --- st2common/st2common/metrics/base.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index bc656a5211..bb7972dbab 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -150,15 +150,17 @@ class Timer(object): """ def __init__(self, key, include_parameter=False): check_key(key) + self.key = key self._metrics = get_driver() self._include_parameter = include_parameter self._start_time = None def send_time(self, key=None): - """ Send current time from start time. """ - time_delta = get_datetime_utc_now() - self._start_time + Send current time from start time. + """ + time_delta = self.get_time_delta() if key: check_key(key) @@ -167,8 +169,10 @@ def send_time(self, key=None): self._metrics.time(self.key, time_delta.total_seconds()) def get_time_delta(self): - """ Get current time delta. """ + Get current time delta. + """ + return get_datetime_utc_now() - self._start_time def __enter__(self): @@ -213,9 +217,11 @@ def wrapper(*args, **kw): class CounterWithTimer(object): - """ Timer and counter context manager for easily sending timer statistics + """ + Timer and counter context manager for easily sending counter statistics with builtin timer. """ + def __init__(self, key, include_parameter=False): check_key(key) self.key = key @@ -224,9 +230,10 @@ def __init__(self, key, include_parameter=False): self._start_time = None def send_time(self, key=None): - """ Send current time from start time. """ - time_delta = get_datetime_utc_now() - self._start_time + Send current time from start time. + """ + time_delta = self.get_time_delta() if key: check_key(key) @@ -236,7 +243,8 @@ def send_time(self, key=None): time_delta.total_seconds()) def get_time_delta(self): - """ Get current time delta. + """ + Get current time delta. """ return get_datetime_utc_now() - self._start_time From abe6ca18641460ae00ec23f72a0d4a68d8033887 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 11:36:17 +0200 Subject: [PATCH 19/38] Don't decrease counter value on context manager exit. statsd counters are of a special type which is aggregated, sampled and calculated into rate so decreasing those will result in invalid / unexpected values. Decreasing them would only make sense if statsd wouldn't do any processing on them and treat them as raw values (e.g. gauges). --- st2common/st2common/metrics/base.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index bb7972dbab..a8234cf1df 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -205,9 +205,6 @@ def __enter__(self): self._metrics.inc_counter(self.key) return self - def __exit__(self, *args): - self._metrics.dec_counter(self.key) - def __call__(self, func): @wraps(func) def wrapper(*args, **kw): @@ -255,7 +252,6 @@ def __enter__(self): def __exit__(self, *args): self.send_time() - self._metrics.dec_counter("%s_counter" % (self.key)) def __call__(self, func): @wraps(func) From 74efba2b5d4f3c11636ff60dd676705205fa2216 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 11:40:01 +0200 Subject: [PATCH 20/38] Increase _counter and _timer suffixes since statsd already correctly groups metrics based on the type so the suffix is redundant. --- st2common/st2common/constants/metrics.py | 3 --- st2common/st2common/metrics/base.py | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/st2common/st2common/constants/metrics.py b/st2common/st2common/constants/metrics.py index 0604680d15..e50f57c023 100644 --- a/st2common/st2common/constants/metrics.py +++ b/st2common/st2common/constants/metrics.py @@ -13,9 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -METRICS_COUNTER_SUFFIX = "_counter" -METRICS_TIMER_SUFFIX = "_timer" - PYTHON_RUNNER_EXECUTION = "python_runner_execution" PYTHON_WRAPPER_EXECUTION = "python_wrapper_execution" diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index a8234cf1df..3cf705e01d 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -21,7 +21,6 @@ from oslo_config.cfg import NoSuchOptError from stevedore.exception import NoMatches, MultipleMatches -from st2common.constants.metrics import METRICS_COUNTER_SUFFIX, METRICS_TIMER_SUFFIX from st2common.util.loader import get_plugin_instance from st2common.util.date import get_datetime_utc_now from st2common.exceptions.plugins import PluginLoadError @@ -236,8 +235,7 @@ def send_time(self, key=None): check_key(key) self._metrics.time(key, time_delta.total_seconds()) else: - self._metrics.time("%s%s" % (self.key, METRICS_TIMER_SUFFIX), - time_delta.total_seconds()) + self._metrics.time(self.key, time_delta.total_seconds()) def get_time_delta(self): """ @@ -246,7 +244,7 @@ def get_time_delta(self): return get_datetime_utc_now() - self._start_time def __enter__(self): - self._metrics.inc_counter("%s%s" % (self.key, METRICS_COUNTER_SUFFIX)) + self._metrics.inc_counter(self.key) self._start_time = get_datetime_utc_now() return self From 8354248d0f712735e7f832744fc9d932ad6e2dd1 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 12:07:15 +0200 Subject: [PATCH 21/38] Remove unused module. --- st2common/st2common/constants/metrics.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 st2common/st2common/constants/metrics.py diff --git a/st2common/st2common/constants/metrics.py b/st2common/st2common/constants/metrics.py deleted file mode 100644 index e50f57c023..0000000000 --- a/st2common/st2common/constants/metrics.py +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the StackStorm, Inc ('StackStorm') under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -PYTHON_RUNNER_EXECUTION = "python_runner_execution" -PYTHON_WRAPPER_EXECUTION = "python_wrapper_execution" - -METRICS_REGISTER_RUNNER = "register_runner" From 82772cc64026b83792a344561844c4dcb09946e7 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 12:24:00 +0200 Subject: [PATCH 22/38] Remove unused driver for now since it's just causing confusion. --- .../metrics/drivers/prometheus_driver.py | 91 ------------------- 1 file changed, 91 deletions(-) delete mode 100644 st2common/st2common/metrics/drivers/prometheus_driver.py diff --git a/st2common/st2common/metrics/drivers/prometheus_driver.py b/st2common/st2common/metrics/drivers/prometheus_driver.py deleted file mode 100644 index baabe8ef25..0000000000 --- a/st2common/st2common/metrics/drivers/prometheus_driver.py +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the StackStorm, Inc ('StackStorm') under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from numbers import Number - -from prometheus_client import Histogram, Gauge - -from st2common.metrics.base import BaseMetricsDriver -from st2common.metrics.base import check_key - -__all__ = [ - 'PrometheusDriver' -] - - -class PrometheusDriver(BaseMetricsDriver): - """ - Base class for driver implementations for metric collection - """ - - def __init__(self): - pass - - def time(self, key, time): - """ - Timer metric - """ - prometheus_histogram = Histogram( # pylint: disable=no-value-for-parameter - key - ) - prometheus_histogram.observe(time) - - def inc_counter(self, key, amount=1): - """ - Increment counter - """ - prometheus_counter = Gauge( # pylint: disable=no-value-for-parameter - key - ) - prometheus_counter.inc(amount) - - def dec_counter(self, key, amount=1): - """ - Decrement metric - """ - prometheus_counter = Gauge( # pylint: disable=no-value-for-parameter - key - ) - prometheus_counter.dec(amount) - - def set_gauge(self, key, value): - """ - Set gauge value. - """ - check_key(key) - assert isinstance(value, Number) - - gauge = Gauge(key) - gauge.set(value) - - def inc_gauge(self, key, amount=1): - """ - Increment gauge value. - """ - check_key(key) - assert isinstance(amount, Number) - - gauge = Gauge(key) - gauge.incr(amount) - - def dec_gauge(self, key, amount=1): - """ - Decrement gauge value. - """ - check_key(key) - assert isinstance(amount, Number) - - gauge = Gauge(key) - gauge.decr(amount) From e2883d40a5c4bd49676a91da0a9aab54231634fe Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 12:26:47 +0200 Subject: [PATCH 23/38] Fix metric name. --- st2reactor/st2reactor/rules/worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/st2reactor/st2reactor/rules/worker.py b/st2reactor/st2reactor/rules/worker.py index d81a04a1f5..a95c50751e 100644 --- a/st2reactor/st2reactor/rules/worker.py +++ b/st2reactor/st2reactor/rules/worker.py @@ -87,7 +87,7 @@ def process(self, pre_ack_response): trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING) with CounterWithTimer(key='st2.rule.processed'): - with Timer(key='st2.trigger_instance.%s.processed' % (trigger_instance.id)): + with Timer(key='st2.triggerinstance.%s.processed' % (trigger_instance.id)): self.rules_engine.handle_trigger_instance(trigger_instance) container_utils.update_trigger_instance_status( From 0110721b648264dcd7d9bfe5f1d92a8403301e53 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 12:38:05 +0200 Subject: [PATCH 24/38] Update affected tests. --- st2common/st2common/metrics/base.py | 3 +++ st2common/tests/unit/test_metrics.py | 24 ++++++------------------ 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index 3cf705e01d..1bb6848e54 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -204,6 +204,9 @@ def __enter__(self): self._metrics.inc_counter(self.key) return self + def __exit__(self, *args): + pass + def __call__(self, func): @wraps(func) def wrapper(*args, **kw): diff --git a/st2common/tests/unit/test_metrics.py b/st2common/tests/unit/test_metrics.py index 783f759761..819f677a0e 100644 --- a/st2common/tests/unit/test_metrics.py +++ b/st2common/tests/unit/test_metrics.py @@ -21,7 +21,6 @@ from st2common.metrics import base from st2common.metrics.drivers.statsd_driver import StatsdDriver -from st2common.constants.metrics import METRICS_COUNTER_SUFFIX, METRICS_TIMER_SUFFIX from st2common.util.date import get_datetime_utc_now __all__ = [ @@ -185,7 +184,6 @@ def test_counter(self, metrics_patch): with base.Counter(test_key): metrics_patch.inc_counter.assert_called_with(test_key) metrics_patch.dec_counter.assert_not_called() - metrics_patch.dec_counter.assert_called_with(test_key) class TestTimerContextManager(unittest2.TestCase): @@ -249,8 +247,7 @@ def test_time(self, metrics_patch, datetime_patch): self.assertTrue(isinstance(timer._start_time, datetime)) metrics_patch.time.assert_not_called() timer.send_time() - metrics_patch.time.assert_called_with( - "%s%s" % (test_key, METRICS_TIMER_SUFFIX), + metrics_patch.time.assert_called_with(test_key, (self.end_time - self.middle_time).total_seconds() ) second_test_key = "lakshmi_has_a_nose" @@ -264,13 +261,10 @@ def test_time(self, metrics_patch, datetime_patch): time_delta.total_seconds(), (self.end_time - self.middle_time).total_seconds() ) - metrics_patch.inc_counter.assert_called_with( - "%s%s" % (test_key, METRICS_COUNTER_SUFFIX) - ) + metrics_patch.inc_counter.assert_called_with(test_key) metrics_patch.dec_counter.assert_not_called() - metrics_patch.dec_counter.assert_called_with("%s%s" % (test_key, METRICS_COUNTER_SUFFIX)) metrics_patch.time.assert_called_with( - "%s%s" % (test_key, METRICS_TIMER_SUFFIX), + test_key, (self.end_time - self.start_time).total_seconds() ) @@ -296,8 +290,7 @@ def _get_tested(metrics_counter_with_timer=None): self.assertTrue(isinstance(metrics_counter_with_timer._start_time, datetime)) metrics_patch.time.assert_not_called() metrics_counter_with_timer.send_time() - metrics_patch.time.assert_called_with( - "%s%s" % (test_key, METRICS_TIMER_SUFFIX), + metrics_patch.time.assert_called_with(test_key, (end_time - middle_time).total_seconds() ) second_test_key = "lakshmi_has_a_nose" @@ -311,16 +304,12 @@ def _get_tested(metrics_counter_with_timer=None): time_delta.total_seconds(), (end_time - middle_time).total_seconds() ) - metrics_patch.inc_counter.assert_called_with( - "%s%s" % (test_key, METRICS_COUNTER_SUFFIX) - ) + metrics_patch.inc_counter.assert_called_with(test_key) metrics_patch.dec_counter.assert_not_called() _get_tested() - metrics_patch.dec_counter.assert_called_with("%s%s" % (test_key, METRICS_COUNTER_SUFFIX)) - metrics_patch.time.assert_called_with( - "%s%s" % (test_key, METRICS_TIMER_SUFFIX), + metrics_patch.time.assert_called_with(test_key, (end_time - start_time).total_seconds() ) @@ -335,7 +324,6 @@ def _get_tested(): metrics_patch.inc_counter.assert_called_with(test_key) metrics_patch.dec_counter.assert_not_called() _get_tested() - metrics_patch.dec_counter.assert_called_with(test_key) class TestTimerDecorator(unittest2.TestCase): From efb78560f7a911fad0e6e299c42b3a6a09809bee Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 21 Aug 2018 12:40:30 +0200 Subject: [PATCH 25/38] Update changelog. --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a6107c7374..b389770a50 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -83,6 +83,8 @@ Changed ``st2rulesengine`` service. This would make such issues very hard to troubleshoot because only way to find out about this failure would be to inspect the ``st2rulesengine`` service logs. (improvement) #4231 +* Improve code metric instrumentation and instrument code and various services with more metrics. + improvement) #4310 Fixed ~~~~~ From ebc124547b0f230702b6215ccc8c048457ab9d7d Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 12:56:37 +0200 Subject: [PATCH 26/38] Add sample statsd config. --- conf/metrics.sample/statsd.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 conf/metrics.sample/statsd.js diff --git a/conf/metrics.sample/statsd.js b/conf/metrics.sample/statsd.js new file mode 100644 index 0000000000..44cb9e03ec --- /dev/null +++ b/conf/metrics.sample/statsd.js @@ -0,0 +1,19 @@ +// Sample statsd config for usage with metrics instrumentation +{ + // IP and port of a local or remote graphite instance to which statsd will + // submit metrics + graphiteHost: "127.0.0.1", + graphitePort: 2003, + + // statsd listen IP and port + address: "0.0.0.0", + port: 8125, + + // Enable debug mode for easier debugging, disable in production + debug: true, + + // Disable legacy name prefix + graphite: { + legacyNamespace: false + } +} From cacaa42fba7f102f6ae01d9ebd416b10d6ae94b5 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 12:59:07 +0200 Subject: [PATCH 27/38] Add sample metrics configs for statsd config and carbon cache. --- conf/metrics/carbon/storage-aggregation.conf | 52 +++++++++++++++++++ conf/metrics/carbon/storage-schemas.conf | 20 +++++++ .../statsd/localConfig.json} | 0 3 files changed, 72 insertions(+) create mode 100644 conf/metrics/carbon/storage-aggregation.conf create mode 100644 conf/metrics/carbon/storage-schemas.conf rename conf/{metrics.sample/statsd.js => metrics/statsd/localConfig.json} (100%) diff --git a/conf/metrics/carbon/storage-aggregation.conf b/conf/metrics/carbon/storage-aggregation.conf new file mode 100644 index 0000000000..08ad358d20 --- /dev/null +++ b/conf/metrics/carbon/storage-aggregation.conf @@ -0,0 +1,52 @@ +# Aggregation methods for whisper files. Entries are scanned in order, +# and first match wins. This file is scanned for changes every 60 seconds +# +# [name] +# pattern = +# xFilesFactor = +# aggregationMethod = +# +# name: Arbitrary unique name for the rule +# pattern: Regex pattern to match against the metric name +# xFilesFactor: Ratio of valid data points required for aggregation to the next retention to occur +# aggregationMethod: function to apply to data points for aggregation +# +[min] +pattern = \.min$ +xFilesFactor = 0.1 +aggregationMethod = min + +[max] +pattern = \.max$ +xFilesFactor = 0.1 +aggregationMethod = max + +[count] +pattern = \.count$ +xFilesFactor = 0 +aggregationMethod = sum + +[count_legacy] +pattern = ^stats_counts.* +xFilesFactor = 0 +aggregationMethod = sum + +[lower] +pattern = \.lower(_\d+)?$ +xFilesFactor = 0.1 +aggregationMethod = min + +[upper] +pattern = \.upper(_\d+)?$ +xFilesFactor = 0.1 +aggregationMethod = max + +[sum] +pattern = \.sum$ +xFilesFactor = 0 +aggregationMethod = sum + +[default_average] +pattern = .* +xFilesFactor = 0.5 +aggregationMethod = average diff --git a/conf/metrics/carbon/storage-schemas.conf b/conf/metrics/carbon/storage-schemas.conf new file mode 100644 index 0000000000..aa7fccde9c --- /dev/null +++ b/conf/metrics/carbon/storage-schemas.conf @@ -0,0 +1,20 @@ +# Schema definitions for Whisper files. Entries are scanned in order, +# and first match wins. This file is scanned for changes every 60 seconds. +# +# [name] +# pattern = regex +# retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... + +# Carbon's internal metrics. This entry should match what is specified in +# CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings +[stats] +pattern = ^stats.* +retentions = 10s:1d,1m:7d,10m:1y + +[carbon] +pattern = ^carbon\. +retentions = 60:90d + +[default_1min_for_1day] +pattern = .* +retentions = 60s:1d diff --git a/conf/metrics.sample/statsd.js b/conf/metrics/statsd/localConfig.json similarity index 100% rename from conf/metrics.sample/statsd.js rename to conf/metrics/statsd/localConfig.json From 7d04e4eef1d6e67110cbd1f8d9a58b4617016559 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 13:00:42 +0200 Subject: [PATCH 28/38] Fix file extension. --- conf/metrics/statsd/{localConfig.json => localConfig.js} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename conf/metrics/statsd/{localConfig.json => localConfig.js} (100%) diff --git a/conf/metrics/statsd/localConfig.json b/conf/metrics/statsd/localConfig.js similarity index 100% rename from conf/metrics/statsd/localConfig.json rename to conf/metrics/statsd/localConfig.js From ee8996f799ca475d9dea28db70bb21596f42f1fd Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 13:47:32 +0200 Subject: [PATCH 29/38] Add new metrics.prefix config option. This option can specify an optional prefix which is prepended to each metric key / name. This comes handy when you want to use the same statsd or other backend instance for multiple environments (each environment would specify a different prefix). --- st2common/st2common/config.py | 4 +++ .../metrics/drivers/statsd_driver.py | 28 +++++++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/st2common/st2common/config.py b/st2common/st2common/config.py index d712c313bf..5b7c0c94b7 100644 --- a/st2common/st2common/config.py +++ b/st2common/st2common/config.py @@ -538,6 +538,10 @@ def register_opts(ignore_errors=False): cfg.IntOpt( 'port', default=8125, help='Destination port to connect to if driver requires connection.'), + cfg.StrOpt( + 'prefix', default=None, + help='Prefix for all the metrics name. Comes handy when you want to submit metrics ' + 'from various environment to the same backend instance.') ] do_register_opts(metrics_opts, group='metrics', ignore_errors=ignore_errors) diff --git a/st2common/st2common/metrics/drivers/statsd_driver.py b/st2common/st2common/metrics/drivers/statsd_driver.py index 56927f6b14..0936c6c7e7 100644 --- a/st2common/st2common/metrics/drivers/statsd_driver.py +++ b/st2common/st2common/metrics/drivers/statsd_driver.py @@ -16,6 +16,7 @@ from numbers import Number import statsd +import six from oslo_config import cfg from st2common.metrics.base import BaseMetricsDriver @@ -32,18 +33,37 @@ class StatsdDriver(BaseMetricsDriver): """ def __init__(self): statsd.Connection.set_defaults(host=cfg.CONF.metrics.host, port=cfg.CONF.metrics.port) + + original_send = statsd.Connection.send + + def send_with_prefix(self, data, *args, **kwargs): + # If defined, include (set) prefix for each metric key (name) + if not cfg.CONF.metrics.prefix: + return original_send(self, data, *args, **kwargs) + + data_with_prefix = {} + for key, value in six.iteritems(data): + key = '%s.%s' % (cfg.CONF.metrics.prefix, key) + data_with_prefix[key] = value + + original_send(self, data_with_prefix, *args, **kwargs) + statsd.Connection.send = send_with_prefix + self._counters = {} self._timer = statsd.Timer('') def time(self, key, time): - """ Timer metric + """ + Timer metric """ check_key(key) assert isinstance(time, Number) + self._timer.send(key, time) def inc_counter(self, key, amount=1): - """ Increment counter + """ + Increment counter """ check_key(key) assert isinstance(amount, Number) @@ -51,10 +71,12 @@ def inc_counter(self, key, amount=1): self._counters[key] += amount def dec_counter(self, key, amount=1): - """ Decrement metric + """ + Decrement metric """ check_key(key) assert isinstance(amount, Number) + self._counters[key] = self._counters.get(key, statsd.Counter(key)) self._counters[key] -= amount From eb2646dd2176934838e4976f4345b76a7e633f47 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 13:50:45 +0200 Subject: [PATCH 30/38] Add changelog entry. --- CHANGELOG.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b389770a50..7e5734a415 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -83,8 +83,12 @@ Changed ``st2rulesengine`` service. This would make such issues very hard to troubleshoot because only way to find out about this failure would be to inspect the ``st2rulesengine`` service logs. (improvement) #4231 -* Improve code metric instrumentation and instrument code and various services with more metrics. - improvement) #4310 +* Improve code metric instrumentation and instrument code and various services with more metrics. + (improvement) #4310 +* Add new ``metrics.prefix`` config option. With this option user can specify an optional prefix + which is prepended to each metric key (name). This comes handy in scenarios where user wants to + submit metrics from multiple environments / deployments (e.g. testing, staging, dev) to the same + backend instance. (improvement) #4310 Fixed ~~~~~ From 673247150211918548f629e484aa7df2848a5f9d Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 15:44:57 +0200 Subject: [PATCH 31/38] Remove unused code. --- st2common/st2common/metrics/base.py | 47 ----------------- st2common/tests/unit/test_metrics.py | 76 ---------------------------- 2 files changed, 123 deletions(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index 1bb6848e54..08fbd41d8a 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -47,53 +47,6 @@ METRICS = None -def _strip_pack(action, pack): - formatted_pack = "%s." % (pack) - - if formatted_pack in action: - return action.replace(formatted_pack, '') - - return action - - -def _format_metrics_key_for_action_db(action_db): - action_pack = action_db.pack if action_db.pack else 'unknown' - action_name = _strip_pack(action_db.name, action_pack) - return [action_pack, action_name] - - -def _format_metrics_key_for_liveaction_db(liveaction_db): - action_pack = liveaction_db.context.get('pack', 'unknown') - action_name = _strip_pack(liveaction_db.action, action_pack) - return [action_pack, action_name] - - -def format_metrics_key(action_db=None, liveaction_db=None, key=None): - """ - Return a string for usage as metrics key. - """ - assert (action_db or key or liveaction_db), """Must supply one of key, action_db, or - liveaction_db""" - metrics_key_items = ['st2'] - - if action_db: - metrics_key_items.extend(_format_metrics_key_for_action_db(action_db)) - - if liveaction_db: - metrics_key_items.extend( - _format_metrics_key_for_liveaction_db(liveaction_db) - ) - - if key: - metrics_key_items.append('%s' % key) - - metrics_key = '.'.join(metrics_key_items) - - LOG.debug("Generated Metrics Key: %s", metrics_key) - - return metrics_key - - class BaseMetricsDriver(object): """ Base class for driver implementations for metric collection diff --git a/st2common/tests/unit/test_metrics.py b/st2common/tests/unit/test_metrics.py index 819f677a0e..ea3614a9f6 100644 --- a/st2common/tests/unit/test_metrics.py +++ b/st2common/tests/unit/test_metrics.py @@ -367,79 +367,3 @@ def _get_tested(metrics_timer=None): test_key, (end_time - start_time).total_seconds() ) - - -class TestFormatMetrics(unittest2.TestCase): - def test_format_metrics_liveaction_db_without_key(self): - pack = 'test' - action = 'lakface' - - liveaction_db = MagicMock() - liveaction_db.context = {'pack': pack} - liveaction_db.action = action - - key = base.format_metrics_key(liveaction_db=liveaction_db) - - self.assertEquals(key, "st2.%s.%s" % (pack, action)) - - def test_format_metrics_liveaction_db_with_key(self): - pack = 'test' - action = 'lakface' - test_key = 'meh' - - liveaction_db = MagicMock() - liveaction_db.context = {'pack': pack} - liveaction_db.action = "%s.%s" % (pack, action) - - key = base.format_metrics_key(liveaction_db=liveaction_db, key=test_key) - - self.assertEquals(key, "st2.%s.%s.%s" % (pack, action, test_key)) - - def test_format_metrics_liveaction_db_without_pack(self): - action = 'lakface' - pack = 'unknown' - - liveaction_db = MagicMock() - liveaction_db.context = {} - liveaction_db.action = "%s.%s" % (pack, action) - - key = base.format_metrics_key(liveaction_db=liveaction_db) - - self.assertEquals(key, "st2.%s.%s" % (pack, action)) - - def test_format_metrics_action_db_without_key(self): - pack = 'test' - action = 'lakface' - - action_db = MagicMock() - action_db.pack = pack - action_db.name = action - - key = base.format_metrics_key(action_db=action_db) - - self.assertEquals(key, "st2.%s.%s" % (pack, action)) - - def test_format_metrics_action_db_with_key(self): - pack = 'test' - action = 'lakface' - test_key = 'meh' - - action_db = MagicMock() - action_db.pack = pack - action_db.name = action - - key = base.format_metrics_key(action_db=action_db, key=test_key) - - self.assertEquals(key, "st2.%s.%s.%s" % (pack, action, test_key)) - - def test_format_metrics_action_db_without_pack(self): - action = 'lakface' - pack = 'unknown' - - action_db = MagicMock() - action_db.pack = None - action_db.name = action - - key = base.format_metrics_key(action_db=action_db) - - self.assertEquals(key, "st2.%s.%s" % (pack, action)) From 1170280e7d3ac55d686f9cb5126ced7b21592863 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 15:45:55 +0200 Subject: [PATCH 32/38] Add a comment. --- st2common/st2common/metrics/base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index 08fbd41d8a..9b5b2a500c 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -12,11 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from __future__ import absolute_import + from functools import wraps import logging -from six import string_types +from six import string_types from oslo_config import cfg from oslo_config.cfg import NoSuchOptError from stevedore.exception import NoMatches, MultipleMatches @@ -34,6 +36,7 @@ 'metrics_initialize', 'get_driver', + 'check_key' ] @@ -44,6 +47,9 @@ LOG = logging.getLogger(__name__) PLUGIN_NAMESPACE = 'st2common.metrics.driver' + +# Stores reference to the metrics driver class instance. +# NOTE: This value is populated lazily on the first get_driver() function call METRICS = None From f4b3ca3be2dd32d580085d713894a15086daecfb Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 15:50:54 +0200 Subject: [PATCH 33/38] Make metric key generation more robust, include prefix after "st2" and before metric name. --- st2common/st2common/config.py | 5 ++-- st2common/st2common/metrics/base.py | 20 +++++--------- .../metrics/drivers/statsd_driver.py | 26 +++++++------------ 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/st2common/st2common/config.py b/st2common/st2common/config.py index 5b7c0c94b7..f977509e80 100644 --- a/st2common/st2common/config.py +++ b/st2common/st2common/config.py @@ -540,8 +540,9 @@ def register_opts(ignore_errors=False): help='Destination port to connect to if driver requires connection.'), cfg.StrOpt( 'prefix', default=None, - help='Prefix for all the metrics name. Comes handy when you want to submit metrics ' - 'from various environment to the same backend instance.') + help='Optional prefix which is prepended to all the metric names. Comes handy when ' + 'you want to submit metrics from various environment to the same metric' + 'backend instance.') ] do_register_opts(metrics_opts, group='metrics', ignore_errors=ignore_errors) diff --git a/st2common/st2common/metrics/base.py b/st2common/st2common/metrics/base.py index 9b5b2a500c..5c822aa351 100644 --- a/st2common/st2common/metrics/base.py +++ b/st2common/st2common/metrics/base.py @@ -18,11 +18,11 @@ from functools import wraps import logging -from six import string_types from oslo_config import cfg from oslo_config.cfg import NoSuchOptError from stevedore.exception import NoMatches, MultipleMatches +from st2common.metrics.utils import check_key from st2common.util.loader import get_plugin_instance from st2common.util.date import get_datetime_utc_now from st2common.exceptions.plugins import PluginLoadError @@ -35,9 +35,7 @@ 'CounterWithTimer', 'metrics_initialize', - 'get_driver', - - 'check_key' + 'get_driver' ] if not hasattr(cfg.CONF, 'metrics'): @@ -95,13 +93,6 @@ def dec_gauge(self, key, amount=1): pass -def check_key(key): - """Ensure key meets requirements. - """ - assert isinstance(key, string_types), "Key not a string. Got %s" % type(key) - assert key, "Key cannot be empty string." - - class Timer(object): """ Timer context manager for easily sending timer statistics. @@ -224,9 +215,11 @@ def wrapper(*args, **kw): def metrics_initialize(): - """Initialize metrics constant + """ + Initialize metrics constant """ global METRICS + try: METRICS = get_plugin_instance(PLUGIN_NAMESPACE, cfg.CONF.metrics.driver) except (NoMatches, MultipleMatches, NoSuchOptError) as error: @@ -236,7 +229,8 @@ def metrics_initialize(): def get_driver(): - """Return metrics driver instance + """ + Return metrics driver instance """ if not METRICS: return metrics_initialize() diff --git a/st2common/st2common/metrics/drivers/statsd_driver.py b/st2common/st2common/metrics/drivers/statsd_driver.py index 0936c6c7e7..a4bce08be0 100644 --- a/st2common/st2common/metrics/drivers/statsd_driver.py +++ b/st2common/st2common/metrics/drivers/statsd_driver.py @@ -16,11 +16,11 @@ from numbers import Number import statsd -import six from oslo_config import cfg from st2common.metrics.base import BaseMetricsDriver -from st2common.metrics.base import check_key +from st2common.metrics.utils import check_key +from st2common.metrics.utils import get_full_key_name __all__ = [ 'StatsdDriver' @@ -34,21 +34,6 @@ class StatsdDriver(BaseMetricsDriver): def __init__(self): statsd.Connection.set_defaults(host=cfg.CONF.metrics.host, port=cfg.CONF.metrics.port) - original_send = statsd.Connection.send - - def send_with_prefix(self, data, *args, **kwargs): - # If defined, include (set) prefix for each metric key (name) - if not cfg.CONF.metrics.prefix: - return original_send(self, data, *args, **kwargs) - - data_with_prefix = {} - for key, value in six.iteritems(data): - key = '%s.%s' % (cfg.CONF.metrics.prefix, key) - data_with_prefix[key] = value - - original_send(self, data_with_prefix, *args, **kwargs) - statsd.Connection.send = send_with_prefix - self._counters = {} self._timer = statsd.Timer('') @@ -59,6 +44,7 @@ def time(self, key, time): check_key(key) assert isinstance(time, Number) + key = get_full_key_name(key) self._timer.send(key, time) def inc_counter(self, key, amount=1): @@ -67,6 +53,8 @@ def inc_counter(self, key, amount=1): """ check_key(key) assert isinstance(amount, Number) + + key = get_full_key_name(key) self._counters[key] = self._counters.get(key, statsd.Counter(key)) self._counters[key] += amount @@ -77,6 +65,7 @@ def dec_counter(self, key, amount=1): check_key(key) assert isinstance(amount, Number) + key = get_full_key_name(key) self._counters[key] = self._counters.get(key, statsd.Counter(key)) self._counters[key] -= amount @@ -87,6 +76,7 @@ def set_gauge(self, key, value): check_key(key) assert isinstance(value, Number) + key = get_full_key_name(key) gauge = statsd.Gauge(key) gauge.send(None, value) @@ -97,6 +87,7 @@ def inc_gauge(self, key, amount=1): check_key(key) assert isinstance(amount, Number) + key = get_full_key_name(key) gauge = statsd.Gauge(key) gauge.increment(None, amount) @@ -107,5 +98,6 @@ def dec_gauge(self, key, amount=1): check_key(key) assert isinstance(amount, Number) + key = get_full_key_name(key) gauge = statsd.Gauge(key) gauge.decrement(None, amount) From 505106e1e54c5ad50b29e80b17438d2a4c467094 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 15:52:23 +0200 Subject: [PATCH 34/38] Update affected code and tests, add new tests. --- st2actions/st2actions/container/base.py | 6 ++--- .../st2common/middleware/instrumentation.py | 12 ++++----- st2common/st2common/services/action.py | 2 +- st2common/st2common/util/action_db.py | 4 +-- st2common/tests/unit/test_metrics.py | 25 +++++++++++++++++-- st2reactor/st2reactor/rules/engine.py | 4 +-- st2reactor/st2reactor/rules/worker.py | 6 ++--- 7 files changed, 40 insertions(+), 19 deletions(-) diff --git a/st2actions/st2actions/container/base.py b/st2actions/st2actions/container/base.py index a40e42ba14..ee231a1a65 100644 --- a/st2actions/st2actions/container/base.py +++ b/st2actions/st2actions/container/base.py @@ -82,7 +82,7 @@ def dispatch(self, liveaction_db): 'in an unsupported status of "%s".' % liveaction_db.status ) - with CounterWithTimer(key="st2.action.executions"): + with CounterWithTimer(key="action.executions"): liveaction_db = funcs[liveaction_db.status](runner) return liveaction_db.result @@ -122,8 +122,8 @@ def _do_run(self, runner): extra = {'runner': runner, 'parameters': resolved_action_params} LOG.debug('Performing run for runner: %s' % (runner.runner_id), extra=extra) - with CounterWithTimer(key=('st2.action.executions')): - with CounterWithTimer(key=('st2.action.%s.executions' % (runner.action.ref))): + with CounterWithTimer(key='action.executions'): + with CounterWithTimer(key='action.%s.executions' % (runner.action.ref)): (status, result, context) = runner.run(action_params) result = jsonify.try_loads(result) diff --git a/st2common/st2common/middleware/instrumentation.py b/st2common/st2common/middleware/instrumentation.py index 241b5f34eb..1def26f9e1 100644 --- a/st2common/st2common/middleware/instrumentation.py +++ b/st2common/st2common/middleware/instrumentation.py @@ -41,18 +41,18 @@ def __call__(self, environ, start_response): metrics_driver = get_driver() - key = 'st2.%s.request.total' % (self._service_name) + key = '%s.request.total' % (self._service_name) metrics_driver.inc_counter(key) - key = 'st2.%s.request.method.%s' % (self._service_name, request.method) + key = '%s.request.method.%s' % (self._service_name, request.method) metrics_driver.inc_counter(key) path = request.path.replace('/', '_') - key = 'st2.%s.request.path.%s' % (self._service_name, path) + key = '%s.request.path.%s' % (self._service_name, path) metrics_driver.inc_counter(key) # Track and time current number of processing requests - key = 'st2.%s.request' % (self._service_name) + key = '%s.request' % (self._service_name) with CounterWithTimer(key=key): return self.app(environ, start_response) @@ -76,8 +76,8 @@ def custom_start_response(status, headers, exc_info=None): status_code = int(status.split(' ')[0]) metrics_driver = get_driver() - metrics_driver.inc_counter('st2.%s.response.status.%s' % (self._service_name, - status_code)) + metrics_driver.inc_counter('%s.response.status.%s' % (self._service_name, + status_code)) return start_response(status, headers, exc_info) diff --git a/st2common/st2common/services/action.py b/st2common/st2common/services/action.py index 3ef09a7e36..02970e09eb 100644 --- a/st2common/st2common/services/action.py +++ b/st2common/st2common/services/action.py @@ -136,7 +136,7 @@ def create_request(liveaction): trace_service.get_trace_component_for_action_execution(execution, liveaction) ]) - get_driver().inc_counter('st2.action.executions.%s' % (liveaction.status)) + get_driver().inc_counter('action.executions.%s' % (liveaction.status)) return liveaction, execution diff --git a/st2common/st2common/util/action_db.py b/st2common/st2common/util/action_db.py index b41aefef77..95c00466d6 100644 --- a/st2common/st2common/util/action_db.py +++ b/st2common/st2common/util/action_db.py @@ -197,12 +197,12 @@ def update_liveaction_status(status=None, result=None, context=None, end_timesta # If liveaction_db status is set then we need to decrement the counter # because it is transitioning to a new state if liveaction_db.status: - get_driver().dec_counter('st2.action.executions.%s' % (liveaction_db.status)) + get_driver().dec_counter('action.executions.%s' % (liveaction_db.status)) # If status is provided then we need to increment the timer because the action # is transitioning into this new state if status: - get_driver().dec_counter('st2.action.executions.%s' % (status)) + get_driver().dec_counter('action.executions.%s' % (status)) extra = {'liveaction_db': liveaction_db} LOG.debug('Updating ActionExection: "%s" with status="%s"', liveaction_db.id, status, diff --git a/st2common/tests/unit/test_metrics.py b/st2common/tests/unit/test_metrics.py index ea3614a9f6..b33f5bdac3 100644 --- a/st2common/tests/unit/test_metrics.py +++ b/st2common/tests/unit/test_metrics.py @@ -20,6 +20,7 @@ from oslo_config import cfg from st2common.metrics import base +from st2common.metrics.utils import get_full_key_name from st2common.metrics.drivers.statsd_driver import StatsdDriver from st2common.util.date import get_datetime_utc_now @@ -57,6 +58,8 @@ class TestStatsDMetricsDriver(unittest2.TestCase): @patch('st2common.metrics.drivers.statsd_driver.statsd') def setUp(self, statsd): + cfg.CONF.set_override(name='prefix', group='metrics', override=None) + self._driver = StatsdDriver() statsd.Connection.set_defaults.assert_called_once_with( @@ -69,14 +72,14 @@ def test_time(self): self._driver._timer = statsd.Timer('') params = ('test', 10) self._driver.time(*params) - statsd.Timer().send.assert_called_with(*params) + statsd.Timer().send.assert_called_with('st2.test', 10) def test_time_with_float(self): statsd = MagicMock() self._driver._timer = statsd.Timer('') params = ('test', 10.5) self._driver.time(*params) - statsd.Timer().send.assert_called_with(*params) + statsd.Timer().send.assert_called_with('st2.test', 10.5) def test_time_with_invalid_key(self): params = (2, 2) @@ -176,6 +179,24 @@ def test_dec_gauge_success(self, statsd): self._driver.dec_gauge(*params) mock_gauge.assert_called_once_with(None, params[1]) + def test_get_full_key_name(self): + # No prefix specified in the config + cfg.CONF.set_override(name='prefix', group='metrics', override=None) + + result = get_full_key_name('api.requests') + self.assertEqual(result, 'st2.api.requests') + + # Prefix is defined in the config + cfg.CONF.set_override(name='prefix', group='metrics', override='staging') + + result = get_full_key_name('api.requests') + self.assertEqual(result, 'st2.staging.api.requests') + + cfg.CONF.set_override(name='prefix', group='metrics', override='prod') + + result = get_full_key_name('api.requests') + self.assertEqual(result, 'st2.prod.api.requests') + class TestCounterContextManager(unittest2.TestCase): @patch('st2common.metrics.base.METRICS') diff --git a/st2reactor/st2reactor/rules/engine.py b/st2reactor/st2reactor/rules/engine.py index dbf98f839d..0731c3a1f3 100644 --- a/st2reactor/st2reactor/rules/engine.py +++ b/st2reactor/st2reactor/rules/engine.py @@ -78,8 +78,8 @@ def create_rule_enforcers(self, trigger_instance, matching_rules): enforcers = [] for matching_rule in matching_rules: - metrics_driver.inc_counter('st2.rule.matched') - metrics_driver.inc_counter('st2.rule.%s.matched' % (matching_rule.ref)) + metrics_driver.inc_counter('rule.matched') + metrics_driver.inc_counter('rule.%s.matched' % (matching_rule.ref)) enforcers.append(RuleEnforcer(trigger_instance, matching_rule)) return enforcers diff --git a/st2reactor/st2reactor/rules/worker.py b/st2reactor/st2reactor/rules/worker.py index a95c50751e..709b239d4a 100644 --- a/st2reactor/st2reactor/rules/worker.py +++ b/st2reactor/st2reactor/rules/worker.py @@ -65,7 +65,7 @@ def process(self, pre_ack_response): if not trigger_instance: raise ValueError('No trigger_instance provided for processing.') - get_driver().inc_counter('st2.trigger.%s.processed' % (trigger_instance.trigger)) + get_driver().inc_counter('trigger.%s.processed' % (trigger_instance.trigger)) try: # Use trace_context from the message and if not found create a new context @@ -86,8 +86,8 @@ def process(self, pre_ack_response): container_utils.update_trigger_instance_status( trigger_instance, trigger_constants.TRIGGER_INSTANCE_PROCESSING) - with CounterWithTimer(key='st2.rule.processed'): - with Timer(key='st2.triggerinstance.%s.processed' % (trigger_instance.id)): + with CounterWithTimer(key='rule.processed'): + with Timer(key='triggerinstance.%s.processed' % (trigger_instance.id)): self.rules_engine.handle_trigger_instance(trigger_instance) container_utils.update_trigger_instance_status( From 4fb318eb483ecf8c54a0a3d85e1862a4d35f987c Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 16:25:57 +0200 Subject: [PATCH 35/38] Add missing module. --- st2common/st2common/metrics/utils.py | 45 ++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 st2common/st2common/metrics/utils.py diff --git a/st2common/st2common/metrics/utils.py b/st2common/st2common/metrics/utils.py new file mode 100644 index 0000000000..a790fc8d25 --- /dev/null +++ b/st2common/st2common/metrics/utils.py @@ -0,0 +1,45 @@ +# Licensed to the StackStorm, Inc ('StackStorm') under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import six +from oslo_config import cfg + +__all__ = [ + 'get_full_key_name', + 'check_key' +] + + +def get_full_key_name(key): + """ + Return full metric key name, taking into account optional prefix which can be specified in the + config. + """ + parts = ['st2'] + + if cfg.CONF.metrics.prefix: + parts.append(cfg.CONF.metrics.prefix) + + parts.append(key) + + return '.'.join(parts) + + +def check_key(key): + """ + Ensure key meets requirements. + """ + assert isinstance(key, six.string_types), "Key not a string. Got %s" % type(key) + assert key, "Key cannot be empty string." From d4c3aaf429bcee1a018df9841d28d0c39a513cb5 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 16:45:35 +0200 Subject: [PATCH 36/38] Fix typo. --- st2common/st2common/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/st2common/st2common/config.py b/st2common/st2common/config.py index f977509e80..347e6145ad 100644 --- a/st2common/st2common/config.py +++ b/st2common/st2common/config.py @@ -541,7 +541,7 @@ def register_opts(ignore_errors=False): cfg.StrOpt( 'prefix', default=None, help='Optional prefix which is prepended to all the metric names. Comes handy when ' - 'you want to submit metrics from various environment to the same metric' + 'you want to submit metrics from various environment to the same metric ' 'backend instance.') ] From fb54c2ae4625f1564d9f3a3e0af519f90e16cc8b Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 16:45:48 +0200 Subject: [PATCH 37/38] Re-gen sample config. --- conf/st2.conf.sample | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/st2.conf.sample b/conf/st2.conf.sample index 413ce14106..511262c8f5 100644 --- a/conf/st2.conf.sample +++ b/conf/st2.conf.sample @@ -183,6 +183,8 @@ cluster_urls = # comma separated list allowed here. [metrics] # Destination server to connect to if driver requires connection. host = 127.0.0.1 +# Optional prefix which is prepended to all the metric names. Comes handy when you want to submit metrics from various environment to the same metricbackend instance. +prefix = None # Driver type for metrics collection. driver = noop # Destination port to connect to if driver requires connection. From 1f3085283e253463685abcdaa76901a10eab917b Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 22 Aug 2018 16:56:26 +0200 Subject: [PATCH 38/38] Re-generate sample config. --- conf/st2.conf.sample | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/st2.conf.sample b/conf/st2.conf.sample index 511262c8f5..55ad4e3680 100644 --- a/conf/st2.conf.sample +++ b/conf/st2.conf.sample @@ -183,7 +183,7 @@ cluster_urls = # comma separated list allowed here. [metrics] # Destination server to connect to if driver requires connection. host = 127.0.0.1 -# Optional prefix which is prepended to all the metric names. Comes handy when you want to submit metrics from various environment to the same metricbackend instance. +# Optional prefix which is prepended to all the metric names. Comes handy when you want to submit metrics from various environment to the same metric backend instance. prefix = None # Driver type for metrics collection. driver = noop