diff --git a/graphite_beacon/alerts.py b/graphite_beacon/alerts.py index 7e3738b..908153c 100644 --- a/graphite_beacon/alerts.py +++ b/graphite_beacon/alerts.py @@ -125,6 +125,10 @@ def configure(self, name=None, rules=None, query=None, **options): 'request_timeout', self.reactor.options['request_timeout']) self.connect_timeout = options.get( 'connect_timeout', self.reactor.options['connect_timeout']) + self.url_retries = options.get( + 'url_retries', self.reactor.options['url_retries']) + self.url_retry_delay = parse_interval(options.get( + 'url_retry_delay', self.reactor.options['url_retry_delay'])) / 1000 self.history_size = options.get('history_size', self.reactor.options['history_size']) self.history_size = parse_interval(self.history_size) @@ -161,22 +165,29 @@ def stop(self): self.callback.stop() return self - def check(self, records): - """Check current value.""" + def check(self, records, final=True): + """Check current value. Returns true if check succeeded.""" + rvalue = True for value, target in records: LOGGER.info("%s [%s]: %s", self.name, target, value) if value is None: - self.notify(self.no_data, value, target) + if final: + self.notify(self.no_data, value, target) + rvalue = False continue for rule in self.rules: if self.evaluate_rule(rule, value, target): - self.notify(rule['level'], value, target, rule=rule) + if final: + self.notify(rule['level'], value, target, rule=rule) + rvalue = False break else: self.notify('normal', value, target, rule=rule) self.history[target].append(value) + return rvalue + def evaluate_rule(self, rule, value, target): """Calculate the value.""" def evaluate(expr): @@ -313,16 +324,29 @@ def load(self): self.notify('warning', 'Process takes too much time', target='waiting', ntype='common') else: self.waiting = True - try: - response = yield self.client.fetch( - self.query, method=self.options.get('method', 'GET'), - request_timeout=self.request_timeout, - connect_timeout=self.connect_timeout, - validate_cert=self.options.get('validate_cert', True)) - self.check([(self.get_data(response), self.query)]) - self.notify('normal', 'Metrics are loaded', target='loading', ntype='common') - - except Exception as e: - self.notify('critical', str(e), target='loading', ntype='common') + retries_remaining = self.url_retries + while True: + final = retries_remaining <= 0 + retries_remaining -= 1 + try: + response = yield self.client.fetch( + self.query, method=self.options.get('method', 'GET'), + request_timeout=self.request_timeout, + connect_timeout=self.connect_timeout, + validate_cert=self.options.get('validate_cert', True)) + if self.check([(self.get_data(response), self.query)], final): + self.notify('normal', 'Metrics are loaded', target='loading', ntype='common') + break + elif final: + break + elif self.url_retry_delay > 0: + yield gen.sleep(self.url_retry_delay) + + except Exception as e: + if final: + self.notify('critical', str(e), target='loading', ntype='common') + break + elif self.url_retry_delay > 0: + yield gen.sleep(self.url_retry_delay) self.waiting = False diff --git a/graphite_beacon/core.py b/graphite_beacon/core.py index afd1152..6ceff28 100644 --- a/graphite_beacon/core.py +++ b/graphite_beacon/core.py @@ -50,7 +50,9 @@ class Reactor(object): 'default_nan_value': 0, 'ignore_nan': False, 'loading_error': 'critical', - 'alerts': [] + 'alerts': [], + 'url_retries': 0, + 'url_retry_delay': 0 } def __init__(self, **options): diff --git a/graphite_beacon/templates/graphite/message.html b/graphite_beacon/templates/graphite/message.html index 5e4e07e..da9d2b5 100644 --- a/graphite_beacon/templates/graphite/message.html +++ b/graphite_beacon/templates/graphite/message.html @@ -49,7 +49,7 @@ Tested value: {{ alert.convert(alert.get_value_for_expr(expr, target)) }}
{% end %} {% end %} - + Errors : {{ alert.options.get('errorstr') }}