From fcf80fd16b473d8c079b1b715afe0e35071b1777 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Mon, 16 Dec 2024 12:05:58 -0300 Subject: [PATCH 1/9] refactor: rename ethereum_metrics to prometheus_metrics --- .../telemetry_api/{ethereum_metrics.ex => prometheus_metrics.ex} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename telemetry_api/lib/telemetry_api/{ethereum_metrics.ex => prometheus_metrics.ex} (100%) diff --git a/telemetry_api/lib/telemetry_api/ethereum_metrics.ex b/telemetry_api/lib/telemetry_api/prometheus_metrics.ex similarity index 100% rename from telemetry_api/lib/telemetry_api/ethereum_metrics.ex rename to telemetry_api/lib/telemetry_api/prometheus_metrics.ex From 3a33fa419a6f4ab25ab0bc71f8964d7c36e6c620 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Mon, 16 Dec 2024 13:52:30 -0300 Subject: [PATCH 2/9] feat: add operator missing tasks panel --- telemetry_api/lib/telemetry_api/periodically.ex | 4 ++-- telemetry_api/lib/telemetry_api/prometheus_metrics.ex | 10 +++++++++- telemetry_api/lib/telemetry_api/traces.ex | 9 +++++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/telemetry_api/lib/telemetry_api/periodically.ex b/telemetry_api/lib/telemetry_api/periodically.ex index 6aae119823..81bd8240ee 100644 --- a/telemetry_api/lib/telemetry_api/periodically.ex +++ b/telemetry_api/lib/telemetry_api/periodically.ex @@ -1,7 +1,7 @@ defmodule TelemetryApi.Periodically do use GenServer alias TelemetryApi.Operators - alias TelemetryApi.EthereumMetrics + alias TelemetryApi.PrometheusMetrics alias TelemetryApi.ContractManagers.RegistryCoordinatorManager require Logger @@ -45,7 +45,7 @@ defmodule TelemetryApi.Periodically do def handle_info(:gas_price, _state) do case Ethers.current_gas_price() do {:ok, gas_price} -> - EthereumMetrics.new_gas_price(gas_price) + PrometheusMetrics.new_gas_price(gas_price) {:error, error} -> IO.inspect("Error fetching gas price: #{error}") diff --git a/telemetry_api/lib/telemetry_api/prometheus_metrics.ex b/telemetry_api/lib/telemetry_api/prometheus_metrics.ex index 2db78d6d18..97b69e3c76 100644 --- a/telemetry_api/lib/telemetry_api/prometheus_metrics.ex +++ b/telemetry_api/lib/telemetry_api/prometheus_metrics.ex @@ -1,7 +1,8 @@ -defmodule TelemetryApi.EthereumMetrics do +defmodule TelemetryApi.PrometheusMetrics do use Prometheus.Metric @gauge [name: :gas_price, help: "Ethereum Gas Price.", labels: []] + @counter [name: :missing_operator_count, help: "Missing Operators", labels: [:operator]] def new_gas_price(gas_price) do Gauge.set( @@ -9,4 +10,11 @@ defmodule TelemetryApi.EthereumMetrics do gas_price ) end + + def missing_operator(operator) do + Counter.inc( + name: :missing_operator_count, + labels: [operator] + ) + end end diff --git a/telemetry_api/lib/telemetry_api/traces.ex b/telemetry_api/lib/telemetry_api/traces.ex index e611f386a0..d8480eed88 100644 --- a/telemetry_api/lib/telemetry_api/traces.ex +++ b/telemetry_api/lib/telemetry_api/traces.ex @@ -5,6 +5,7 @@ defmodule TelemetryApi.Traces do alias TelemetryApi.Traces.Trace alias TelemetryApi.Operators alias TelemetryApi.ContractManagers.StakeRegistry + alias TelemetryApi.PrometheusMetrics require OpenTelemetry.Tracer require OpenTelemetry.Ctx @@ -208,7 +209,6 @@ defmodule TelemetryApi.Traces do :ok end end - @doc """ Registers the sending of a batcher task to Ethereum in the task trace. @@ -298,7 +298,7 @@ defmodule TelemetryApi.Traces do :ok end end - + @doc """ Registers a bump in the gas price when the aggregator tries to respond to a task in the task trace. @@ -368,6 +368,11 @@ defmodule TelemetryApi.Traces do defp add_missing_operators([]), do: :ok defp add_missing_operators(missing_operators) do + # Send to prometheus + missing_operators + |> Enum.map(fn o -> PrometheusMetrics.missing_operator(o.name) end) + |> Enum.join(";") + missing_operators = missing_operators |> Enum.map(fn o -> o.name end) |> Enum.join(";") From 3da81f3f7bdea3cf6d97ae608c26319f7e18e084 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Mon, 16 Dec 2024 14:04:01 -0300 Subject: [PATCH 3/9] feat: update dashboard --- .../aligned/aggregator_batcher.json | 76 ++++++++++++++++++- telemetry_api/lib/telemetry_api/traces.ex | 1 - 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json index e11a694a3a..ec386847bd 100644 --- a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json +++ b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json @@ -2721,6 +2721,78 @@ ], "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 49, + "options": { + "displayMode": "gradient", + "minVizHeight": 13, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "valueMode": "color" + }, + "pluginVersion": "10.1.10", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "floor(increase(missing_operator_count{job=\"aligned-tracker\"}[$__range]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{operator}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "# Operator Missing Tasks", + "transformations": [], + "type": "bargauge" + }, { "datasource": { "type": "prometheus", @@ -3176,13 +3248,13 @@ "list": [] }, "time": { - "from": "now-5m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "System Data", "uid": "aggregator", - "version": 7, + "version": 4, "weekStart": "" } diff --git a/telemetry_api/lib/telemetry_api/traces.ex b/telemetry_api/lib/telemetry_api/traces.ex index d8480eed88..b1e4f138fe 100644 --- a/telemetry_api/lib/telemetry_api/traces.ex +++ b/telemetry_api/lib/telemetry_api/traces.ex @@ -371,7 +371,6 @@ defmodule TelemetryApi.Traces do # Send to prometheus missing_operators |> Enum.map(fn o -> PrometheusMetrics.missing_operator(o.name) end) - |> Enum.join(";") missing_operators = missing_operators |> Enum.map(fn o -> o.name end) |> Enum.join(";") From 4a3c093a8567df9bcea405b9646470248bec9ab0 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Mon, 16 Dec 2024 14:05:18 -0300 Subject: [PATCH 4/9] fix: dashboard --- grafana/provisioning/dashboards/aligned/aggregator_batcher.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json index ec386847bd..8d99dfba66 100644 --- a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json +++ b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json @@ -3248,7 +3248,7 @@ "list": [] }, "time": { - "from": "now-30m", + "from": "now-5m", "to": "now" }, "timepicker": {}, From b463b7580aeaadf5820ce219a5d698a77f5170be Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Mon, 16 Dec 2024 14:48:45 -0300 Subject: [PATCH 5/9] fix: add total field --- .../dashboards/aligned/aggregator_batcher.json | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json index 8d99dfba66..c782a25f5b 100644 --- a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json +++ b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json @@ -2732,6 +2732,7 @@ "mode": "continuous-GrYlRd" }, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ @@ -2768,6 +2769,7 @@ "values": false }, "showUnfilled": true, + "text": {}, "valueMode": "color" }, "pluginVersion": "10.1.10", @@ -2779,10 +2781,13 @@ }, "disableTextWrap": false, "editorMode": "code", + "exemplar": false, "expr": "floor(increase(missing_operator_count{job=\"aligned-tracker\"}[$__range]))", + "format": "time_series", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, + "interval": "1", "legendFormat": "{{operator}}", "range": true, "refId": "A", @@ -2790,7 +2795,12 @@ } ], "title": "# Operator Missing Tasks", - "transformations": [], + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], "type": "bargauge" }, { @@ -3255,6 +3265,6 @@ "timezone": "browser", "title": "System Data", "uid": "aggregator", - "version": 4, + "version": 7, "weekStart": "" } From d8c1276e5c8999d78c59d168533489f9b3623496 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Wed, 18 Dec 2024 16:48:34 -0300 Subject: [PATCH 6/9] fix: initialize operator metrics --- .../lib/telemetry_api/prometheus_metrics.ex | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/telemetry_api/lib/telemetry_api/prometheus_metrics.ex b/telemetry_api/lib/telemetry_api/prometheus_metrics.ex index 97b69e3c76..877a0f5c3d 100644 --- a/telemetry_api/lib/telemetry_api/prometheus_metrics.ex +++ b/telemetry_api/lib/telemetry_api/prometheus_metrics.ex @@ -17,4 +17,19 @@ defmodule TelemetryApi.PrometheusMetrics do labels: [operator] ) end + + def initialize_operator_metrics(operator) do + value = + Counter.value( + name: :missing_operator_count, + labels: [operator] + ) + + if value == :undefined do + Counter.inc( + [name: :missing_operator_count, labels: [operator]], + 0 + ) + end + end end From 6db2f819bdc3454f64222e30d5645637472a6725 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Wed, 18 Dec 2024 16:50:53 -0300 Subject: [PATCH 7/9] fix: call initialize from periodically --- telemetry_api/lib/telemetry_api/operators.ex | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/telemetry_api/lib/telemetry_api/operators.ex b/telemetry_api/lib/telemetry_api/operators.ex index c3b2864f9f..6113ef88c3 100644 --- a/telemetry_api/lib/telemetry_api/operators.ex +++ b/telemetry_api/lib/telemetry_api/operators.ex @@ -9,6 +9,7 @@ defmodule TelemetryApi.Operators do alias TelemetryApi.Operators.Operator alias TelemetryApi.ContractManagers.OperatorStateRetriever alias TelemetryApi.ContractManagers.DelegationManager + alias TelemetryApi.PrometheusMetrics @doc """ Returns the list of operators. @@ -95,6 +96,16 @@ defmodule TelemetryApi.Operators do |> Enum.filter(fn {status, _} -> status == :ok end) |> Enum.map(fn {_, data} -> data end) + # Initialize new_operators metrics + Enum.map(new_operators, fn {_, op_data} -> + PrometheusMetrics.initialize_operator_metrics(op_data.name) + end) + + # If the server was restarted, initialize old_operators metrics + Enum.map(old_operators, fn {op, _} -> + PrometheusMetrics.initialize_operator_metrics(op.name) + end) + # Merge both lists operators = (new_operators ++ old_operators) From a64fac346ed3369f4dda14f0dd7e35e2266ac326 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Thu, 19 Dec 2024 19:31:34 -0300 Subject: [PATCH 8/9] feat: add addresses to missing operator names --- telemetry_api/lib/telemetry_api/operators.ex | 6 ++++-- telemetry_api/lib/telemetry_api/traces.ex | 9 +++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/telemetry_api/lib/telemetry_api/operators.ex b/telemetry_api/lib/telemetry_api/operators.ex index 6113ef88c3..21ea013b22 100644 --- a/telemetry_api/lib/telemetry_api/operators.ex +++ b/telemetry_api/lib/telemetry_api/operators.ex @@ -98,12 +98,14 @@ defmodule TelemetryApi.Operators do # Initialize new_operators metrics Enum.map(new_operators, fn {_, op_data} -> - PrometheusMetrics.initialize_operator_metrics(op_data.name) + op_name_address = op_data.name <> " - " <> String.slice(op_data.address, 0..7) + PrometheusMetrics.initialize_operator_metrics(op_name_address) end) # If the server was restarted, initialize old_operators metrics Enum.map(old_operators, fn {op, _} -> - PrometheusMetrics.initialize_operator_metrics(op.name) + op_name_address = op.name <> " - " <> String.slice(op.address, 0..7) + PrometheusMetrics.initialize_operator_metrics(op_name_address) end) # Merge both lists diff --git a/telemetry_api/lib/telemetry_api/traces.ex b/telemetry_api/lib/telemetry_api/traces.ex index b1e4f138fe..d2937991b9 100644 --- a/telemetry_api/lib/telemetry_api/traces.ex +++ b/telemetry_api/lib/telemetry_api/traces.ex @@ -368,12 +368,17 @@ defmodule TelemetryApi.Traces do defp add_missing_operators([]), do: :ok defp add_missing_operators(missing_operators) do + # Concatenate name + address + missing_operators = + missing_operators + |> Enum.map(fn op -> op.name <> " - " <> String.slice(op.address, 0..7) end) + # Send to prometheus missing_operators - |> Enum.map(fn o -> PrometheusMetrics.missing_operator(o.name) end) + |> Enum.map(fn o -> PrometheusMetrics.missing_operator(o) end) missing_operators = - missing_operators |> Enum.map(fn o -> o.name end) |> Enum.join(";") + missing_operators |> Enum.join(";") Tracer.add_event("Missing Operators", [{:operators, missing_operators}]) end From 15a53ae9ef60afd1e8d5124d485bde5ceb08f804 Mon Sep 17 00:00:00 2001 From: avilagaston9 Date: Mon, 23 Dec 2024 12:57:06 -0300 Subject: [PATCH 9/9] refactor: use table + bar display instead of bar gauge --- .../aligned/aggregator_batcher.json | 71 ++++++++++++++----- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json index c782a25f5b..d77269e749 100644 --- a/grafana/provisioning/dashboards/aligned/aggregator_batcher.json +++ b/grafana/provisioning/dashboards/aligned/aggregator_batcher.json @@ -2731,6 +2731,13 @@ "color": { "mode": "continuous-GrYlRd" }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, "mappings": [], "noValue": "none", "thresholds": { @@ -2747,7 +2754,22 @@ ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Max" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "gauge" + } + } + ] + } + ] }, "gridPos": { "h": 8, @@ -2757,20 +2779,16 @@ }, "id": 49, "options": { - "displayMode": "gradient", - "minVizHeight": 13, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "cellHeight": "sm", + "footer": { + "countRows": false, "fields": "", - "values": false + "reducer": [ + "sum" + ], + "show": false }, - "showUnfilled": true, - "text": {}, - "valueMode": "color" + "showHeader": false }, "pluginVersion": "10.1.10", "targets": [ @@ -2797,11 +2815,28 @@ "title": "# Operator Missing Tasks", "transformations": [ { - "id": "calculateField", - "options": {} + "id": "reduce", + "options": { + "labelsToFields": false, + "reducers": [ + "max" + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Max" + } + ] + } } ], - "type": "bargauge" + "type": "table" }, { "datasource": { @@ -3258,13 +3293,13 @@ "list": [] }, "time": { - "from": "now-5m", + "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "System Data", "uid": "aggregator", - "version": 7, + "version": 1, "weekStart": "" }