From d3b871bf836ed586c49cd5d49f1a66fa94c2e8a7 Mon Sep 17 00:00:00 2001 From: Dave Neuman Date: Wed, 19 Oct 2016 07:25:15 -0600 Subject: [PATCH 1/3] updated server scripted dashboard to add netstat and wrap_count, fixed read/write time to be a derivative --- traffic_stats/grafana/traffic_ops_server.js | 684 ++++++++++++++++++-- 1 file changed, 641 insertions(+), 43 deletions(-) diff --git a/traffic_stats/grafana/traffic_ops_server.js b/traffic_stats/grafana/traffic_ops_server.js index 10ef058065..2178095dc2 100644 --- a/traffic_stats/grafana/traffic_ops_server.js +++ b/traffic_stats/grafana/traffic_ops_server.js @@ -60,8 +60,7 @@ dashboard.refresh = "30s"; { - dashboard.rows.push( - { + dashboard.rows.push( { "height": "250px", "panels": [ { @@ -73,19 +72,7 @@ dashboard.refresh = "30s"; "id": 1, "datasource": "cache_stats", "renderer": "flot", - "x-axis": true, - "y-axis": true, - "y_formats": [ - "bps", - "short" - ], "grid": { - "leftLogBase": 1, - "leftMax": null, - "rightMax": null, - "leftMin": null, - "rightMin": null, - "rightLogBase": 1, "threshold1": null, "threshold2": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", @@ -112,7 +99,9 @@ dashboard.refresh = "30s"; "steppedLine": false, "tooltip": { "value_type": "cumulative", - "shared": true + "shared": true, + "sort": 0, + "msResolution": false }, "timeFrom": null, "timeShift": null, @@ -120,13 +109,65 @@ dashboard.refresh = "30s"; { "measurement": "bandwidth.1min", "tags": {}, - "query": "SELECT mean(value)*1000 FROM \"monthly\".\"bandwidth.1min\" WHERE hostname='" + which + "' and $timeFilter GROUP BY time(60s)", - "rawQuery": true + "query": "SELECT mean(value) FROM \"monthly\".\"bandwidth.1min\" WHERE hostname= '" + which + "' and $timeFilter GROUP BY time(60s)", + "rawQuery": true, + "refId": "A", + "policy": "default", + "dsType": "influxdb", + "resultFormat": "time_series", + "groupBy": [ + { + "type": "time", + "params": [ + "$interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] 
+ } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "alias": "bandwidth" } ], "aliasColors": {}, "seriesOverrides": [], - "links": [] + "links": [], + "yaxes": [ + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "Kbits" + }, + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + } + ], + "xaxis": { + "show": true + } } ], "title": "Row", @@ -145,18 +186,7 @@ dashboard.refresh = "30s"; "id": 2, "datasource": "cache_stats", "renderer": "flot", - "x-axis": true, - "y-axis": true, - "y_formats": [ - "short" - ], "grid": { - "leftLogBase": 1, - "leftMax": null, - "rightMax": null, - "leftMin": null, - "rightMin": null, - "rightLogBase": 1, "threshold1": null, "threshold2": null, "threshold1Color": "rgba(216, 200, 27, 0.27)", @@ -183,7 +213,9 @@ dashboard.refresh = "30s"; "steppedLine": false, "tooltip": { "value_type": "cumulative", - "shared": true + "shared": true, + "sort": 0, + "msResolution": false }, "timeFrom": null, "timeShift": null, @@ -191,13 +223,64 @@ dashboard.refresh = "30s"; { "measurement": "connections.1min", "tags": {}, - "query": "SELECT mean(value) FROM \"monthly\".\"connections.1min\" WHERE hostname='" + which + "' and $timeFilter GROUP BY time(60s)", - "rawQuery": true + "query": "SELECT mean(value) FROM \"monthly\".\"connections.1min\" WHERE hostname= '" + which + "' and $timeFilter GROUP BY time(60s)", + "rawQuery": true, + "refId": "A", + "policy": "default", + "dsType": "influxdb", + "resultFormat": "time_series", + "groupBy": [ + { + "type": "time", + "params": [ + "$interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "value" + ] + }, + { + "type": "mean", + "params": [] + } + ] + ], + "alias": "connections" } ], "aliasColors": {}, "seriesOverrides": [], - "links": [] + "links": [], + "yaxes": [ + { + "show": true, + 
"min": null, + "max": null, + "logBase": 1, + "format": "short" + }, + { + "show": true, + "min": null, + "max": null, + "logBase": 1 + } + ], + "xaxis": { + "show": true + } } ], "title": "Row", @@ -299,9 +382,46 @@ dashboard.refresh = "30s"; "cpu_user" ] } + ], + [ + { + "type": "field", + "params": [ + "usage_guest" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "cpu_guest" + ] + } + ], + [ + { + "type": "field", + "params": [ + "usage_steal" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "cpu_steal" + ] + } ] ], - "measurement": "cpu" + "measurement": "cpu", + "alias": "$col" } ], "datasource": "telegraf", @@ -355,7 +475,8 @@ dashboard.refresh = "30s"; "tooltip": { "value_type": "individual", "shared": true, - "msResolution": true + "msResolution": true, + "sort": 2 }, "timeFrom": null, "timeShift": null, @@ -418,7 +539,8 @@ dashboard.refresh = "30s"; } ] ], - "measurement": "mem" + "measurement": "mem", + "alias": "$col" } ], "datasource": "telegraf", @@ -472,7 +594,8 @@ dashboard.refresh = "30s"; "tooltip": { "value_type": "individual", "shared": true, - "msResolution": true + "msResolution": true, + "sort": 0 }, "timeFrom": null, "timeShift": null, @@ -579,7 +702,8 @@ dashboard.refresh = "30s"; } ] ], - "measurement": "system" + "measurement": "system", + "alias": "$col" } ], "datasource": "telegraf", @@ -633,7 +757,8 @@ dashboard.refresh = "30s"; "tooltip": { "value_type": "cumulative", "shared": true, - "msResolution": true + "msResolution": true, + "sort": 0 }, "timeFrom": null, "timeShift": null, @@ -685,16 +810,53 @@ dashboard.refresh = "30s"; ] }, { - "type": "mean", + "type": "sum", "params": [] }, + { + "type": "non_negative_derivative", + "params": [ + "10s" + ] + }, { "type": "alias", "params": [ "read_time" ] } - ], + ] + ], + "measurement": "diskio", + "alias": "$col" + }, + { + "refId": "B", + "policy": "default", + "dsType": "influxdb", + "resultFormat": 
"time_series", + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/" + which + "/" + } + ], + "groupBy": [ + { + "type": "time", + "params": [ + "$interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ [ { "type": "field", @@ -703,9 +865,15 @@ dashboard.refresh = "30s"; ] }, { - "type": "mean", + "type": "sum", "params": [] }, + { + "type": "non_negative_derivative", + "params": [ + "10s" + ] + }, { "type": "alias", "params": [ @@ -714,7 +882,8 @@ dashboard.refresh = "30s"; } ] ], - "measurement": "diskio" + "measurement": "diskio", + "alias": "$col" } ], "datasource": "telegraf", @@ -768,6 +937,435 @@ dashboard.refresh = "30s"; "tooltip": { "value_type": "cumulative", "shared": true, + "msResolution": true, + "sort": 0 + }, + "timeFrom": null, + "timeShift": null, + "aliasColors": {}, + "seriesOverrides": [], + "links": [] + } + ] + }, + { + "title": "Wrap Count and netstat", + "height": "250px", + "editable": true, + "collapse": false, + "panels": [ + { + "title": "wrap count", + "error": false, + "span": 6, + "editable": true, + "type": "graph", + "isNew": true, + "id": 7, + "targets": [ + { + "refId": "A", + "policy": "monthly", + "dsType": "influxdb", + "resultFormat": "time_series", + "tags": [ + { + "key": "hostname", + "operator": "=~", + "value": "/" + which + "/" + } + ], + "groupBy": [ + { + "type": "time", + "params": [ + "$interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "vol1_wrap_count" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "vol1" + ] + } + ], + [ + { + "type": "field", + "params": [ + "vol2_wrap_count" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "vol2" + ] + } + ] + ], + "measurement": "wrap_count.1min", + "alias": "$col" + } + ], + "datasource": "cache_stats", + "renderer": "flot", + "yaxes": [ + { + "label": null, + 
"show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short" + } + ], + "xaxis": { + "show": true + }, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "lines": true, + "fill": 1, + "linewidth": 2, + "points": false, + "pointradius": 5, + "bars": false, + "stack": false, + "percentage": false, + "legend": { + "show": true, + "values": false, + "min": false, + "max": false, + "current": false, + "total": false, + "avg": false + }, + "nullPointMode": "connected", + "steppedLine": false, + "tooltip": { + "value_type": "cumulative", + "shared": true, + "sort": 0, + "msResolution": true + }, + "timeFrom": null, + "timeShift": null, + "aliasColors": {}, + "seriesOverrides": [], + "links": [] + }, + { + "title": "netstat", + "error": false, + "span": 6, + "editable": true, + "type": "graph", + "isNew": true, + "id": 8, + "targets": [ + { + "refId": "A", + "policy": "default", + "dsType": "influxdb", + "resultFormat": "time_series", + "tags": [ + { + "key": "host", + "operator": "=~", + "value": "/" + which + "/" + } + ], + "groupBy": [ + { + "type": "time", + "params": [ + "$interval" + ] + }, + { + "type": "fill", + "params": [ + "null" + ] + } + ], + "select": [ + [ + { + "type": "field", + "params": [ + "tcp_close" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_close" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_close_wait" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_close_wait" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_established" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_established" + ] + } + ], + [ + { + "type": "field", + "params": [ + 
"tcp_time_wait" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_time_wait" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_closing" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_closing" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_fin_wait1" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_fin_wait1" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_fin_wait2" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_fin_wait2" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_last_ack" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_last_ack" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_syn_recv" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_syn_recv" + ] + } + ], + [ + { + "type": "field", + "params": [ + "tcp_syn_sent" + ] + }, + { + "type": "mean", + "params": [] + }, + { + "type": "alias", + "params": [ + "tcp_syn_sent" + ] + } + ] + ], + "measurement": "netstat", + "alias": "$col" + } + ], + "datasource": "telegraf", + "renderer": "flot", + "yaxes": [ + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short" + }, + { + "label": null, + "show": true, + "logBase": 1, + "min": null, + "max": null, + "format": "short" + } + ], + "xaxis": { + "show": true + }, + "grid": { + "threshold1": null, + "threshold2": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "lines": true, + "fill": 1, + "linewidth": 2, + "points": false, + "pointradius": 5, + "bars": false, + "stack": false, + "percentage": false, + "legend": { + "show": true, + "values": false, + "min": false, + "max": false, + "current": false, + "total": false, + "avg": 
false, + "hideEmpty": true, + "hideZero": true + }, + "nullPointMode": "connected", + "steppedLine": false, + "tooltip": { + "value_type": "cumulative", + "shared": true, + "sort": 2, "msResolution": true }, "timeFrom": null, From b5e56a4ba6226d5f19c82c4172075442142d3d11 Mon Sep 17 00:00:00 2001 From: Dave Neuman Date: Wed, 19 Oct 2016 07:28:39 -0600 Subject: [PATCH 2/3] add wrap_count stats to seeds.sql --- traffic_ops/app/db/seeds.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/traffic_ops/app/db/seeds.sql b/traffic_ops/app/db/seeds.sql index e6f90783a6..4f47c4e38d 100644 --- a/traffic_ops/app/db/seeds.sql +++ b/traffic_ops/app/db/seeds.sql @@ -149,6 +149,8 @@ insert ignore into profile (name, description) values ('INFLUXDB', 'InfluxDb pro insert into parameter (name, config_file, value) select * from (select 'CacheStats', 'traffic_stats.config', 'bandwidth') as temp where not exists (select name from parameter where name = 'CacheStats' and config_file = 'traffic_stats.config' and value = 'bandwidth') limit 1; insert into parameter (name, config_file, value) select * from (select 'CacheStats', 'traffic_stats.config', 'maxKbps') as temp where not exists (select name from parameter where name = 'CacheStats' and config_file = 'traffic_stats.config' and value = 'maxKbps') limit 1; insert into parameter (name, config_file, value) select * from (select 'CacheStats', 'traffic_stats.config', 'ats.proxy.process.http.current_client_connections') as temp where not exists (select name from parameter where name = 'CacheStats' and config_file = 'traffic_stats.config' and value = 'ats.proxy.process.http.current_client_connections') limit 1; +insert into parameter (name, config_file, value) select * from (select 'CacheStats', 'traffic_stats.config', 'ats.proxy.process.cache.volume_1.wrap_count') as temp where not exists (select name from parameter where name = 'CacheStats' and config_file = 'traffic_stats.config' and value = 
'ats.proxy.process.cache.volume_1.wrap_count') limit 1; +insert into parameter (name, config_file, value) select * from (select 'CacheStats', 'traffic_stats.config', 'ats.proxy.process.cache.volume_2.wrap_count') as temp where not exists (select name from parameter where name = 'CacheStats' and config_file = 'traffic_stats.config' and value = 'ats.proxy.process.cache.volume_2.wrap_count') limit 1; insert into parameter (name, config_file, value) select * from (select 'DsStats', 'traffic_stats.config', 'kbps') as temp where not exists (select name from parameter where name = 'DsStats' and config_file = 'traffic_stats.config' and value = 'kbps') limit 1; insert into parameter (name, config_file, value) select * from (select 'DsStats', 'traffic_stats.config', 'tps_2xx') as temp where not exists (select name from parameter where name = 'DsStats' and config_file = 'traffic_stats.config' and value = 'tps_2xx') limit 1; insert into parameter (name, config_file, value) select * from (select 'DsStats', 'traffic_stats.config', 'status_4xx') as temp where not exists (select name from parameter where name = 'DsStats' and config_file = 'traffic_stats.config' and value = 'status_4xx') limit 1; From db4a25dd39a5f5a6ff6435dfc8ebf5d96eccce00 Mon Sep 17 00:00:00 2001 From: Dave Neuman Date: Wed, 19 Oct 2016 07:45:07 -0600 Subject: [PATCH 3/3] create continuous queries for wrap_count --- traffic_stats/influxdb_tools/create_ts_databases.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/traffic_stats/influxdb_tools/create_ts_databases.go b/traffic_stats/influxdb_tools/create_ts_databases.go index 8e02220dc0..e065b30c57 100644 --- a/traffic_stats/influxdb_tools/create_ts_databases.go +++ b/traffic_stats/influxdb_tools/create_ts_databases.go @@ -83,6 +83,8 @@ func createCacheStats(client influx.Client, replication *string) { createContinuousQuery(client, "connections_cdn_type_1min", `CREATE CONTINUOUS QUERY connections_cdn_type_1min ON cache_stats RESAMPLE FOR 5m BEGIN SELECT 
sum(value) AS "value" INTO "cache_stats"."monthly"."connections.cdn.type.1min" FROM "cache_stats"."monthly"."connections.1min" GROUP BY time(1m), cdn, type END`) createContinuousQuery(client, "maxKbps_1min", `CREATE CONTINUOUS QUERY maxKbps_1min ON cache_stats RESAMPLE FOR 2m BEGIN SELECT mean(value) AS value INTO cache_stats.monthly."maxkbps.1min" FROM cache_stats.daily.maxKbps GROUP BY time(1m), * END`) createContinuousQuery(client, "maxkbps_cdn_1min", `CREATE CONTINUOUS QUERY maxkbps_cdn_1min ON cache_stats RESAMPLE FOR 5m BEGIN SELECT sum(value) AS value INTO cache_stats.monthly."maxkbps.cdn.1min" FROM cache_stats.monthly."maxkbps.1min" GROUP BY time(1m), cdn END`) + createContinuousQuery(client, "wrap_count_vol1_1m", `CREATE CONTINUOUS QUERY wrap_count_vol1_1m ON cache_stats RESAMPLE FOR 2m BEGIN SELECT mean(value) AS vol1_wrap_count INTO cache_stats.monthly."wrap_count.1min" FROM cache_stats.daily."ats.proxy.process.cache.volume_1.wrap_count" GROUP BY time(1m), * END`) + createContinuousQuery(client, "wrap_count_vol2_1m", `CREATE CONTINUOUS QUERY wrap_count_vol2_1m ON cache_stats RESAMPLE FOR 2m BEGIN SELECT mean(value) AS vol2_wrap_count INTO cache_stats.monthly."wrap_count.1min" FROM cache_stats.daily."ats.proxy.process.cache.volume_2.wrap_count" GROUP BY time(1m), * END`) } func createDeliveryServiceStats(client influx.Client, replication *string) {