diff --git a/README.md b/README.md index ca36932..f35341b 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ name | Pgpool-II Version | Description :---|:---|:--- pgpool2_frontend_total | 3.6+ | Number of total child processes pgpool2_frontend_used | 3.6+ | Number of used child processes +pgpool2_frontend_used_ratio | 3.6+ | Ratio of used child processes to total child processes (0.0 to 1.0) pgpool2_pool_nodes_status | 3.6+ | Backend node Status (1 for up or waiting, 0 for down or unused) pgpool2_pool_nodes_replication_delay | 3.6+ | Replication delay pgpool2_pool_nodes_select_cnt | 3.6+ | SELECT query counts issued to each backend diff --git a/pgpool2_exporter.go b/pgpool2_exporter.go index eb757a9..0f5e5a4 100755 --- a/pgpool2_exporter.go +++ b/pgpool2_exporter.go @@ -23,22 +23,21 @@ SOFTWARE. package main import ( + "database/sql" + "errors" "fmt" + "math" "net/http" "net/url" "os" - "database/sql" - "errors" - "math" "regexp" "strconv" "sync" "time" - + "github.com/blang/semver" "github.com/go-kit/kit/log/level" _ "github.com/lib/pq" - "github.com/blang/semver" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/promlog" @@ -48,14 +47,14 @@ import ( ) var ( - listenAddress = kingpin.Flag("web.listen-address", "Address on which to expose metrics and web interface.").Default(":9719").String() - metricsPath = kingpin.Flag("web.telemetry-path", "Path under which to expose metrics.").Default("/metrics").String() - logger = promlog.New(&promlog.Config{}) + listenAddress = kingpin.Flag("web.listen-address", "Address on which to expose metrics and web interface.").Default(":9719").String() + metricsPath = kingpin.Flag("web.telemetry-path", "Path under which to expose metrics.").Default("/metrics").String() + logger = promlog.New(&promlog.Config{}) ) const ( - namespace = "pgpool2" - exporter = "exporter" + namespace = "pgpool2" + exporter = "exporter" landingPage = ` @@ -153,69 +152,69 @@ type ColumnMapping struct { // Exporter collects Pgpool-II stats from the given server and exports // them using the prometheus metrics package. type Exporter struct { - dsn string - namespace string - mutex sync.RWMutex - duration prometheus.Gauge - up prometheus.Gauge - error prometheus.Gauge - totalScrapes prometheus.Counter - metricMap map[string]MetricMapNamespace - db *sql.DB + dsn string + namespace string + mutex sync.RWMutex + duration prometheus.Gauge + up prometheus.Gauge + error prometheus.Gauge + totalScrapes prometheus.Counter + metricMap map[string]MetricMapNamespace + db *sql.DB } var ( metricMaps = map[string]map[string]ColumnMapping{ "pool_nodes": { - "hostname": {LABEL, "Backend hostname"}, - "port": {LABEL, "Backend port"}, - "role": {LABEL, "Role (primary or standby)"}, - "status": {GAUGE, "Backend node Status (1 for up or waiting, 0 for down or unused)"}, - "select_cnt": {GAUGE, "SELECT statement counts issued to each backend"}, - "replication_delay": {GAUGE, "Replication delay"}, + "hostname": {LABEL, "Backend hostname"}, + "port": {LABEL, "Backend port"}, + "role": {LABEL, "Role (primary or standby)"}, + "status": {GAUGE, "Backend node Status (1 for up or waiting, 0 for down or unused)"}, + "select_cnt": {GAUGE, "SELECT statement counts issued to each backend"}, + "replication_delay": {GAUGE, "Replication delay"}, }, "pool_backend_stats": { - "hostname": {LABEL, "Backend hostname"}, - "port": {LABEL, "Backend port"}, - "role": {LABEL, "Role (primary or standby)"}, - "status": {GAUGE, "Backend node Status (1 for up or waiting, 0 for down or unused)"}, - "select_cnt": {GAUGE, "SELECT statement counts issued to each backend"}, - "insert_cnt": {GAUGE, "INSERT statement counts issued to each backend"}, - "update_cnt": {GAUGE, "UPDATE statement counts issued to each backend"}, - "delete_cnt": {GAUGE, "DELETE statement counts issued to each backend"}, - "ddl_cnt": {GAUGE, "DDL statement counts issued to each backend"}, - "other_cnt": {GAUGE, "other statement counts issued to each backend"}, - "panic_cnt": {GAUGE, "Panic message counts returned from backend"}, - "fatal_cnt": {GAUGE, "Fatal message counts returned from backend)"}, - "error_cnt": {GAUGE, "Error message counts returned from backend"}, + "hostname": {LABEL, "Backend hostname"}, + "port": {LABEL, "Backend port"}, + "role": {LABEL, "Role (primary or standby)"}, + "status": {GAUGE, "Backend node Status (1 for up or waiting, 0 for down or unused)"}, + "select_cnt": {GAUGE, "SELECT statement counts issued to each backend"}, + "insert_cnt": {GAUGE, "INSERT statement counts issued to each backend"}, + "update_cnt": {GAUGE, "UPDATE statement counts issued to each backend"}, + "delete_cnt": {GAUGE, "DELETE statement counts issued to each backend"}, + "ddl_cnt": {GAUGE, "DDL statement counts issued to each backend"}, + "other_cnt": {GAUGE, "other statement counts issued to each backend"}, + "panic_cnt": {GAUGE, "Panic message counts returned from backend"}, + "fatal_cnt": {GAUGE, "Fatal message counts returned from backend)"}, + "error_cnt": {GAUGE, "Error message counts returned from backend"}, }, "pool_health_check_stats": { - "hostname": {LABEL, "Backend hostname"}, - "port": {LABEL, "Backend port"}, - "role": {LABEL, "Role (primary or standby)"}, - "status": {GAUGE, "Backend node Status (1 for up or waiting, 0 for down or unused)"}, - "total_count": {GAUGE, "Number of health check count in total"}, - "success_count": {GAUGE, "Number of successful health check count in total"}, - "fail_count": {GAUGE, "Number of failed health check count in total"}, - "skip_count": {GAUGE, "Number of skipped health check count in total"}, - "retry_count": {GAUGE, "Number of retried health check count in total"}, - "average_retry_count": {GAUGE, "Number of average retried health check count in a health check session"}, - "max_retry_count": {GAUGE, "Number of maximum retried health check count in a health check session"}, - "max_duration": {GAUGE, "Maximum health check duration in Millie seconds"}, - "min_duration": {GAUGE, "Minimum health check duration in Millie seconds"}, - "average_duration": {GAUGE, "Average health check duration in Millie seconds"}, + "hostname": {LABEL, "Backend hostname"}, + "port": {LABEL, "Backend port"}, + "role": {LABEL, "Role (primary or standby)"}, + "status": {GAUGE, "Backend node Status (1 for up or waiting, 0 for down or unused)"}, + "total_count": {GAUGE, "Number of health check count in total"}, + "success_count": {GAUGE, "Number of successful health check count in total"}, + "fail_count": {GAUGE, "Number of failed health check count in total"}, + "skip_count": {GAUGE, "Number of skipped health check count in total"}, + "retry_count": {GAUGE, "Number of retried health check count in total"}, + "average_retry_count": {GAUGE, "Number of average retried health check count in a health check session"}, + "max_retry_count": {GAUGE, "Number of maximum retried health check count in a health check session"}, + "max_duration": {GAUGE, "Maximum health check duration in Millie seconds"}, + "min_duration": {GAUGE, "Minimum health check duration in Millie seconds"}, + "average_duration": {GAUGE, "Average health check duration in Millie seconds"}, }, "pool_processes": { - "pool_pid": {DISCARD, "PID of Pgpool-II child processes"}, - "database": {DISCARD, "Database name of the currently active backend connection"}, + "pool_pid": {DISCARD, "PID of Pgpool-II child processes"}, + "database": {DISCARD, "Database name of the currently active backend connection"}, }, "pool_cache": { - "cache_hit_ratio": {GAUGE, "Query cache hit ratio"}, - "num_hash_entries": {GAUGE, "Number of total hash entries"}, - "used_hash_entries": {GAUGE, "Number of used hash entries"}, - "num_cache_entries": {GAUGE, "Number of used cache entries"}, - "used_cache_entries_size": {GAUGE, "Total size of used cache size"}, - "free_cache_entries_size": {GAUGE, "Total size of free cache size"}, + "cache_hit_ratio": {GAUGE, "Query cache hit ratio"}, + "num_hash_entries": {GAUGE, "Number of total hash entries"}, + "used_hash_entries": {GAUGE, "Number of used hash entries"}, + "num_cache_entries": {GAUGE, "Number of used cache entries"}, + "used_cache_entries_size": {GAUGE, "Total size of used cache size"}, + "free_cache_entries_size": {GAUGE, "Total size of free cache size"}, }, } ) @@ -300,6 +299,7 @@ func queryNamespaceMapping(ch chan<- prometheus.Metric, db *sql.DB, namespace st // Read from the result of "SHOW pool_processes" if namespace == "pool_processes" { + frontendByUserDb := make(map[string]map[string]int) var frontend_total float64 var frontend_used float64 @@ -310,12 +310,37 @@ func queryNamespaceMapping(ch chan<- prometheus.Metric, db *sql.DB, namespace st } frontend_total++ // Loop over column names to find currently connected backend database + var valueDatabase string + var valueUsername string for idx, columnName := range columnNames { - if columnName == "database" { - if valueDatabase, _ := dbToString(columnData[idx]); len(valueDatabase) != 0 { - frontend_used++ - } + switch columnName { + case "database": + valueDatabase, _ = dbToString(columnData[idx]) + case "username": + valueUsername, _ = dbToString(columnData[idx]) + } + } + if len(valueDatabase) > 0 && len(valueUsername) > 0 { + frontend_used++ + dbCount, ok := frontendByUserDb[valueUsername] + if !ok { + dbCount = map[string]int{valueDatabase: 0} } + dbCount[valueDatabase]++ + frontendByUserDb[valueUsername] = dbCount + } + } + + variableLabels := []string{"username", "database"} + for userName, dbs := range frontendByUserDb { + for dbName, count := range dbs { + labels := []string{userName, dbName} + ch <- prometheus.MustNewConstMetric( + prometheus.NewDesc(prometheus.BuildFQName("pgpool2", "", "frontend_used"), "Number of used child processes", variableLabels, nil), + prometheus.GaugeValue, + float64(count), + labels..., + ) } } @@ -326,9 +351,9 @@ func queryNamespaceMapping(ch chan<- prometheus.Metric, db *sql.DB, namespace st frontend_total, ) ch <- prometheus.MustNewConstMetric( - prometheus.NewDesc(prometheus.BuildFQName("pgpool2", "", "frontend_used"), "Number of used child processes", nil, nil), + prometheus.NewDesc(prometheus.BuildFQName("pgpool2", "", "frontend_used_ratio"), "Ratio of child processes to total processes", nil, nil), prometheus.GaugeValue, - frontend_used, + frontend_used/frontend_total, ) return nonfatalErrors, nil @@ -473,9 +498,9 @@ func dbToString(t interface{}) (string, bool) { } // Convert bool to int. -func parseStatusField(value string) (float64) { +func parseStatusField(value string) float64 { switch value { - case "true", "up", "waiting": + case "true", "up", "waiting": return 1.0 case "false", "unused", "down": return 0.0