From 5d2f34c4ee73033946fe84150d7091d638eb8115 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 11 Feb 2024 11:15:07 +0800 Subject: [PATCH 01/10] Signed-off-by: Pamela Mei i540369 MOD: surface filesystem device error #2918 --- collector/filesystem_common.go | 18 +++++++++--------- collector/filesystem_linux.go | 3 +++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/collector/filesystem_common.go b/collector/filesystem_common.go index f5dde59ab7..7bbcf58859 100644 --- a/collector/filesystem_common.go +++ b/collector/filesystem_common.go @@ -60,7 +60,7 @@ var ( "Regexp of filesystem types to ignore for filesystem collector.", ).Hidden().String() - filesystemLabelNames = []string{"device", "mountpoint", "fstype"} + filesystemLabelNames = []string{"device", "mountpoint", "fstype", "device_error"} ) type filesystemCollector struct { @@ -73,7 +73,7 @@ type filesystemCollector struct { } type filesystemLabels struct { - device, mountPoint, fsType, options string + device, mountPoint, fsType, options, device_error string } type filesystemStats struct { @@ -184,11 +184,11 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.deviceErrorDesc, prometheus.GaugeValue, - s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.roDesc, prometheus.GaugeValue, - s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) if s.deviceError > 0 { @@ -197,23 +197,23 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.sizeDesc, prometheus.GaugeValue, - s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.freeDesc, prometheus.GaugeValue, - s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.availDesc, prometheus.GaugeValue, - s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.filesDesc, prometheus.GaugeValue, - s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.filesFreeDesc, prometheus.GaugeValue, - s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) } return nil diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 6e7623e203..d9d5c2bdc4 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -85,6 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stuckMountsMtx.Lock() if _, ok := stuckMounts[labels.mountPoint]; ok { + labels.device_error = "mountpoint timeout" stats = append(stats, filesystemStats{ labels: labels, deviceError: 1, @@ -133,6 +134,7 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta stuckMountsMtx.Unlock() if err != nil { + labels.device_error = err.Error() level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) return filesystemStats{ labels: labels, @@ -211,6 +213,7 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { mountPoint: rootfsStripPrefix(parts[1]), fsType: parts[2], options: parts[3], + device_error: "", }) } From 3c969c40a4f0ff7ce0ffee588c907b122dcb9487 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 11 Feb 2024 12:12:42 +0800 Subject: [PATCH 02/10] MOD: filesystem_linux.go due to go version difference 1.22.0&1.21.7 to pass circleci gofmt check Signed-off-by: Pamela Mei i540369 --- collector/filesystem_linux.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index d9d5c2bdc4..bb4496cada 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -85,7 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stuckMountsMtx.Lock() if _, ok := stuckMounts[labels.mountPoint]; ok { - labels.device_error = "mountpoint timeout" + labels.device_error = "mountpoint timeout" stats = append(stats, filesystemStats{ labels: labels, deviceError: 1, @@ -209,11 +209,11 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { parts[1] = strings.Replace(parts[1], "\\011", "\t", -1) filesystems = append(filesystems, filesystemLabels{ - device: parts[0], - mountPoint: rootfsStripPrefix(parts[1]), - fsType: parts[2], - options: parts[3], - device_error: "", + device: parts[0], + mountPoint: rootfsStripPrefix(parts[1]), + fsType: parts[2], + options: parts[3], + device_error: "", }) } From 4d72dbdd2d45576fe88e5df9c1cd5713b197d6f0 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 18 Feb 2024 16:51:55 +0800 Subject: [PATCH 03/10] MOD: update variable naming Signed-off-by: Pamela Mei i540369 --- collector/filesystem_common.go | 16 ++++++++-------- collector/filesystem_linux.go | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/collector/filesystem_common.go b/collector/filesystem_common.go index 7bbcf58859..f5d5135241 100644 --- a/collector/filesystem_common.go +++ b/collector/filesystem_common.go @@ -73,7 +73,7 @@ type filesystemCollector struct { } type filesystemLabels struct { - device, mountPoint, fsType, options, device_error string + device, mountPoint, fsType, options, deviceError string } type filesystemStats struct { @@ -184,11 +184,11 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.deviceErrorDesc, prometheus.GaugeValue, - s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.roDesc, prometheus.GaugeValue, - s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) if s.deviceError > 0 { @@ -197,23 +197,23 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.sizeDesc, prometheus.GaugeValue, - s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.freeDesc, prometheus.GaugeValue, - s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.availDesc, prometheus.GaugeValue, - s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.filesDesc, prometheus.GaugeValue, - s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.filesFreeDesc, prometheus.GaugeValue, - s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) } return nil diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index bb4496cada..5911d754ec 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -85,7 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stuckMountsMtx.Lock() if _, ok := stuckMounts[labels.mountPoint]; ok { - labels.device_error = "mountpoint timeout" + labels.deviceError = "mountpoint timeout" stats = append(stats, filesystemStats{ labels: labels, deviceError: 1, @@ -134,7 +134,7 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta stuckMountsMtx.Unlock() if err != nil { - labels.device_error = err.Error() + labels.deviceError = err.Error() level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) return filesystemStats{ labels: labels, @@ -213,7 +213,7 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { mountPoint: rootfsStripPrefix(parts[1]), fsType: parts[2], options: parts[3], - device_error: "", + deviceError: "", }) } From 4cdaf46b49278edaf15bbbc5e9a30cfe43b38f09 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 11 Feb 2024 11:15:07 +0800 Subject: [PATCH 04/10] Signed-off-by: Pamela Mei i540369 MOD: surface filesystem device error #2918 --- collector/filesystem_common.go | 18 +++++++++--------- collector/filesystem_linux.go | 3 +++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/collector/filesystem_common.go b/collector/filesystem_common.go index f5dde59ab7..7bbcf58859 100644 --- a/collector/filesystem_common.go +++ b/collector/filesystem_common.go @@ -60,7 +60,7 @@ var ( "Regexp of filesystem types to ignore for filesystem collector.", ).Hidden().String() - filesystemLabelNames = []string{"device", "mountpoint", "fstype"} + filesystemLabelNames = []string{"device", "mountpoint", "fstype", "device_error"} ) type filesystemCollector struct { @@ -73,7 +73,7 @@ type filesystemCollector struct { } type filesystemLabels struct { - device, mountPoint, fsType, options string + device, mountPoint, fsType, options, device_error string } type filesystemStats struct { @@ -184,11 +184,11 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.deviceErrorDesc, prometheus.GaugeValue, - s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.roDesc, prometheus.GaugeValue, - s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) if s.deviceError > 0 { @@ -197,23 +197,23 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.sizeDesc, prometheus.GaugeValue, - s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.freeDesc, prometheus.GaugeValue, - s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.availDesc, prometheus.GaugeValue, - s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.filesDesc, prometheus.GaugeValue, - s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) ch <- prometheus.MustNewConstMetric( c.filesFreeDesc, prometheus.GaugeValue, - s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, ) } return nil diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 2251cc4b8f..fe3f34d835 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -85,6 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stuckMountsMtx.Lock() if _, ok := stuckMounts[labels.mountPoint]; ok { + labels.device_error = "mountpoint timeout" stats = append(stats, filesystemStats{ labels: labels, deviceError: 1, @@ -125,6 +126,7 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta close(success) if err != nil { + labels.device_error = err.Error() level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) return filesystemStats{ labels: labels, @@ -215,6 +217,7 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { mountPoint: rootfsStripPrefix(parts[1]), fsType: parts[2], options: parts[3], + device_error: "", }) } From c0cceef4332cbda7d808e04c971af77ccaea616b Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 11 Feb 2024 12:12:42 +0800 Subject: [PATCH 05/10] MOD: filesystem_linux.go due to go version difference 1.22.0&1.21.7 to pass circleci gofmt check Signed-off-by: Pamela Mei i540369 --- collector/filesystem_linux.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index fe3f34d835..c388e18b28 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -85,7 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stuckMountsMtx.Lock() if _, ok := stuckMounts[labels.mountPoint]; ok { - labels.device_error = "mountpoint timeout" + labels.device_error = "mountpoint timeout" stats = append(stats, filesystemStats{ labels: labels, deviceError: 1, @@ -213,11 +213,11 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { parts[1] = strings.Replace(parts[1], "\\011", "\t", -1) filesystems = append(filesystems, filesystemLabels{ - device: parts[0], - mountPoint: rootfsStripPrefix(parts[1]), - fsType: parts[2], - options: parts[3], - device_error: "", + device: parts[0], + mountPoint: rootfsStripPrefix(parts[1]), + fsType: parts[2], + options: parts[3], + device_error: "", }) } From d2b9ad84ef13e6796ab53e829c5c8f5fb030ce02 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 18 Feb 2024 16:51:55 +0800 Subject: [PATCH 06/10] MOD: update variable naming Signed-off-by: Pamela Mei i540369 --- collector/filesystem_common.go | 16 ++++++++-------- collector/filesystem_linux.go | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/collector/filesystem_common.go b/collector/filesystem_common.go index 7bbcf58859..f5d5135241 100644 --- a/collector/filesystem_common.go +++ b/collector/filesystem_common.go @@ -73,7 +73,7 @@ type filesystemCollector struct { } type filesystemLabels struct { - device, mountPoint, fsType, options, device_error string + device, mountPoint, fsType, options, deviceError string } type filesystemStats struct { @@ -184,11 +184,11 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.deviceErrorDesc, prometheus.GaugeValue, - s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.roDesc, prometheus.GaugeValue, - s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) if s.deviceError > 0 { @@ -197,23 +197,23 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.sizeDesc, prometheus.GaugeValue, - s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.freeDesc, prometheus.GaugeValue, - s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.availDesc, prometheus.GaugeValue, - s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.filesDesc, prometheus.GaugeValue, - s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.filesFreeDesc, prometheus.GaugeValue, - s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.device_error, + s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) } return nil diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index c388e18b28..bde193747f 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -85,7 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stuckMountsMtx.Lock() if _, ok := stuckMounts[labels.mountPoint]; ok { - labels.device_error = "mountpoint timeout" + labels.deviceError = "mountpoint timeout" stats = append(stats, filesystemStats{ labels: labels, deviceError: 1, @@ -126,7 +126,7 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta close(success) if err != nil { - labels.device_error = err.Error() + labels.deviceError = err.Error() level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) return filesystemStats{ labels: labels, @@ -217,7 +217,7 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { mountPoint: rootfsStripPrefix(parts[1]), fsType: parts[2], options: parts[3], - device_error: "", + deviceError: "", }) } From 5ac4ad3833fac9e0cbe833ce92817808f541c7e6 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 18 Feb 2024 17:11:04 +0800 Subject: [PATCH 07/10] MOD: gofmt to pass circleci Signed-off-by: Pamela Mei i540369 --- collector/filesystem_linux.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index bde193747f..23e5359d11 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -213,10 +213,10 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { parts[1] = strings.Replace(parts[1], "\\011", "\t", -1) filesystems = append(filesystems, filesystemLabels{ - device: parts[0], - mountPoint: rootfsStripPrefix(parts[1]), - fsType: parts[2], - options: parts[3], + device: parts[0], + mountPoint: rootfsStripPrefix(parts[1]), + fsType: parts[2], + options: parts[3], deviceError: "", }) } From 2acba9e17fef3e1103efe25da47b423d698b29c3 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 18 Feb 2024 17:35:15 +0800 Subject: [PATCH 08/10] MOD: gofmt update pass circleci Signed-off-by: Pamela Mei i540369 --- collector/filesystem_linux.go | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 23e5359d11..1d0c8493b9 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -123,8 +123,16 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta buf := new(unix.Statfs_t) err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf) + stuckMountsMtx.Lock() close(success) + // If the mount has been marked as stuck, unmark it and log it's recovery. + if _, ok := stuckMounts[labels.mountPoint]; ok { + level.Debug(c.logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint) + delete(stuckMounts, labels.mountPoint) + } + stuckMountsMtx.Unlock() + if err != nil { labels.deviceError = err.Error() level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) @@ -155,29 +163,17 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger log.Logg select { case <-success: // Success - // If the mount has been marked as stuck, unmark it and log it's recovery. - stuckMountsMtx.Lock() - defer stuckMountsMtx.Unlock() - if _, ok := stuckMounts[mountPoint]; ok { - level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint) - delete(stuckMounts, mountPoint) - } case <-mountCheckTimer.C: // Timed out, mark mount as stuck stuckMountsMtx.Lock() - defer stuckMountsMtx.Unlock() select { case <-success: // Success came in just after the timeout was reached, don't label the mount as stuck - // If the mount has been marked as stuck, unmark it and log it's recovery. - if _, ok := stuckMounts[mountPoint]; ok { - level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint) - delete(stuckMounts, mountPoint) - } default: level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint) stuckMounts[mountPoint] = struct{}{} } + stuckMountsMtx.Unlock() } } From 6cd76ed3b41b0b1981ac006e69df42e0df202bb9 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 18 Feb 2024 18:05:22 +0800 Subject: [PATCH 09/10] MOD: update merge error Signed-off-by: Pamela Mei i540369 --- collector/filesystem_linux.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 16ce0eb7fb..1d0c8493b9 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -209,17 +209,10 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { parts[1] = strings.Replace(parts[1], "\\011", "\t", -1) filesystems = append(filesystems, filesystemLabels{ -<<<<<<< HEAD device: parts[0], mountPoint: rootfsStripPrefix(parts[1]), fsType: parts[2], options: parts[3], -======= - device: parts[0], - mountPoint: rootfsStripPrefix(parts[1]), - fsType: parts[2], - options: parts[3], ->>>>>>> 8d495980343a6ad1177b53b6fb709e1ee6e24736 deviceError: "", }) } From 0c7391d3b59763832e4b21a378649b175b1c39d3 Mon Sep 17 00:00:00 2001 From: Pamela Mei i540369 Date: Sun, 18 Feb 2024 18:32:43 +0800 Subject: [PATCH 10/10] MOD: update merge wrong version Signed-off-by: Pamela Mei i540369 --- collector/filesystem_linux.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 1d0c8493b9..23e5359d11 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -123,16 +123,8 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta buf := new(unix.Statfs_t) err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf) - stuckMountsMtx.Lock() close(success) - // If the mount has been marked as stuck, unmark it and log it's recovery. - if _, ok := stuckMounts[labels.mountPoint]; ok { - level.Debug(c.logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint) - delete(stuckMounts, labels.mountPoint) - } - stuckMountsMtx.Unlock() - if err != nil { labels.deviceError = err.Error() level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) @@ -163,17 +155,29 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger log.Logg select { case <-success: // Success + // If the mount has been marked as stuck, unmark it and log it's recovery. + stuckMountsMtx.Lock() + defer stuckMountsMtx.Unlock() + if _, ok := stuckMounts[mountPoint]; ok { + level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint) + delete(stuckMounts, mountPoint) + } case <-mountCheckTimer.C: // Timed out, mark mount as stuck stuckMountsMtx.Lock() + defer stuckMountsMtx.Unlock() select { case <-success: // Success came in just after the timeout was reached, don't label the mount as stuck + // If the mount has been marked as stuck, unmark it and log it's recovery. + if _, ok := stuckMounts[mountPoint]; ok { + level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint) + delete(stuckMounts, mountPoint) + } default: level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint) stuckMounts[mountPoint] = struct{}{} } - stuckMountsMtx.Unlock() } }