From 995cf834b55eeb4abf953f43bba407f1a4ff6259 Mon Sep 17 00:00:00 2001 From: Zenghui Shi Date: Mon, 28 Nov 2022 16:43:08 +0800 Subject: [PATCH 1/2] Watch firewall reload in sysconfigwatcher Related-Issue: https://issues.redhat.com/browse/NP-641 --- .../openshift-router/deployment.yaml | 2 ++ assets/components/ovn/master/daemonset.yaml | 2 +- docs/howto_sysconf_watch.md | 24 ++++++++++++-- pkg/cmd/run.go | 2 ++ pkg/components/controllers.go | 5 ++- pkg/components/networking.go | 7 +++-- pkg/config/config.go | 2 ++ pkg/sysconfwatch/sysconfwatch_linux.go | 31 ++++++++++++++++--- scripts/auto-rebase/rebase.sh | 2 ++ 9 files changed, 66 insertions(+), 11 deletions(-) diff --git a/assets/components/openshift-router/deployment.yaml b/assets/components/openshift-router/deployment.yaml index 60d3d5ad8b..6c61f1acc5 100644 --- a/assets/components/openshift-router/deployment.yaml +++ b/assets/components/openshift-router/deployment.yaml @@ -63,6 +63,8 @@ spec: value: 1s - name: ROUTER_DOMAIN value: apps.{{ .BaseDomain }} + - name: ROUTER_CREATION_TIMESTAMP + value: {{ .CreationTimestamp }} livenessProbe: httpGet: path: /healthz diff --git a/assets/components/ovn/master/daemonset.yaml b/assets/components/ovn/master/daemonset.yaml index 28c9945577..1b66753e9f 100644 --- a/assets/components/ovn/master/daemonset.yaml +++ b/assets/components/ovn/master/daemonset.yaml @@ -328,7 +328,7 @@ spec: fi # K8S_NODE_IP triggers reconcilation of this daemon when node IP changes - echo "$(date -Iseconds) - starting ovnkube-master, Node: ${K8S_NODE} IP: ${K8S_NODE_IP}" + echo "$(date -Iseconds) - starting ovnkube-master at {{ .CreationTimestamp }}, Node: ${K8S_NODE} IP: ${K8S_NODE_IP}" echo "I$(date "+%m%d %H:%M:%S.%N") - copy ovn-k8s-cni-overlay" cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/ diff --git a/docs/howto_sysconf_watch.md b/docs/howto_sysconf_watch.md index 213d9b5d49..e3b0abd64a 100644 --- a/docs/howto_sysconf_watch.md +++ b/docs/howto_sysconf_watch.md @@ -1,6 +1,10 @@ # 
MicroShift Mitigation of System Configuration Changes -MicroShift depends on the device IP address and system-wide clock settings to remain consistent during its runtime. However, these settings may occasionally change on edge devices (i.e. DHCP or NTP updates). When such changes occur, some MicroShift components may stop functioning properly. To mitigate this situation, MicroShift monitors the mentioned system configuration settings and restarts if a setting change is detected. +MicroShift depends on the following system settings to remain consistent during its runtime: +- Device IP address +- System-wide clock settings +- Iptable configurations. +However, these settings may occasionally change on edge devices (i.e. DHCP or NTP updates). When such changes occur, some MicroShift components may stop functioning properly. To mitigate this situation, MicroShift monitors the mentioned system configuration settings and restarts if a setting change is detected. This document describes how to simulate system configuration changes in a virtual environment and verify that MicroShift service reacts by restarting when necessary. @@ -138,4 +142,20 @@ The below (non-proportional!) graph shows when certificates are rotated. certificate will be rotated for a new one. If the rotated certificate is a CA, all of the certificates it signed get rotated -as well. \ No newline at end of file +as well. + +## Firewall Changes + +Reload the firewall rules with the following command to trigger the MicroShift service restart. + +```bash +sudo firewall-cmd --reload +``` + +Firewall reload action flushes the existing iptable configurations which results in failed network traffic.
+Run the `journalctl` command to verify that the service was restarted. The logs should contain restart and startup messages. + +``` +Dec 11 08:23:30 localhost.localdomain microshift[168331]: sysconfwatch-controller W1211 08:23:30.914772 168331 sysconfwatch_linux.go:112] iptables flush is detected, restarting MicroShift +Dec 11 08:23:31 localhost.localdomain systemd[1]: Starting MicroShift... +``` diff --git a/pkg/cmd/run.go b/pkg/cmd/run.go index 2e1e41f54a..b8290e45d9 100644 --- a/pkg/cmd/run.go +++ b/pkg/cmd/run.go @@ -81,6 +81,8 @@ func RunMicroshift(cfg *config.MicroshiftConfig, flags *pflag.FlagSet) error { os.MkdirAll(microshiftDataDir, 0700) + cfg.CreationTimestamp = time.Now().Format("2006-01-02 15:04:05") + // TODO: change to only initialize what is strictly necessary for the selected role(s) certChains, err := initCerts(cfg) if err != nil { diff --git a/pkg/components/controllers.go b/pkg/components/controllers.go index 4a50f1ded7..ddfa0ea82b 100644 --- a/pkg/components/controllers.go +++ b/pkg/components/controllers.go @@ -162,7 +162,10 @@ func startIngressController(cfg *config.MicroshiftConfig, kubeconfigPath string) return err } - if err := assets.ApplyDeployments(apps, renderTemplate, renderParamsFromConfig(cfg, nil), kubeconfigPath); err != nil { + extraParams := assets.RenderParams{ + "CreationTimestamp": cfg.CreationTimestamp, + } + if err := assets.ApplyDeployments(apps, renderTemplate, renderParamsFromConfig(cfg, extraParams), kubeconfigPath); err != nil { klog.Warningf("Failed to apply apps %v: %v", apps, err) return err } diff --git a/pkg/components/networking.go b/pkg/components/networking.go index 3084a030b6..eceb3688c9 100644 --- a/pkg/components/networking.go +++ b/pkg/components/networking.go @@ -77,9 +77,10 @@ func startCNIPlugin(cfg *config.MicroshiftConfig, kubeconfigPath string) error { return err } extraParams := assets.RenderParams{ - "OVNConfig": ovnConfig, - "KubeconfigPath": kubeconfigPath, - "KubeconfigDir": 
filepath.Join(microshiftDataDir, "/resources/kubeadmin"), + "OVNConfig": ovnConfig, + "KubeconfigPath": kubeconfigPath, + "KubeconfigDir": filepath.Join(microshiftDataDir, "/resources/kubeadmin"), + "CreationTimestamp": cfg.CreationTimestamp, } if err := assets.ApplyConfigMaps(cm, renderTemplate, renderParamsFromConfig(cfg, extraParams), kubeconfigPath); err != nil { klog.Warningf("Failed to apply configMap %v %v", cm, err) diff --git a/pkg/config/config.go b/pkg/config/config.go index 31db8c64db..73897d8d96 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -64,6 +64,8 @@ type MicroshiftConfig struct { NodeIP string `json:"nodeIP"` BaseDomain string `json:"baseDomain"` Cluster ClusterConfig `json:"cluster"` + // Used to trigger a force rollout on network components upon MicroShift restart + CreationTimestamp string `json:"creationTimestamp"` Ingress IngressConfig `json:"-"` } diff --git a/pkg/sysconfwatch/sysconfwatch_linux.go b/pkg/sysconfwatch/sysconfwatch_linux.go index b68d159bd6..6495a8b328 100644 --- a/pkg/sysconfwatch/sysconfwatch_linux.go +++ b/pkg/sysconfwatch/sysconfwatch_linux.go @@ -25,15 +25,19 @@ import ( "github.com/openshift/microshift/pkg/config" "github.com/openshift/microshift/pkg/util" "golang.org/x/sys/unix" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" + utiliptables "k8s.io/kubernetes/pkg/util/iptables" + utilexec "k8s.io/utils/exec" ) const sysConfigCheckInterval = time.Second * 5 const sysConfigAllowedTimeDrift = time.Second * 10 type SysConfWatchController struct { - NodeIP string - timerFd int + NodeIP string + timerFd int + iptClients []utiliptables.Interface } func NewSysConfWatchController(cfg *config.MicroshiftConfig) *SysConfWatchController { @@ -54,9 +58,17 @@ func NewSysConfWatchController(cfg *config.MicroshiftConfig) *SysConfWatchContro klog.Fatalf("failed to start a realtime clock timer %v", err) } + // Initialize network iptables util + exec := utilexec.New() + iptClients := []utiliptables.Interface{ + 
utiliptables.New(exec, utiliptables.ProtocolIPv4), + utiliptables.New(exec, utiliptables.ProtocolIPv6), + } + return &SysConfWatchController{ - NodeIP: cfg.NodeIP, - timerFd: fd, + NodeIP: cfg.NodeIP, + timerFd: fd, + iptClients: iptClients, } } @@ -92,6 +104,17 @@ func (c *SysConfWatchController) Run(ctx context.Context, ready chan<- struct{}, // Take a snapshot of the system and monototic clocks as a base reference stimeRef, mtimeRef := getSysMonTimes() + for i := range c.iptClients { + iptClient := c.iptClients[i] + go iptClient.Monitor( + utiliptables.Chain("MICROSHIFT-SYSCONF-CANARY"), + []utiliptables.Table{utiliptables.TableMangle, utiliptables.TableNAT, utiliptables.TableFilter}, + func() { klog.Warningf("iptables flush is detected, restarting MicroShift"); os.Exit(0) }, + sysConfigCheckInterval, + wait.NeverStop, + ) + } + klog.Infof("sysconfwatch-controller is ready") close(ready) for { diff --git a/scripts/auto-rebase/rebase.sh b/scripts/auto-rebase/rebase.sh index 7e1dbb986c..403dd94e64 100755 --- a/scripts/auto-rebase/rebase.sh +++ b/scripts/auto-rebase/rebase.sh @@ -598,6 +598,8 @@ update_manifests() { yq -i '.spec.template.spec.containers[0].env += {"name": "ROUTER_USE_PROXY_PROTOCOL", "value": "false"}' "${REPOROOT}"/assets/components/openshift-router/deployment.yaml yq -i '.spec.template.spec.containers[0].env += {"name": "GRACEFUL_SHUTDOWN_DELAY", "value": "1s"}' "${REPOROOT}"/assets/components/openshift-router/deployment.yaml yq -i '.spec.template.spec.containers[0].env += {"name": "ROUTER_DOMAIN", "value": "apps.REPLACE_CLUSTER_DOMAIN"}' "${REPOROOT}"/assets/components/openshift-router/deployment.yaml + # Add creation timestamp to trigger force rollout on MicroShift restart + yq -i '.spec.template.spec.containers[0].env += {"name": "ROUTER_CREATION_TIMESTAMP", "value": "{{ .CreationTimestamp }}"}' "${REPOROOT}"/assets/components/openshift-router/deployment.yaml # 4) Replace MicroShift templating vars (do this last, as yq trips over Go templates) 
sed -i 's|REPLACE_CLUSTER_DOMAIN|{{ .BaseDomain }}|g' "${REPOROOT}"/assets/components/openshift-router/deployment.yaml sed -i 's|REPLACE_ROUTER_IMAGE|{{ .ReleaseImage.haproxy_router }}|' "${REPOROOT}"/assets/components/openshift-router/deployment.yaml From 694a1671af6eaf272ff808f0e71ac1ca7c6da803 Mon Sep 17 00:00:00 2001 From: Zenghui Shi Date: Wed, 21 Dec 2022 17:14:02 +0800 Subject: [PATCH 2/2] Move firewall watch logic to infrastructure manager --- docs/howto_sysconf_watch.md | 16 ++++---- pkg/cmd/run.go | 2 - pkg/components/components.go | 40 +++++++++++++++++++- pkg/components/controllers.go | 3 +- pkg/components/networking.go | 3 +- pkg/config/config.go | 2 - pkg/controllers/infra-services-controller.go | 14 ++++++- pkg/sysconfwatch/sysconfwatch_linux.go | 31 ++------------- 8 files changed, 68 insertions(+), 43 deletions(-) diff --git a/docs/howto_sysconf_watch.md b/docs/howto_sysconf_watch.md index e3b0abd64a..a5533ea28f 100644 --- a/docs/howto_sysconf_watch.md +++ b/docs/howto_sysconf_watch.md @@ -3,8 +3,8 @@ MicroShift depends on the following system settings to remain consistent during its runtime: - Device IP address - System-wide clock settings -- Iptable configurations. -However, these settings may occasionally change on edge devices (i.e. DHCP or NTP updates). When such changes occur, some MicroShift components may stop functioning properly. To mitigate this situation, MicroShift monitors the mentioned system configuration settings and restarts if a setting change is detected. +- Iptable configurations +However, these settings may occasionally change on edge devices (i.e. DHCP or NTP updates). When such changes occur, some MicroShift components may stop functioning properly. To mitigate this situation, MicroShift monitors the mentioned system configuration settings, restarts or reloads components if a setting change is detected. 
This document describes how to simulate system configuration changes in a virtual environment and verify that MicroShift service reacts by restarting when necessary. @@ -146,16 +146,18 @@ as well. ## Firewall Changes -Reload the firewall rules with the following command to trigger the MicroShift service restart. +Reload the firewall rules with the following command to trigger the reloading of MicroShift components. ```bash sudo firewall-cmd --reload ``` -Firewall reload action flushes the existing iptable configurations which results in failed network traffic.
-Run the `journalctl` command to verify that the service was restarted. The logs should contain restart and startup messages. +The firewall reload action flushes the iptables configuration, which results in failed network traffic.

+Run the `journalctl -xu microshift` command to verify that the components are reloaded. The logs should contain reload messages. ``` -Dec 11 08:23:30 localhost.localdomain microshift[168331]: sysconfwatch-controller W1211 08:23:30.914772 168331 sysconfwatch_linux.go:112] iptables flush is detected, restarting MicroShift -Dec 11 08:23:31 localhost.localdomain systemd[1]: Starting MicroShift... +Dec 21 08:57:01 localhost.localdomain microshift[2005232]: infrastructure-services-manager I1221 08:57:01.567046 2005232 iptables.go:590] iptables canary mangle/MICROSHIFT-CANARY deleted +Dec 21 08:57:01 localhost.localdomain microshift[2005232]: infrastructure-services-manager W1221 08:57:01.582233 2005232 components.go:25] Iptables flush is detected, reloading affected components +Dec 21 08:57:01 localhost.localdomain microshift[2005232]: infrastructure-services-manager I1221 08:57:01.582276 2005232 components.go:64] Reload ingress controller +Dec 21 08:57:01 localhost.localdomain microshift[2005232]: infrastructure-services-manager I1221 08:57:01.644365 2005232 components.go:69] Reload CNI plugin ``` diff --git a/pkg/cmd/run.go b/pkg/cmd/run.go index b8290e45d9..2e1e41f54a 100644 --- a/pkg/cmd/run.go +++ b/pkg/cmd/run.go @@ -81,8 +81,6 @@ func RunMicroshift(cfg *config.MicroshiftConfig, flags *pflag.FlagSet) error { os.MkdirAll(microshiftDataDir, 0700) - cfg.CreationTimestamp = time.Now().Format("2006-01-02 15:04:05") - // TODO: change to only initialize what is strictly necessary for the selected role(s) certChains, err := initCerts(cfg) if err != nil { diff --git a/pkg/components/components.go b/pkg/components/components.go index c20b5f2fc9..f86a88ef91 100755 --- a/pkg/components/components.go +++ b/pkg/components/components.go @@ -1,15 +1,37 @@ package components import ( + "time" + "github.com/openshift/microshift/pkg/config" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/util/iptables" ) +const iptablesCheckInterval = time.Second * 5 
+ var microshiftDataDir = config.GetDataDir() -func StartComponents(cfg *config.MicroshiftConfig) error { +func StartComponents(cfg *config.MicroshiftConfig, iptClients []iptables.Interface) error { kubeAdminConfig := cfg.KubeConfigPath(config.KubeAdmin) + for i := range iptClients { + iptClient := iptClients[i] + go iptClient.Monitor( + iptables.Chain("MICROSHIFT-CANARY"), + []iptables.Table{iptables.TableMangle, iptables.TableNAT, iptables.TableFilter}, + func() { + klog.Warningf("Iptables flush is detected, reloading affected components") + if err := reloadOnIptableFlush(cfg); err != nil { + klog.Errorf("Failed to reload affected components: %v", err) + } + }, + iptablesCheckInterval, + wait.NeverStop, + ) + } + if err := startServiceCAController(cfg, kubeAdminConfig); err != nil { klog.Warningf("Failed to start service-ca controller: %v", err) return err @@ -35,3 +57,19 @@ func StartComponents(cfg *config.MicroshiftConfig) error { } return nil } + +func reloadOnIptableFlush(cfg *config.MicroshiftConfig) error { + kubeAdminConfig := cfg.KubeConfigPath(config.KubeAdmin) + + klog.Infof("Reload ingress controller") + if err := startIngressController(cfg, kubeAdminConfig); err != nil { + klog.Warningf("Failed to reload ingress router controller: %v", err) + return err + } + klog.Infof("Reload CNI plugin") + if err := startCNIPlugin(cfg, kubeAdminConfig); err != nil { + klog.Warningf("Failed to reload CNI plugin: %v", err) + return err + } + return nil +} diff --git a/pkg/components/controllers.go b/pkg/components/controllers.go index ddfa0ea82b..da014eadc2 100644 --- a/pkg/components/controllers.go +++ b/pkg/components/controllers.go @@ -2,6 +2,7 @@ package components import ( "os" + "time" "github.com/openshift/microshift/pkg/assets" "github.com/openshift/microshift/pkg/config" @@ -163,7 +164,7 @@ func startIngressController(cfg *config.MicroshiftConfig, kubeconfigPath string) } extraParams := assets.RenderParams{ - "CreationTimestamp": cfg.CreationTimestamp, + 
"CreationTimestamp": time.Now().Format("2006-01-02 15:04:05"), } if err := assets.ApplyDeployments(apps, renderTemplate, renderParamsFromConfig(cfg, extraParams), kubeconfigPath); err != nil { klog.Warningf("Failed to apply apps %v: %v", apps, err) diff --git a/pkg/components/networking.go b/pkg/components/networking.go index eceb3688c9..dcbd901ac9 100644 --- a/pkg/components/networking.go +++ b/pkg/components/networking.go @@ -3,6 +3,7 @@ package components import ( "fmt" "path/filepath" + "time" "github.com/openshift/microshift/pkg/assets" "github.com/openshift/microshift/pkg/config" @@ -80,7 +81,7 @@ func startCNIPlugin(cfg *config.MicroshiftConfig, kubeconfigPath string) error { "OVNConfig": ovnConfig, "KubeconfigPath": kubeconfigPath, "KubeconfigDir": filepath.Join(microshiftDataDir, "/resources/kubeadmin"), - "CreationTimestamp": cfg.CreationTimestamp, + "CreationTimestamp": time.Now().Format("2006-01-02 15:04:05"), } if err := assets.ApplyConfigMaps(cm, renderTemplate, renderParamsFromConfig(cfg, extraParams), kubeconfigPath); err != nil { klog.Warningf("Failed to apply configMap %v %v", cm, err) diff --git a/pkg/config/config.go b/pkg/config/config.go index 73897d8d96..31db8c64db 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -64,8 +64,6 @@ type MicroshiftConfig struct { NodeIP string `json:"nodeIP"` BaseDomain string `json:"baseDomain"` Cluster ClusterConfig `json:"cluster"` - // Used to trigger a force rollout on network components upon MicroShift restart - CreationTimestamp string `json:"creationTimestamp"` Ingress IngressConfig `json:"-"` } diff --git a/pkg/controllers/infra-services-controller.go b/pkg/controllers/infra-services-controller.go index 08a5c85298..74f8f30ceb 100644 --- a/pkg/controllers/infra-services-controller.go +++ b/pkg/controllers/infra-services-controller.go @@ -23,15 +23,25 @@ import ( "github.com/openshift/microshift/pkg/assets" "github.com/openshift/microshift/pkg/components" 
"github.com/openshift/microshift/pkg/config" + "k8s.io/kubernetes/pkg/util/iptables" + "k8s.io/utils/exec" ) type InfrastructureServicesManager struct { - cfg *config.MicroshiftConfig + cfg *config.MicroshiftConfig + iptClients []iptables.Interface } func NewInfrastructureServices(cfg *config.MicroshiftConfig) *InfrastructureServicesManager { s := &InfrastructureServicesManager{} s.cfg = cfg + + // Initialize iptables util + exec := exec.New() + s.iptClients = []iptables.Interface{ + iptables.New(exec, iptables.ProtocolIPv4), + iptables.New(exec, iptables.ProtocolIPv6), + } return s } @@ -55,7 +65,7 @@ func (s *InfrastructureServicesManager) Run(ctx context.Context, ready chan<- st } // TO-DO add readiness check - if err := components.StartComponents(s.cfg); err != nil { + if err := components.StartComponents(s.cfg, s.iptClients); err != nil { return err } klog.Infof("%s launched ocp componets", s.Name()) diff --git a/pkg/sysconfwatch/sysconfwatch_linux.go b/pkg/sysconfwatch/sysconfwatch_linux.go index 6495a8b328..b68d159bd6 100644 --- a/pkg/sysconfwatch/sysconfwatch_linux.go +++ b/pkg/sysconfwatch/sysconfwatch_linux.go @@ -25,19 +25,15 @@ import ( "github.com/openshift/microshift/pkg/config" "github.com/openshift/microshift/pkg/util" "golang.org/x/sys/unix" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" - utiliptables "k8s.io/kubernetes/pkg/util/iptables" - utilexec "k8s.io/utils/exec" ) const sysConfigCheckInterval = time.Second * 5 const sysConfigAllowedTimeDrift = time.Second * 10 type SysConfWatchController struct { - NodeIP string - timerFd int - iptClients []utiliptables.Interface + NodeIP string + timerFd int } func NewSysConfWatchController(cfg *config.MicroshiftConfig) *SysConfWatchController { @@ -58,17 +54,9 @@ func NewSysConfWatchController(cfg *config.MicroshiftConfig) *SysConfWatchContro klog.Fatalf("failed to start a realtime clock timer %v", err) } - // Initialize network iptables util - exec := utilexec.New() - iptClients := 
[]utiliptables.Interface{ - utiliptables.New(exec, utiliptables.ProtocolIPv4), - utiliptables.New(exec, utiliptables.ProtocolIPv6), - } - return &SysConfWatchController{ - NodeIP: cfg.NodeIP, - timerFd: fd, - iptClients: iptClients, + NodeIP: cfg.NodeIP, + timerFd: fd, } } @@ -104,17 +92,6 @@ func (c *SysConfWatchController) Run(ctx context.Context, ready chan<- struct{}, // Take a snapshot of the system and monototic clocks as a base reference stimeRef, mtimeRef := getSysMonTimes() - for i := range c.iptClients { - iptClient := c.iptClients[i] - go iptClient.Monitor( - utiliptables.Chain("MICROSHIFT-SYSCONF-CANARY"), - []utiliptables.Table{utiliptables.TableMangle, utiliptables.TableNAT, utiliptables.TableFilter}, - func() { klog.Warningf("iptables flush is detected, restarting MicroShift"); os.Exit(0) }, - sysConfigCheckInterval, - wait.NeverStop, - ) - } - klog.Infof("sysconfwatch-controller is ready") close(ready) for {