From 6f4fb69c1e33b6dcfc2e37e2608d933a34936e9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Fri, 11 Sep 2020 10:07:15 +0200 Subject: [PATCH 1/4] OpenStack: Fix keepalived dysfunction on vrrp iface change When using CNV or other operators that modify how the node is connected to the network, we may end up in the case where the configured VRRP interface no longer has an address in the network that it is configured to hold virtual IPs in. This patch takes a page from what we do for HAProxy and adds a monitor side car container that checks keepalived and reloads it when necessary. This ports #1124 to OpenStack platform, alongside with fixes from #1508 and #1604. --- .../openstack/files/openstack-keepalived.yaml | 108 ++++++++++++------ 1 file changed, 71 insertions(+), 37 deletions(-) diff --git a/templates/common/openstack/files/openstack-keepalived.yaml b/templates/common/openstack/files/openstack-keepalived.yaml index a02a98a283..a03ebd855b 100644 --- a/templates/common/openstack/files/openstack-keepalived.yaml +++ b/templates/common/openstack/files/openstack-keepalived.yaml @@ -23,31 +23,8 @@ contents: - name: conf-dir hostPath: path: "/etc/keepalived" - initContainers: - - name: render-config - image: {{ .Images.baremetalRuntimeCfgImage }} - command: - - runtimecfg - - render - - "/etc/kubernetes/kubeconfig" - - "--api-vip" - - "{{ .Infra.Status.PlatformStatus.OpenStack.APIServerInternalIP }}" - - "--dns-vip" - - "{{ .Infra.Status.PlatformStatus.OpenStack.NodeDNSIP }}" - - "--ingress-vip" - - "{{ .Infra.Status.PlatformStatus.OpenStack.IngressIP }}" - - "/config" - - "--out-dir" - - "/etc/keepalived" - resources: {} - volumeMounts: - - name: resource-dir - mountPath: "/config" - - name: kubeconfig - mountPath: "/etc/kubernetes/kubeconfig" - - name: conf-dir - mountPath: "/etc/keepalived" - imagePullPolicy: IfNotPresent + - name: run-dir + empty-dir: {} containers: - name: keepalived securityContext: @@ -57,14 +34,40 @@ contents: - name: NSS_SDB_USE_CACHE value: "no" command: - - /usr/sbin/keepalived - args: - - "-f" - - "/etc/keepalived/keepalived.conf" - - "--dont-fork" - - "--vrrp" - - "--log-detail" - - "--log-console" + - /bin/bash + - -c + - | + #/bin/bash + reload_keepalived() + { + if pid=$(pgrep -o keepalived); then + kill -s SIGHUP "$pid" + else + /usr/sbin/keepalived -f /etc/keepalived/keepalived.conf --dont-fork --vrrp --log-detail --log-console & + fi + } + + msg_handler() + { + while read -r line; do + echo "The client sent: $line" >&2 + # currently only 'reload' msg is supported + if [ "$line" = reload ]; then + reload_keepalived + fi + done + } + + set -ex + declare -r keepalived_sock="/var/run/keepalived/keepalived.sock" + export -f msg_handler + export -f reload_keepalived + if [ -s "/etc/keepalived/keepalived.conf" ]; then + /usr/sbin/keepalived -f /etc/keepalived/keepalived.conf --dont-fork --vrrp --log-detail --log-console & + fi + + rm -f "$keepalived_sock" + socat UNIX-LISTEN:${keepalived_sock},fork system:'bash -c msg_handler' resources: requests: cpu: 100m @@ -72,16 +75,47 @@ contents: volumeMounts: - name: conf-dir mountPath: "/etc/keepalived" + - name: run-dir + mountPath: "/var/run/keepalived" livenessProbe: exec: command: - - pgrep - - keepalived - initialDelaySeconds: 10 + - /bin/bash + - -c + - | + kill -s SIGUSR1 "$(pgrep -o keepalived)" && ! grep -q "State = FAULT" /tmp/keepalived.data + initialDelaySeconds: 20 terminationMessagePolicy: FallbackToLogsOnError imagePullPolicy: IfNotPresent + - name: keepalived-monitor + image: {{ .Images.baremetalRuntimeCfgImage }} + command: + - dynkeepalived + - "/etc/kubernetes/kubeconfig" + - "/config/keepalived.conf.tmpl" + - "/etc/keepalived/keepalived.conf" + - "--api-vip" + - "{{ .Infra.Status.PlatformStatus.OpenStack.APIServerInternalIP }}" + - "--dns-vip" + - "{{ .Infra.Status.PlatformStatus.OpenStack.NodeDNSIP }}" + - "--ingress-vip" + - "{{ .Infra.Status.PlatformStatus.OpenStack.IngressIP }}" + resources: + requests: + cpu: 100m + memory: 200Mi + volumeMounts: + - name: resource-dir + mountPath: "/config" + - name: kubeconfig + mountPath: "/etc/kubernetes/kubeconfig" + - name: conf-dir + mountPath: "/etc/keepalived" + - name: run-dir + mountPath: "/var/run/keepalived" + imagePullPolicy: IfNotPresent hostNetwork: true tolerations: - operator: Exists priorityClassName: system-node-critical - status: {} \ No newline at end of file + status: {} From 01951661bdb37f1870b00d7cf5094f813f5dd105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Fri, 11 Sep 2020 10:33:51 +0200 Subject: [PATCH 2/4] OpenStack: Don't failover api vip if loadbalanced endpoint is responding We're having some issues in ci with api flakiness that appear to be related to VIP failover. In order to reduce the need to do failovers during normal circumstances, we want to leave the VIP alone as long as the loadbalanced api endpoint on the node is responding. Currently if the local api service on a node stops responding it will trigger a failover. This is unnecessary since in most cases haproxy will continue to distribute the traffic while the local api restarts. In order to get this behavior, the chk_ocp vrrp_script is split in two. The reason for this is that we still want to handle the case where all haproxy instances in the cluster go down, but at least one api service is still functional. One check looks at the haproxy endpoint only. This one has a higher weight since it's the preferred situation. The other check looks for either the haproxy endpoint or the local endpoint. This means that if haproxy is up then both checks will succeed and the node will have maximum priority regardless of the state of its local api. However, if haproxy goes down but the local api is still working then at least the priority will be higher than the minimum because the local api is responding. It was also necessary to add a check that the haproxy firewall rule is in place. It does us no good to have the loadbalancer working if traffic isn't being routed to it. This ports #1893 to OpenStack platform. --- .../openstack/files/openstack-keepalived.yaml | 33 +++++++++++++++ .../files/openstack-haproxy-haproxy.yaml | 2 +- .../openstack/files/openstack-haproxy.yaml | 2 +- .../openstack-keepalived-keepalived.yaml | 42 ++++++++++++++++--- .../openstack-keepalived-script-both.yaml | 7 ++++ .../files/openstack-keepalived-script.yaml | 7 ++++ 6 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 templates/master/00-master/openstack/files/openstack-keepalived-script-both.yaml create mode 100644 templates/master/00-master/openstack/files/openstack-keepalived-script.yaml diff --git a/templates/common/openstack/files/openstack-keepalived.yaml b/templates/common/openstack/files/openstack-keepalived.yaml index a03ebd855b..321f23316f 100644 --- a/templates/common/openstack/files/openstack-keepalived.yaml +++ b/templates/common/openstack/files/openstack-keepalived.yaml @@ -17,6 +17,9 @@ contents: - name: resource-dir hostPath: path: "/etc/kubernetes/static-pod-resources/keepalived" + - name: script-dir + hostPath: + path: "/etc/kubernetes/static-pod-resources/keepalived/scripts" - name: kubeconfig hostPath: path: "/etc/kubernetes/kubeconfig" @@ -25,6 +28,34 @@ contents: path: "/etc/keepalived" - name: run-dir empty-dir: {} + - name: chroot-host + hostPath: + path: "/" + initContainers: + - name: render-config-keepalived + image: {{ .Images.baremetalRuntimeCfgImage }} + command: + - runtimecfg + - render + - "/etc/kubernetes/kubeconfig" + - "--api-vip" + - "{{ .Infra.Status.PlatformStatus.OpenStack.APIServerInternalIP }}" + - "--dns-vip" + - "{{ .Infra.Status.PlatformStatus.OpenStack.NodeDNSIP }}" + - "--ingress-vip" + - "{{ .Infra.Status.PlatformStatus.OpenStack.IngressIP }}" + - "/config" + - "--out-dir" + - "/etc/keepalived" + resources: {} + volumeMounts: + - name: kubeconfig + mountPath: "/etc/kubernetes/kubeconfig" + - name: script-dir + mountPath: "/config" + - name: conf-dir + mountPath: "/etc/keepalived" + imagePullPolicy: IfNotPresent containers: - name: keepalived securityContext: @@ -113,6 +144,8 @@ contents: mountPath: "/etc/keepalived" - name: run-dir mountPath: "/var/run/keepalived" + - name: chroot-host + mountPath: "/host" imagePullPolicy: IfNotPresent hostNetwork: true tolerations: diff --git a/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml b/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml index a793235f5e..7c82e26bfc 100644 --- a/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml +++ b/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml @@ -22,7 +22,7 @@ contents: listen health_check_http_url bind :::50936 v4v6 mode http - monitor-uri /readyz + monitor-uri /haproxy_ready option dontlognull listen stats bind localhost:{{`{{ .LBConfig.StatPort }}`}} diff --git a/templates/master/00-master/openstack/files/openstack-haproxy.yaml b/templates/master/00-master/openstack/files/openstack-haproxy.yaml index 1f4761fce4..486571a708 100644 --- a/templates/master/00-master/openstack/files/openstack-haproxy.yaml +++ b/templates/master/00-master/openstack/files/openstack-haproxy.yaml @@ -99,7 +99,7 @@ contents: livenessProbe: initialDelaySeconds: 10 httpGet: - path: /readyz + path: /haproxy_ready port: 50936 terminationMessagePolicy: FallbackToLogsOnError imagePullPolicy: IfNotPresent diff --git a/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml b/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml index 8a25a78c3f..b7c5c97fc8 100644 --- a/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml +++ b/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml @@ -3,15 +3,44 @@ mode: 0644 path: "/etc/kubernetes/static-pod-resources/keepalived/keepalived.conf.tmpl" contents: inline: | - vrrp_script chk_ocp { - script "/usr/bin/curl -o /dev/null -kLfs https://localhost:6443/readyz && /usr/bin/curl -o /dev/null -kLfs http://localhost:50936/readyz" - interval 1 - weight 50 + global_defs { + enable_script_security + script_user root } + vrrp_script chk_dns { script "/usr/bin/host -t SRV _etcd-server-ssl._tcp.{{ .EtcdDiscoveryDomain }} localhost" interval 1 - weight 50 + weight 20 + rise 3 + fall 2 + } + + # These are separate checks to provide the following behavior: + # If the loadbalanced endpoint is responding then all is well regardless + # of what the local api status is. Both checks will return success and + # we'll have the maximum priority. This means as long as there is a node + # with a functional loadbalancer it will get the VIP. + # If all of the loadbalancers go down but the local api is still running, + # the _both check will still succeed and allow any node with a functional + # api to take the VIP. This isn't preferred because it means all api + # traffic will go through one node, but at least it keeps the api available. + vrrp_script chk_ocp_lb { + script "/usr/bin/timeout 0.9 /etc/keepalived/chk_ocp_script.sh" + interval 1 + weight 20 + rise 3 + fall 2 + } + + vrrp_script chk_ocp_both { + script "/usr/bin/timeout 0.9 /etc/keepalived/chk_ocp_script_both.sh" + interval 1 + # Use a smaller weight for this check so it won't trigger the move from + # bootstrap to master by itself. + weight 5 + rise 3 + fall 2 } # TODO: Improve this check. The port is assumed to be alive. # Need to assess what is the ramification if the port is not there. @@ -34,7 +63,8 @@ contents: {{`{{ .Cluster.APIVIP }}`}}/{{`{{ .Cluster.VIPNetmask }}`}} } track_script { - chk_ocp + chk_ocp_lb + chk_ocp_both } } vrrp_instance {{`{{ .Cluster.Name }}`}}_DNS { diff --git a/templates/master/00-master/openstack/files/openstack-keepalived-script-both.yaml b/templates/master/00-master/openstack/files/openstack-keepalived-script-both.yaml new file mode 100644 index 0000000000..3056ae97a9 --- /dev/null +++ b/templates/master/00-master/openstack/files/openstack-keepalived-script-both.yaml @@ -0,0 +1,7 @@ +filesystem: "root" +mode: 0755 +path: "/etc/kubernetes/static-pod-resources/keepalived/scripts/chk_ocp_script_both.sh.tmpl" +contents: + inline: | + #!/bin/bash + /usr/bin/curl -o /dev/null -kLfs https://localhost:{{`{{ .LBConfig.LbPort }}`}}/readyz && [ -e /var/run/keepalived/iptables-rule-exists ] || /usr/bin/curl -kLfs https://localhost:{{`{{ .LBConfig.ApiPort }}`}}/readyz diff --git a/templates/master/00-master/openstack/files/openstack-keepalived-script.yaml b/templates/master/00-master/openstack/files/openstack-keepalived-script.yaml new file mode 100644 index 0000000000..78707f4274 --- /dev/null +++ b/templates/master/00-master/openstack/files/openstack-keepalived-script.yaml @@ -0,0 +1,7 @@ +filesystem: "root" +mode: 0755 +path: "/etc/kubernetes/static-pod-resources/keepalived/scripts/chk_ocp_script.sh.tmpl" +contents: + inline: | + #!/bin/bash + /usr/bin/curl -o /dev/null -kLfs https://localhost:{{`{{ .LBConfig.LbPort }}`}}/readyz && [ -e /var/run/keepalived/iptables-rule-exists ] From a7bb6a6002ef327cd323634c750ad1eabf412038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Fri, 11 Sep 2020 14:19:12 +0200 Subject: [PATCH 3/4] OpenStack: Add security context for ipfilters to run Partial port of #1768 to OpenStack platform --- .../openstack/files/openstack-keepalived.yaml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/templates/common/openstack/files/openstack-keepalived.yaml b/templates/common/openstack/files/openstack-keepalived.yaml index 321f23316f..7aa78af541 100644 --- a/templates/common/openstack/files/openstack-keepalived.yaml +++ b/templates/common/openstack/files/openstack-keepalived.yaml @@ -22,7 +22,10 @@ contents: path: "/etc/kubernetes/static-pod-resources/keepalived/scripts" - name: kubeconfig hostPath: - path: "/etc/kubernetes/kubeconfig" + path: "/etc/kubernetes" + - name: kubeconfigvarlib + hostPath: + path: "/var/lib/kubelet" - name: conf-dir hostPath: path: "/etc/keepalived" @@ -50,7 +53,7 @@ contents: resources: {} volumeMounts: - name: kubeconfig - mountPath: "/etc/kubernetes/kubeconfig" + mountPath: "/etc/kubernetes" - name: script-dir mountPath: "/config" - name: conf-dir @@ -119,10 +122,17 @@ contents: terminationMessagePolicy: FallbackToLogsOnError imagePullPolicy: IfNotPresent - name: keepalived-monitor + securityContext: + privileged: true image: {{ .Images.baremetalRuntimeCfgImage }} + env: + - name: ENABLE_UNICAST + value: "no" + - name: IS_BOOTSTRAP + value: "no" command: - dynkeepalived - - "/etc/kubernetes/kubeconfig" + - "/var/lib/kubelet/kubeconfig" - "/config/keepalived.conf.tmpl" - "/etc/keepalived/keepalived.conf" - "--api-vip" @@ -138,8 +148,8 @@ contents: volumeMounts: - name: resource-dir mountPath: "/config" - - name: kubeconfig - mountPath: "/etc/kubernetes/kubeconfig" + - name: kubeconfigvarlib + mountPath: "/var/lib/kubelet" - name: conf-dir mountPath: "/etc/keepalived" - name: run-dir From 76d8f4a07267f21e2beea79601c3682007b9613b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Andr=C3=A9?= Date: Wed, 16 Sep 2020 09:17:03 +0200 Subject: [PATCH 4/4] OpenStack: Reverse haproxy and keepalived check timings Since we moved to keepalived healthchecking against haproxy, we want haproxy to handle most failures so the VIP doesn't have to move. However, previously the time it took for haproxy to recognize an outage on a node was longer than it was for keepalived, which resulted in the VIP moving before haproxy removed the failing backend. This change makes the haproxy interval 1 second, so it should notice outages in 2 seconds or less (because it has a fall value of 2). The keepalived interval is changed to 2, which means it will detect failures in 2 to 4 seconds (also a fall value of 2). This means haproxy should deal with api outages before keepalived does and allow the VIP to stay on the same node. This ports https://github.com/openshift/machine-config-operator/pull/2075/ to OpenStack platform. --- .../openstack/files/openstack-haproxy-haproxy.yaml | 2 +- .../openstack/files/openstack-keepalived-keepalived.yaml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml b/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml index 7c82e26bfc..1a0568925e 100644 --- a/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml +++ b/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml @@ -37,5 +37,5 @@ contents: option log-health-checks balance roundrobin {{`{{- range .LBConfig.Backends }} - server {{ .Host }} {{ .Address }}:{{ .Port }} weight 1 verify none check check-ssl inter 3s fall 2 rise 3 + server {{ .Host }} {{ .Address }}:{{ .Port }} weight 1 verify none check check-ssl inter 1s fall 2 rise 3 {{- end }}`}} diff --git a/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml b/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml index b7c5c97fc8..aca355a777 100644 --- a/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml +++ b/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml @@ -26,16 +26,16 @@ contents: # api to take the VIP. This isn't preferred because it means all api # traffic will go through one node, but at least it keeps the api available. vrrp_script chk_ocp_lb { - script "/usr/bin/timeout 0.9 /etc/keepalived/chk_ocp_script.sh" - interval 1 + script "/usr/bin/timeout 1.9 /etc/keepalived/chk_ocp_script.sh" + interval 2 weight 20 rise 3 fall 2 } vrrp_script chk_ocp_both { - script "/usr/bin/timeout 0.9 /etc/keepalived/chk_ocp_script_both.sh" - interval 1 + script "/usr/bin/timeout 1.9 /etc/keepalived/chk_ocp_script_both.sh" + interval 2 # Use a smaller weight for this check so it won't trigger the move from # bootstrap to master by itself. weight 5