diff --git a/templates/common/openstack/files/openstack-keepalived.yaml b/templates/common/openstack/files/openstack-keepalived.yaml
index a02a98a283..7aa78af541 100644
--- a/templates/common/openstack/files/openstack-keepalived.yaml
+++ b/templates/common/openstack/files/openstack-keepalived.yaml
@@ -17,14 +17,25 @@ contents:
       - name: resource-dir
         hostPath:
           path: "/etc/kubernetes/static-pod-resources/keepalived"
+      - name: script-dir
+        hostPath:
+          path: "/etc/kubernetes/static-pod-resources/keepalived/scripts"
       - name: kubeconfig
         hostPath:
-          path: "/etc/kubernetes/kubeconfig"
+          path: "/etc/kubernetes"
+      - name: kubeconfigvarlib
+        hostPath:
+          path: "/var/lib/kubelet"
       - name: conf-dir
         hostPath:
           path: "/etc/keepalived"
+      - name: run-dir
+        emptyDir: {}
+      - name: chroot-host
+        hostPath:
+          path: "/"
       initContainers:
-      - name: render-config
+      - name: render-config-keepalived
         image: {{ .Images.baremetalRuntimeCfgImage }}
         command:
         - runtimecfg
@@ -41,10 +52,10 @@ contents:
         - "/etc/keepalived"
         resources: {}
         volumeMounts:
-        - name: resource-dir
-          mountPath: "/config"
         - name: kubeconfig
-          mountPath: "/etc/kubernetes/kubeconfig"
+          mountPath: "/etc/kubernetes"
+        - name: script-dir
+          mountPath: "/config"
         - name: conf-dir
           mountPath: "/etc/keepalived"
         imagePullPolicy: IfNotPresent
@@ -57,14 +68,44 @@ contents:
           - name: NSS_SDB_USE_CACHE
             value: "no"
         command:
-        - /usr/sbin/keepalived
-        args:
-        - "-f"
-        - "/etc/keepalived/keepalived.conf"
-        - "--dont-fork"
-        - "--vrrp"
-        - "--log-detail"
-        - "--log-console"
+        - /bin/bash
+        - -c
+        - |
+          #!/bin/bash
+          # Send SIGHUP to the oldest keepalived process, or start keepalived if none is running.
+          reload_keepalived()
+          {
+            if pid=$(pgrep -o keepalived); then
+                kill -s SIGHUP "$pid"
+            else
+                /usr/sbin/keepalived -f /etc/keepalived/keepalived.conf --dont-fork --vrrp --log-detail --log-console &
+            fi
+          }
+
+          # Read messages line by line from stdin and act on the ones that are recognised.
+          msg_handler()
+          {
+            while read -r line; do
+              echo "The client sent: $line" >&2
+              # Only the 'reload' message is currently supported; anything else is ignored.
+              if [ "$line" = reload ]; then
+                  reload_keepalived
+              fi
+            done
+          }
+
+          set -ex
+          declare -r keepalived_sock="/var/run/keepalived/keepalived.sock"
+          export -f msg_handler
+          export -f reload_keepalived
+          # Start keepalived immediately only when a non-empty config file already exists.
+          if [ -s "/etc/keepalived/keepalived.conf" ]; then
+              /usr/sbin/keepalived -f /etc/keepalived/keepalived.conf --dont-fork --vrrp --log-detail --log-console &
+          fi
+
+          # Remove any stale socket, then hand every incoming connection to msg_handler.
+          rm -f "$keepalived_sock"
+          socat UNIX-LISTEN:${keepalived_sock},fork system:'bash -c msg_handler'
         resources:
           requests:
             cpu: 100m
@@ -72,16 +113,56 @@ contents:
         volumeMounts:
         - name: conf-dir
           mountPath: "/etc/keepalived"
+        - name: run-dir
+          mountPath: "/var/run/keepalived"
         livenessProbe:
           exec:
             command:
-            - pgrep
-            - keepalived
-          initialDelaySeconds: 10
+            - /bin/bash
+            - -c
+            - |
+              kill -s SIGUSR1 "$(pgrep -o keepalived)" && ! grep -q "State = FAULT" /tmp/keepalived.data
+          initialDelaySeconds: 20
         terminationMessagePolicy: FallbackToLogsOnError
         imagePullPolicy: IfNotPresent
+      - name: keepalived-monitor
+        securityContext:
+          privileged: true
+        image: {{ .Images.baremetalRuntimeCfgImage }}
+        env:
+          - name: ENABLE_UNICAST
+            value: "no"
+          - name: IS_BOOTSTRAP
+            value: "no"
+        command:
+        - dynkeepalived
+        - "/var/lib/kubelet/kubeconfig"
+        - "/config/keepalived.conf.tmpl"
+        - "/etc/keepalived/keepalived.conf"
+        - "--api-vip"
+        - "{{ .Infra.Status.PlatformStatus.OpenStack.APIServerInternalIP }}"
+        - "--dns-vip"
+        - "{{ .Infra.Status.PlatformStatus.OpenStack.NodeDNSIP }}"
+        - "--ingress-vip"
+        - "{{ .Infra.Status.PlatformStatus.OpenStack.IngressIP }}"
+        resources:
+          requests:
+            cpu: 100m
+            memory: 200Mi
+        volumeMounts:
+        - name: resource-dir
+          mountPath: "/config"
+        - name: kubeconfigvarlib
+          mountPath: "/var/lib/kubelet"
+        - name: conf-dir
+          mountPath: "/etc/keepalived"
+        - name: run-dir
+          mountPath: "/var/run/keepalived"
+        - name: chroot-host
+          mountPath: "/host"
+        imagePullPolicy: IfNotPresent
       hostNetwork: true
       tolerations:
       - operator: Exists
       priorityClassName: system-node-critical
-    status: {}
\ No newline at end of file
+    status: {}
diff --git a/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml b/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml
index a793235f5e..1a0568925e 100644
--- a/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml
+++ b/templates/master/00-master/openstack/files/openstack-haproxy-haproxy.yaml
@@ -22,7 +22,7 @@ contents:
     listen health_check_http_url
       bind :::50936 v4v6
       mode http
-      monitor-uri /readyz
+      monitor-uri /haproxy_ready
       option dontlognull
     listen stats
       bind localhost:{{`{{ .LBConfig.StatPort }}`}}
@@ -37,5 +37,5 @@ contents:
        option log-health-checks
        balance roundrobin
     {{`{{- range .LBConfig.Backends }}
-       server {{ .Host }} {{ .Address }}:{{ .Port }} weight 1 verify none check check-ssl inter 3s fall 2 rise 3
+       server {{ .Host }} {{ .Address }}:{{ .Port }} weight 1 verify none check check-ssl inter 1s fall 2 rise 3
     {{- end }}`}}
diff --git a/templates/master/00-master/openstack/files/openstack-haproxy.yaml b/templates/master/00-master/openstack/files/openstack-haproxy.yaml
index 1f4761fce4..486571a708 100644
--- a/templates/master/00-master/openstack/files/openstack-haproxy.yaml
+++ b/templates/master/00-master/openstack/files/openstack-haproxy.yaml
@@ -99,7 +99,7 @@ contents:
         livenessProbe:
           initialDelaySeconds: 10
           httpGet:
-            path: /readyz
+            path: /haproxy_ready
             port: 50936
         terminationMessagePolicy: FallbackToLogsOnError
         imagePullPolicy: IfNotPresent
diff --git a/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml b/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml
index 8a25a78c3f..aca355a777 100644
--- a/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml
+++ b/templates/master/00-master/openstack/files/openstack-keepalived-keepalived.yaml
@@ -3,15 +3,44 @@ mode: 0644
 path: "/etc/kubernetes/static-pod-resources/keepalived/keepalived.conf.tmpl"
 contents:
   inline: |
-    vrrp_script chk_ocp {
-        script "/usr/bin/curl -o /dev/null -kLfs https://localhost:6443/readyz && /usr/bin/curl -o /dev/null -kLfs http://localhost:50936/readyz"
-        interval 1
-        weight 50
+    global_defs {
+        enable_script_security
+        script_user root
     }
+
     vrrp_script chk_dns {
         script "/usr/bin/host -t SRV _etcd-server-ssl._tcp.{{ .EtcdDiscoveryDomain }} localhost"
         interval 1
-        weight 50
+        weight 20
+        rise 3
+        fall 2
+    }
+
+    # These are separate checks to provide the following behavior:
+    # If the loadbalanced endpoint is responding then all is well regardless
+    # of what the local api status is. Both checks will return success and
+    # we'll have the maximum priority. This means as long as there is a node
+    # with a functional loadbalancer it will get the VIP.
+    # If all of the loadbalancers go down but the local api is still running,
+    # the chk_ocp_both check will still succeed and allow any node with a functional
+    # api to take the VIP. This isn't preferred because it means all api
+    # traffic will go through one node, but at least it keeps the api available.
+    vrrp_script chk_ocp_lb {
+        script "/usr/bin/timeout 1.9 /etc/keepalived/chk_ocp_script.sh"
+        interval 2
+        weight 20
+        rise 3
+        fall 2
+    }
+
+    vrrp_script chk_ocp_both {
+        script "/usr/bin/timeout 1.9 /etc/keepalived/chk_ocp_script_both.sh"
+        interval 2
+        # Use a smaller weight for this check so it won't trigger the move from
+        # bootstrap to master by itself.
+        weight 5
+        rise 3
+        fall 2
     }
     # TODO: Improve this check. The port is assumed to be alive.
     # Need to assess what is the ramification if the port is not there.
@@ -34,7 +63,8 @@ contents:
             {{`{{ .Cluster.APIVIP }}`}}/{{`{{ .Cluster.VIPNetmask }}`}}
         }
         track_script {
-            chk_ocp
+            chk_ocp_lb
+            chk_ocp_both
         }
     }
     vrrp_instance {{`{{ .Cluster.Name }}`}}_DNS {
diff --git a/templates/master/00-master/openstack/files/openstack-keepalived-script-both.yaml b/templates/master/00-master/openstack/files/openstack-keepalived-script-both.yaml
new file mode 100644
index 0000000000..3056ae97a9
--- /dev/null
+++ b/templates/master/00-master/openstack/files/openstack-keepalived-script-both.yaml
@@ -0,0 +1,7 @@
+filesystem: "root"
+mode: 0755
+path: "/etc/kubernetes/static-pod-resources/keepalived/scripts/chk_ocp_script_both.sh.tmpl"
+contents:
+  inline: |
+    #!/bin/bash
+    /usr/bin/curl -o /dev/null -kLfs https://localhost:{{`{{ .LBConfig.LbPort }}`}}/readyz && [ -e /var/run/keepalived/iptables-rule-exists ] || /usr/bin/curl -o /dev/null -kLfs https://localhost:{{`{{ .LBConfig.ApiPort }}`}}/readyz
diff --git a/templates/master/00-master/openstack/files/openstack-keepalived-script.yaml b/templates/master/00-master/openstack/files/openstack-keepalived-script.yaml
new file mode 100644
index 0000000000..78707f4274
--- /dev/null
+++ b/templates/master/00-master/openstack/files/openstack-keepalived-script.yaml
@@ -0,0 +1,7 @@
+filesystem: "root"
+mode: 0755
+path: "/etc/kubernetes/static-pod-resources/keepalived/scripts/chk_ocp_script.sh.tmpl"
+contents:
+  inline: |
+    #!/bin/bash
+    /usr/bin/curl -o /dev/null -kLfs https://localhost:{{`{{ .LBConfig.LbPort }}`}}/readyz && [ -e /var/run/keepalived/iptables-rule-exists ]