Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,7 @@ Session.vim
!.vscode/extensions.json
.history

### GoLand files ###
.idea

# End of https://www.gitignore.io/api/go,vim,emacs,visualstudiocode
122 changes: 121 additions & 1 deletion bindata/network/ovn-kubernetes/ovnkube-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,11 @@ spec:
- /bin/bash
- -c
- |
set -x
MASTER_IP="{{.OVN_MASTER_IP}}"
if [[ "${K8S_NODE_IP}" == "${MASTER_IP}" ]]; then

# set the connection and disable inactivity probe
retries=0
while ! ovn-nbctl --no-leader-only -t 5 set-connection pssl:{{.OVN_NB_PORT}}{{.LISTEN_DUAL_STACK}} -- set connection . inactivity_probe=60000; do
(( retries += 1 ))
Expand All @@ -161,6 +164,61 @@ spec:
sleep 2
done
fi

# Reconcile the nb-db RAFT election timer with OVN_NB_RAFT_ELECTION_TIMER.
#
# Steps:
#   1. Read the current timer from `cluster/status`, retrying while the
#      database control socket is not answering yet.
#   2. If it differs from the desired value, wait (bounded) for this node to
#      leave the "candidate" role.
#   3. If this node is the leader, move the timer to the desired value,
#      never asking for more than double the current value in one request.
#
# Exit codes: 1 if the current timer cannot be read, 2 if a change request
# is rejected.
election_timer="${OVN_NB_RAFT_ELECTION_TIMER}"
echo "Setting nb-db raft election timer to ${election_timer} ms"

# The assignment's exit status is grep's: it is non-zero when ovs-appctl
# printed nothing or no "Election timer:" line was found, so `until` keeps
# retrying exactly while there is no usable value.
retries=0
current_election_timer=""
until current_election_timer=$(ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound 2>/dev/null \
  | grep -oP '(?<=Election timer:\s)[[:digit:]]+'); do
  (( retries += 1 ))
  if [[ "${retries}" -gt 10 ]]; then
    echo "Failed to get current nb-db raft election timer value after multiple attempts. Exiting..."
    exit 1
  fi
  sleep 2
done

if [[ "${election_timer}" -ne "${current_election_timer}" ]]; then
  # Give an in-progress election a bounded amount of time to settle before
  # checking the role; after 10 retries carry on regardless.
  retries=0
  while ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound 2>/dev/null \
    | grep -q "Role: candidate"; do
    (( retries += 1 ))
    if [[ "${retries}" -gt 10 ]]; then
      echo "Cluster node (nb-db raft) is in candidate role for prolonged time. Continuing..."
      break
    fi
    sleep 2
  done

  # Only the leader issues the change; other roles leave the timer alone.
  if ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound 2>/dev/null \
    | grep -q "Role: leader"; then
    while [[ "${current_election_timer}" != "${election_timer}" ]]; do
      # Step towards the target, capped at twice the current value
      # (presumably the server rejects larger jumps -- confirm against the
      # ovsdb-server documentation).
      max_election_timer=$(( current_election_timer * 2 ))
      if [[ "${election_timer}" -le "${max_election_timer}" ]]; then
        next_election_timer="${election_timer}"
      else
        next_election_timer="${max_election_timer}"
      fi
      if ! ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/change-election-timer OVN_Northbound "${next_election_timer}"; then
        echo "Failed to set nb-db raft election timer ${next_election_timer}. Exiting..."
        exit 2
      fi
      current_election_timer="${next_election_timer}"
    done
  fi
fi

readinessProbe:
initialDelaySeconds: 30
exec:
Expand All @@ -173,6 +231,8 @@ spec:
env:
- name: OVN_LOG_LEVEL
value: info
- name: OVN_NB_RAFT_ELECTION_TIMER
value: "{{.OVN_NB_RAFT_ELECTION_TIMER}}"
- name: K8S_NODE_IP
valueFrom:
fieldRef:
Expand Down Expand Up @@ -212,7 +272,7 @@ spec:
- /bin/bash
- -c
- |
set -xe
set -x
if [[ -f /env/_master ]]; then
set -o allexport
source /env/_master
Expand Down Expand Up @@ -256,8 +316,11 @@ spec:
- /bin/bash
- -c
- |
set -x
MASTER_IP="{{.OVN_MASTER_IP}}"
if [[ "${K8S_NODE_IP}" == "${MASTER_IP}" ]]; then

# set the connection and disable inactivity probe
retries=0
while ! ovn-sbctl --no-leader-only -t 5 set-connection pssl:{{.OVN_SB_PORT}}{{.LISTEN_DUAL_STACK}} -- set connection . inactivity_probe=60000; do
(( retries += 1 ))
Expand All @@ -268,6 +331,61 @@ spec:
sleep 2
done
fi

# Reconcile the sb-db RAFT election timer with OVN_SB_RAFT_ELECTION_TIMER.
#
# Steps:
#   1. Read the current timer from `cluster/status`, retrying while the
#      database control socket is not answering yet.
#   2. If it differs from the desired value, wait (bounded) for this node to
#      leave the "candidate" role.
#   3. If this node is the leader, move the timer to the desired value,
#      never asking for more than double the current value in one request.
#
# Exit codes: 1 if the current timer cannot be read, 2 if a change request
# is rejected.
election_timer="${OVN_SB_RAFT_ELECTION_TIMER}"
echo "Setting sb-db raft election timer to ${election_timer} ms"

# The assignment's exit status is grep's: it is non-zero when ovs-appctl
# printed nothing or no "Election timer:" line was found, so `until` keeps
# retrying exactly while there is no usable value.
retries=0
current_election_timer=""
until current_election_timer=$(ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/status OVN_Southbound 2>/dev/null \
  | grep -oP '(?<=Election timer:\s)[[:digit:]]+'); do
  (( retries += 1 ))
  if [[ "${retries}" -gt 10 ]]; then
    echo "Failed to get current sb-db raft election timer value after multiple attempts. Exiting..."
    exit 1
  fi
  sleep 2
done

if [[ "${election_timer}" -ne "${current_election_timer}" ]]; then
  # Give an in-progress election a bounded amount of time to settle before
  # checking the role; after 10 retries carry on regardless.
  retries=0
  while ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/status OVN_Southbound 2>/dev/null \
    | grep -q "Role: candidate"; do
    (( retries += 1 ))
    if [[ "${retries}" -gt 10 ]]; then
      echo "Cluster node (sb-db raft) is in candidate role for prolonged time. Continuing..."
      break
    fi
    sleep 2
  done

  # Only the leader issues the change; other roles leave the timer alone.
  if ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/status OVN_Southbound 2>/dev/null \
    | grep -q "Role: leader"; then
    while [[ "${current_election_timer}" != "${election_timer}" ]]; do
      # Step towards the target, capped at twice the current value
      # (presumably the server rejects larger jumps -- confirm against the
      # ovsdb-server documentation).
      max_election_timer=$(( current_election_timer * 2 ))
      if [[ "${election_timer}" -le "${max_election_timer}" ]]; then
        next_election_timer="${election_timer}"
      else
        next_election_timer="${max_election_timer}"
      fi
      if ! ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/change-election-timer OVN_Southbound "${next_election_timer}"; then
        echo "Failed to set sb-db raft election timer ${next_election_timer}. Exiting..."
        exit 2
      fi
      current_election_timer="${next_election_timer}"
    done
  fi
fi

readinessProbe:
initialDelaySeconds: 30
exec:
Expand All @@ -280,6 +398,8 @@ spec:
env:
- name: OVN_LOG_LEVEL
value: info
- name: OVN_SB_RAFT_ELECTION_TIMER
value: "{{.OVN_SB_RAFT_ELECTION_TIMER}}"
- name: K8S_NODE_IP
valueFrom:
fieldRef:
Expand Down
3 changes: 3 additions & 0 deletions bindata/network/ovn-kubernetes/ovnkube-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ spec:
--sb-client-cacert /ovn-ca/ca-bundle.crt \
--config-file=/run/ovnkube-config/ovnkube.conf \
--loglevel "${OVN_KUBE_LOG_LEVEL}" \
--inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \
${hybrid_overlay_flags} \
--metrics-bind-address "0.0.0.0:9103"
env:
Expand All @@ -139,6 +140,8 @@ spec:
value: "{{.KUBERNETES_SERVICE_PORT}}"
- name: KUBERNETES_SERVICE_HOST
value: "{{.KUBERNETES_SERVICE_HOST}}"
- name: OVN_CONTROLLER_INACTIVITY_PROBE
value: "{{.OVN_CONTROLLER_INACTIVITY_PROBE}}"
- name: OVN_KUBE_LOG_LEVEL
value: "4"
- name: K8S_NODE
Expand Down
6 changes: 6 additions & 0 deletions manifests/0000_70_cluster-network-operator_03_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ spec:
value: "quay.io/openshift/origin-multus-route-override-cni:4.4"
- name: OVN_IMAGE
value: "quay.io/openshift/origin-ovn-kubernetes:4.3"
- name: OVN_NB_RAFT_ELECTION_TIMER
value: "5000"
- name: OVN_SB_RAFT_ELECTION_TIMER
value: "5000"
- name: OVN_CONTROLLER_INACTIVITY_PROBE
value: "30000"
- name: KURYR_DAEMON_IMAGE
value: "quay.io/openshift/origin-kuryr-cni:4.3"
- name: KURYR_CONTROLLER_IMAGE
Expand Down
3 changes: 3 additions & 0 deletions pkg/network/ovn_kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
data.Data["OVN_SB_PORT"] = OVN_SB_PORT
data.Data["OVN_NB_RAFT_PORT"] = OVN_NB_RAFT_PORT
data.Data["OVN_SB_RAFT_PORT"] = OVN_SB_RAFT_PORT
data.Data["OVN_NB_RAFT_ELECTION_TIMER"] = os.Getenv("OVN_NB_RAFT_ELECTION_TIMER")
data.Data["OVN_SB_RAFT_ELECTION_TIMER"] = os.Getenv("OVN_SB_RAFT_ELECTION_TIMER")
data.Data["OVN_CONTROLLER_INACTIVITY_PROBE"] = os.Getenv("OVN_CONTROLLER_INACTIVITY_PROBE")
data.Data["OVN_NB_DB_LIST"] = dbList(bootstrapResult.OVN.MasterIPs, OVN_NB_PORT)
data.Data["OVN_SB_DB_LIST"] = dbList(bootstrapResult.OVN.MasterIPs, OVN_SB_PORT)
data.Data["OVN_MASTER_IP"] = bootstrapResult.OVN.MasterIPs[0]
Expand Down