Built using:
513dfa8
hashicorp/raft@bfc1fc9
I wrote a small bootstrap script for bringing up new clusters. In short, it copies the config, removes the "start_join" key, and adds the "bootstrap": true key. It then watches the member list for other members to join; when it sees them join, it restarts consul. The restart is basically kill -TERM $PID.
The issue I'm seeing is that after a second and third node join the cluster and the bootstrap server restarts, the second and third servers still list the 'bootstrap' flag for the bootstrap server (consul1). This is despite the fact that the consul log on consul1 states that bootstrap is false.
If I stop the server, wait and then start the server it will correctly clear the 'bootstrap=1' flag.
Is this purely cosmetic, or does the other nodes' view of the bootstrap flag influence their behavior? Please let me know what I can do to assist.
vagrant@consul1:~$ consul members
consul1 10.0.99.15:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300
consul3 10.0.99.35:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300
consul2 10.0.99.25:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300
vagrant@consul1:~$ ps aux | grep consul | grep -v grep
nobody 3168 0.0 0.1 7388 712 pts/0 S 02:52 0:00 logger -p local3.info -t consul
nobody 3169 3.5 2.5 1130860 9684 pts/0 Sl 02:52 0:16 /usr/bin/consul agent -config-dir /etc/consul.d/
vagrant@consul1:~$ cat /etc/consul.d/main.json
{
"bind_addr": "10.0.99.15",
"data_dir": "/var/lib/consul",
"bootstrap": false,
"server": true,
"datacenter": "vagrant",
"start_join": [
"10.0.99.15",
"10.0.99.25",
"10.0.99.35"
]
}
vagrant@consul2:~$ consul members
consul2 10.0.99.25:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300
consul1 10.0.99.15:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300,bootstrap=1
consul3 10.0.99.35:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300
vagrant@consul2:~$ ps aux | grep consul | grep -v grep
nobody 2401 0.0 0.1 7388 708 pts/0 S 02:52 0:00 logger -p local3.info -t consul
nobody 2402 5.9 2.8 1130988 10848 pts/0 Sl 02:52 0:29 /usr/bin/consul agent -config-dir /etc/consul.d/
vagrant@consul2:~$ cat /etc/consul.d/main.json
{
"bind_addr": "10.0.99.25",
"data_dir": "/var/lib/consul",
"bootstrap": false,
"server": true,
"datacenter": "vagrant",
"start_join": [
"10.0.99.15",
"10.0.99.25",
"10.0.99.35"
]
}
vagrant@consul3:~$ consul members
consul3 10.0.99.35:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300
consul1 10.0.99.15:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300,bootstrap=1
consul2 10.0.99.25:8301 alive role=consul,dc=vagrant,vsn=1,vsn_min=1,vsn_max=1,port=8300
vagrant@consul3:~$ ps aux | grep consul | grep -v grep
nobody 2416 0.0 0.1 7388 704 pts/0 S 02:52 0:00 logger -p local3.info -t consul
nobody 2417 3.5 2.6 1130860 9920 pts/0 Sl 02:52 0:17 /usr/bin/consul agent -config-dir /etc/consul.d/
vagrant@consul3:~$ cat /etc/consul.d/main.json
{
"bind_addr": "10.0.99.35",
"data_dir": "/var/lib/consul",
"bootstrap": false,
"server": true,
"datacenter": "vagrant",
"start_join": [
"10.0.99.15",
"10.0.99.25",
"10.0.99.35"
]
}
For reference, here is the bootstrap script (parts of it make massive assumptions about our infrastructure):
#!/usr/bin/env python
import os
from os import path
import sys
import json
import subprocess
import time
# Bootstrap helper.
#
# Usage: <script> CONFIG_DIR
#
# Starts a temporary consul agent with -bootstrap (using CONFIG_DIR/main.json
# minus its "start_join" key), polls `consul members -role consul` until it
# reports at least two lines, then stops the temporary agent and starts the
# regular service through /etc/init.d/consul.

if len(sys.argv) != 2:
    sys.stderr.write("No config directory specified\n")
    sys.exit(1)
config_dir = sys.argv[1]

# Check if consul is already running/bootstrapped
if os.system("consul members 1>/dev/null 2>&1") == 0:
    sys.exit(0)

with open(os.path.join(config_dir, "main.json")) as f:
    data = json.load(f)

# The bootstrap agent must not try to join anyone; the other servers join it.
data.pop("start_join", None)

bootstrap_config = "/tmp/consul_bootstrap.json"
with open(bootstrap_config, "w") as f:
    json.dump(data, f)

print("Starting consul")
devnull = open(os.devnull, 'w')
p = subprocess.Popen(
    ["/usr/bin/setuidgid", "nobody", "/usr/bin/consul", "agent",
     "-config-file", bootstrap_config, "-bootstrap"],
    stdout=devnull, stderr=devnull)

try:
    while True:
        print("Checking bootstrap status")
        try:
            # universal_newlines=True yields text (not bytes) so the member
            # list prints cleanly on Python 3 as well as Python 2.7.
            res = subprocess.check_output(
                ["/usr/bin/consul", "members", "-role", "consul"],
                universal_newlines=True)
        except subprocess.CalledProcessError:
            # `consul members` exited non-zero; keep polling.
            pass
        else:
            members = res.rstrip()
            if len(members.splitlines()) >= 2:
                print(members)
                # Give the cluster a chance to settle before ripping the leader out.
                time.sleep(2)
                break
        time.sleep(1)
except Exception as e:
    # Best effort, as before: report the problem, then still stop the
    # temporary agent and fall through to starting the regular service.
    print(e)
finally:
    # Send SIGTERM (Popen.terminate) rather than SIGKILL (Popen.kill) so the
    # agent can shut down cleanly, and wait for it to exit so the init script
    # below does not race a still-running agent. Running this in `finally`
    # also stops the temporary agent when the script is interrupted.
    if p.poll() is None:
        p.terminate()
    p.wait()
    devnull.close()

subprocess.check_call(["/etc/init.d/consul", "start"])
Vagrantfile:
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"

# Provisioning script handed to the shell provisioner below. It adds the apt
# source, installs consul and a few tools, creates the consul directories, and
# writes /etc/consul.d/main.json using the address found on eth1 as bind_addr.
$script = <<EOF
#!/bin/sh
set -x
echo "# jtv\ndeb [arch=amd64] http://mirrors.internal.justin.tv/jtv precise main" > /etc/apt/sources.list.d/jtv.list
wget -qO - http://mirrors.internal.justin.tv/jtv/jtv.asc | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y consul daemontools vim jq
sudo mkdir -p /var/run/consul
sudo chown nobody:nogroup /var/run/consul
sudo mkdir -p /var/lib/consul
sudo chown nobody:nogroup /var/lib/consul
sudo mkdir -p /etc/consul.d
ip=`ifconfig eth1 | grep "inet addr" | awk '{print $2 }' | cut -d':' -f2`
tee /etc/consul.d/main.json <<EOL
{
"bind_addr": "$ip",
"data_dir": "/var/lib/consul",
"bootstrap": false,
"server": true,
"datacenter": "vagrant",
"start_join": [
"10.0.99.15",
"10.0.99.25",
"10.0.99.35"
]
}
EOL
EOF

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  # Settings shared by every machine.
  config.vm.box = "precise64"
  config.vm.box_url = "http://files.vagrantup.com/precise64.box"
  config.vm.provision :shell, inline: $script

  # Machine name (also used as hostname) => address on the "consul" internal network.
  nodes = {
    "consul1" => "10.0.99.15",
    "consul2" => "10.0.99.25",
    "consul3" => "10.0.99.35",
  }

  nodes.each do |name, address|
    config.vm.define name do |node|
      node.vm.hostname = name
      node.vm.network "private_network", ip: address, virtualbox__intnet: "consul"
    end
  end
end
Logs:
Built using:
513dfa8
hashicorp/raft@bfc1fc9
I wrote a small bootstrap script for bringing up new clusters. The simple version of what it does is copy the config, remove the "start_join" key and add the "bootstrap": true key. It will then watch the member list for members to join. When it sees them join it will restart consul. Restart is basically kill -TERM $PID.
The issue I'm seeing is that when a second and third node join the cluster and the bootstrap server restarts I'm still seeing the 'bootstrap' flag listed on the second and third server. This is despite the fact that the consul log on consul1 states that bootstrap is false.
If I stop the server, wait and then start the server it will correctly clear the 'bootstrap=1' flag.
Is this purely cosmetic, or does the other nodes' view of the bootstrap flag influence their behavior? Please let me know what I can do to assist.
For reference the bootstrap script (parts of it are making massive assumptions about our infrastructure):
Vagrant file:
Logs: