Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 62 additions & 47 deletions add-db-to-osd.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
#!/bin/bash
# Brett Kelly Oct 2021
# Anthony D'Atri 2025-04-15
# 45Drives
# Version 1.3 stable
# Version 1.4 stable

usage() { # Help
cat << EOF
Usage:
[-b] Block DB size. Required. Allowed suffixes K,M,G,T
[-d] Device to use as db. Required. Aliased Device name should be used /dev/X-Y
[-f] Bypass osd per db warning
[-o] OSDs to add db to. Required. Comma separated list of osd.id. <0,1,2,3>
[-d] Device to use for DB+WAL. Required. Aliased Device name should be used /dev/X-Y
[-f] Bypass OSD per DB warning
[-o] OSDs to which to add DB+WAL. Required. Comma separated list of osd.id. <0,1,2,3>
[-r] Number of OSDs to share a given WAL+DB offload device, default is 5, which is
appropriate for SAS/SATA SSD offload devices. A value of 10 is usually reasonable
for NVMe offload devices, but note that this number of OSDs will fail when the
offload device fails.
[-h] Displays this message
EOF
exit 0
Expand All @@ -36,25 +41,25 @@ add_lv_tags(){
lvchange --addtag "ceph.type=db" $DB_LV_DEVICE
}

check_dependancies(){
for i in "${!SCRIPT_DEPENDANCIES[@]}"; do
if ! command -v ${SCRIPT_DEPENDANCIES[i]} >/dev/null 2>&1;then
echo "cli utility: ${SCRIPT_DEPENDANCIES[i]} is not installed"
echo "jq, and bc are required"
# Verify that every utility named in SCRIPT_DEPENDENCIES can be found.
# Globals:  SCRIPT_DEPENDENCIES (read) - array of command names to look up
# Outputs:  on the first missing utility, prints two explanatory lines to stdout
# Returns:  returns normally when all utilities are found; otherwise exits the
#           script with status 1
check_dependencies(){
    # Keep the loop variable local so it cannot clobber a caller's variable
    local dep
    for dep in "${SCRIPT_DEPENDENCIES[@]}"; do
        # Quote the name: unquoted, an entry is word-split and `command -v`
        # reports success if ANY of the resulting words exists
        if ! command -v "$dep" >/dev/null 2>&1; then
            echo "The required utility: $dep is not installed"
            echo "The jq and bc utilities are required"
            exit 1
        fi
    done
}

# if encountering any error quit, so to not make a mess
# Quit on any error so as not to make things worse
set -e

SCRIPT_DEPENDANCIES=(bc jq)
SCRIPT_DEPENDENCIES=(bc jq)
FORCE="false"
PHYSICAL_EXTENT_SIZE_BYTES=4194304
OSD_PER_DB_LIMIT=5

while getopts 'b:fo:d:h' OPTION; do
while getopts 'b:fo:d:hr:' OPTION; do
case ${OPTION} in
b)
BLOCK_DB_SIZE=${OPTARG}
Expand All @@ -74,6 +79,12 @@ while getopts 'b:fo:d:h' OPTION; do
OSD_LIST_=${OPTARG}
IFS=',' read -r -a OSD_LIST <<< "$OSD_LIST_"
;;
r)
OSD_PER_DB_LIMIT=${OPTARG}
case $OSD_PER_DB_LIMIT in
''|*[!0-9]*) echo "OSDs per DB device ratio must be an integer" ; exit 1 ;;
esac
;;
h)
usage
;;
Expand All @@ -86,10 +97,8 @@ if [ -z $OSD_LIST ] || [ -z $DB_DEVICE ] || [ -z $BLOCK_DB_SIZE_BYTES ]; then
exit 1
fi

# If the db device given is a linux sd device then warn if you want to continue

# Check cli depandancies
check_dependancies
# Check CLI dependencies
check_dependencies

BLOCK_DB_SIZE_EXTENTS=$(bc <<< "$BLOCK_DB_SIZE_BYTES/$PHYSICAL_EXTENT_SIZE_BYTES")
OSD_COUNT="${#OSD_LIST[@]}"
Expand All @@ -101,44 +110,44 @@ DB_DEVICE_SIZE_BYTES=$(blockdev --getsize64 $DB_DEVICE)
# check with wipefs that device has LVM data present
DB_DEVICE_SIGNATURE=$(wipefs "$DB_DEVICE" --json | jq -r '.signatures | .[0].type // empty')
# If this is empty the disk is assumed new.
# If this is LVM2_member the disk is assumed to already have a db lv present it
# If this is LVM2_member the disk is assumed to already have a DB LV present on it
# If anything else the disk is assumed to have something else on it and should be wiped. Quit with warning
# Accept the device only when wipefs found no signature (new disk) or an
# LVM2_member signature (disk already carries LVM data); anything else means
# foreign data is present and the device must be wiped first.
# The test is on DB_DEVICE_SIGNATURE: LVM_JSON_DEVICE is not assigned until
# later in the script, so testing it here was always true and disabled the guard.
if [ -n "$DB_DEVICE_SIGNATURE" ] && [ "$DB_DEVICE_SIGNATURE" != "LVM2_member" ]; then
    echo "Device is neither empty nor an LV device. Wipe the device and run again"
    exit 1
fi

# Get PVS info for the specific disk we want
# Get PV info for the specific disk we want
LVM_JSON=$(pvs --units B --nosuffix -o name,vg_name,lv_name,lv_count,lvsize,vg_free --reportformat json )
LVM_JSON_DEVICE=$(echo $LVM_JSON | jq --arg disk "$DB_DEVICE" '.[] |.[].pv | .[] | select(.pv_name==$disk)')

# Check we are using the correct device name
# Ensure that we are using the correct device
# if DB_DEVICE_SIGNATURE is LVM2_member and LVM_JSON_DEVICE is empty, then the wrong disk name was used (sd name instead of alias). Quit with warning
if [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ] && [ -z "$LVM_JSON_DEVICE" ];then
echo "WARNING: device selected ($DB_DEVICE) has a LVM signature, but could not get LVM info."
echo "Wrong disk name was most likely provided, use the device alias name instead of the linux device name"
echo "WARNING: device selected ($DB_DEVICE) has an LVM signature, but could not get LVM info."
echo "Wrong device name was most likely provided, use the device alias name instead of the Linux device name"
exit 1
fi

# are we using an exitsing db device or a new device, if LVM_JSON_DEVICE is empty, and DB_DEVICE_SIGNATURE is empty we have a new disk
# Are we using an existing DB device or a new device? If both LVM_JSON_DEVICE and DB_DEVICE_SIGNATURE are empty, we have an empty (new) device
if [ -z "$LVM_JSON_DEVICE" ] && [ -z "$DB_DEVICE_SIGNATURE" ];then
DB_VG_NAME="ceph-$(uuidgen)"
else
# if not how do we get db_VG ? inspect from device given
# Otherwise, derive the DB volume group name from the device given
DB_VG_NAME="$(echo $LVM_JSON_DEVICE | jq -r '.vg_name' | awk 'NR==1')"
# If there is no DB Volume group quit with warning. The disk has a LVM2_memebr signature but no volume group. Wipe disk and run again
# If there is no DB volume group, quit with a warning. The disk has an LVM2_member signature but no volume group. Wipe the device and run again.
if [ -z $DB_VG_NAME ];then
echo "WARNING: Device selected ($DB_DEVICE) has a LVM2_member signature, but no volume group"
echo "Wipe disk and run again"
echo "WARNING: Device selected ($DB_DEVICE) has an LVM2_member signature, but no volume group"
echo "Wipe the device and run again"
exit 1
fi
# Count how many lv dbs are present, add that to input osds and compare to OSD_LIMIT
# Count how many LV DBs are present, add that to input OSDs and compare to OSD_LIMIT
EXISTING_DB_COUNT=$(echo $LVM_JSON_DEVICE | jq -r '.lv_count' | awk 'NR==1')
echo "WARNING: device currently has $EXISTING_DB_COUNT db's present"
echo "WARNING: device currently has $EXISTING_DB_COUNT dbs present"
OSD_COUNT=$(bc <<< "${#OSD_LIST[@]}+$EXISTING_DB_COUNT")
# set db total device size to the amount of free Bytes in the volume group
# set DB total device size to the amount of free Bytes in the volume group
DB_DEVICE_DISK_SIZE_BYTES=$(echo $LVM_JSON_DEVICE | jq -r '.vg_free' | awk 'NR==1')
fi

Expand All @@ -151,16 +160,17 @@ if [ "$FORCE" == "false" ] ; then
fi
fi

# Check if total size of db's to be created will fit on db device
# Check if total size of DBs to be created will fit on DB device
if [ "$TOTAL_DB_SIZE_BYTES" -gt "$DB_DEVICE_SIZE_BYTES" ] ; then
echo "Warning: total size of db will not fit on device $DB_DEVICE"
exit 1
fi

# Check each osd to see if it present on host
# Check each osd to see if it already has db device
# Check current bluestore db size and compare to chosen db size
# Check each OSD to see if it is present on the host
# Check each OSD to see if it already has a DB device
# Check current BlueStore DB size and compare to supplied DB size
# Gather ceph-volume output before entering loop as it takes a while to run

CEPH_VOLUME_JSON=$(ceph-volume lvm list --format json)
for i in "${!OSD_LIST[@]}"; do
OSD_ID=${OSD_LIST[i]}
Expand All @@ -171,7 +181,7 @@ for i in "${!OSD_LIST[@]}"; do
fi
DB_CHECK=$(echo $OSD_JSON | jq 'select(.tags["ceph.db_device"])');
if [ ! -z "$DB_CHECK" ]; then
echo "Warning: osd.$OSD_ID already has a db device attached"
echo "Warning: osd.$OSD_ID already has a DB device attached"
exit 1
fi
CURRENT_BLOCK_DB_USED_BYTES=$(ceph daemon osd.$OSD_ID perf dump | jq '.bluefs | .db_used_bytes')
Expand All @@ -181,9 +191,10 @@ for i in "${!OSD_LIST[@]}"; do
fi
done

# Make sure ceph admin keyring is present hs correct permission
# Make sure the admin keyring is present with correct permissions
# Remove "set -e" so we can check ceph status error code
# Then turn it back on after
# Then turn it back on

set +e
ceph status > /dev/null 2>&1 ; rc=$?
set -e
Expand All @@ -192,8 +203,7 @@ if [[ "$rc" -ne 0 ]];then
exit 1
fi

# If we got this far then all checked are passed
# Start migration process
# If we got this far then all checks passed, so start the migration process

if [ -z "$LVM_JSON_DEVICE" ] && [ -z "$DB_DEVICE_SIGNATURE" ];then
pvcreate $DB_DEVICE
Expand All @@ -214,33 +224,38 @@ for i in "${!OSD_LIST[@]}"; do
chown -h ceph:ceph $DB_LV_DEVICE
chown -R ceph:ceph $(realpath $DB_LV_DEVICE)

# Call ceph health check function dont continue unless cluster healthy
# Don't continue unless the cluster is healthy

CEPH_STATUS=$(ceph health --format json | jq -r '.status')
while [ "$CEPH_STATUS" != "HEALTH_OK" ]; do
echo "Warning: Cluster is not in HEALTH_OK state"
sleep 2
CEPH_STATUS=$(ceph health --format json | jq -r '.status')
done

# Refuse to stop the OSD when Ceph says doing so is not safe.
# The command's exit status is tested directly in the `if` condition rather
# than capturing its output: a non-zero status inside a plain assignment would
# make `set -e` abort the script before the error message could be printed.
if ! ceph osd ok-to-stop "$OSD_ID"; then
    echo "Error: stopping osd.$OSD_ID would result in data unavailability"
    exit 1
fi

echo "Set noout"
ceph osd set noout
echo "Stop OSD.$OSD_ID"
systemctl stop ceph-osd@$OSD_ID
echo "Flush OSD Journal"
ceph-osd -i $OSD_ID --flush-journal
echo "Create new db"
echo "Create new DB"
CEPH_ARGS="--bluestore-block-db-size $BLOCK_DB_SIZE_BYTES" ceph-bluestore-tool bluefs-bdev-new-db --path /var/lib/ceph/osd/ceph-$OSD_ID/ --dev-target $DB_LV_DEVICE
echo "Migrate old db to new db"
echo "Migrate old DB to new DB"
ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-$OSD_ID/ --devs-source /var/lib/ceph/osd/ceph-$OSD_ID/block --dev-target /var/lib/ceph/osd/ceph-$OSD_ID/block.db
echo "Update LV tags on block and db"
echo "Update LV tags on block and DB devices"
add_lv_tags
echo "unmount OSD.$OSD_ID"
umount /var/lib/ceph/osd/ceph-$OSD_ID/
echo "Activate OSD.$OSD_ID"
ceph-volume lvm activate $OSD_ID $OSD_FSID
echo "Unset noout"
ceph osd unset noout
echo "Verify osd is back up before continuing"
echo "Verify OSD is up before continuing"
OSD_STATE=$(ceph osd tree --format json | jq --arg id "$OSD_ID" -r '.nodes[] | select(.id == ($id |tonumber)) | .status')
echo "OSD_STATE: $OSD_STATE"
while [ "$OSD_STATE" != "up" ]; do
Expand Down