diff --git a/add-db-to-osd.sh b/add-db-to-osd.sh
index 669134d..3136e4c 100644
--- a/add-db-to-osd.sh
+++ b/add-db-to-osd.sh
@@ -1,15 +1,20 @@
 #!/bin/bash
 # Brett Kelly Oct 2021
+# Anthony D'Atri 2025-04-15
 # 45Drives
-# Version 1.3 stable
+# Version 1.4 stable
 
 usage() { # Help
 cat << EOF
 Usage: [-b] Block DB size. Required. Allowed suffixes K,M,G,T
-       [-d] Device to use as db. Required. Aliased Device name should be used /dev/X-Y
-       [-f] Bypass osd per db warning
-       [-o] OSDs to add db to. Required. Comma separated list of osd.id. <0,1,2,3>
+       [-d] Device to use for DB+WAL. Required. Aliased Device name should be used /dev/X-Y
+       [-f] Bypass OSD per DB warning
+       [-o] OSDs to which to add DB+WAL. Required. Comma separated list of osd.id. <0,1,2,3>
+       [-r] Number of OSDs to share a given WAL+DB offload device, default is 5, which is
+            appropriate for SAS/SATA SSD offload devices. A value of 10 is usually reasonable
+            for NVMe offload devices, but note that this number of OSDs will fail when the
+            offload device fails.
        [-h] Displays this message
 EOF
 exit 0
 }
@@ -36,25 +41,25 @@ add_lv_tags(){
     lvchange --addtag "ceph.type=db" $DB_LV_DEVICE
 }
 
-check_dependancies(){
-    for i in "${!SCRIPT_DEPENDANCIES[@]}"; do
-        if ! command -v ${SCRIPT_DEPENDANCIES[i]} >/dev/null 2>&1;then
-            echo "cli utility: ${SCRIPT_DEPENDANCIES[i]} is not installed"
-            echo "jq, and bc are required"
+check_dependencies(){
+    for i in "${!SCRIPT_DEPENDENCIES[@]}"; do
+        if ! command -v ${SCRIPT_DEPENDENCIES[i]} >/dev/null 2>&1;then
+            echo "The required utility: ${SCRIPT_DEPENDENCIES[i]} is not installed"
+            echo "The jq and bc utilities are required"
             exit 1
         fi
     done
 }
 
-# if encountering any error quit, so to not make a mess
+# Quit if we encounter any error, so to not make anything even worse
 set -e
 
-SCRIPT_DEPENDANCIES=(bc jq)
+SCRIPT_DEPENDENCIES=(bc jq)
 FORCE="false"
 PHYSICAL_EXTENT_SIZE_BYTES=4194304
 OSD_PER_DB_LIMIT=5
 
-while getopts 'b:fo:d:h' OPTION; do
+while getopts 'b:fo:d:hr:' OPTION; do
     case ${OPTION} in
     b)
         BLOCK_DB_SIZE=${OPTARG}
@@ -74,6 +79,12 @@ while getopts 'b:fo:d:h' OPTION; do
         OSD_LIST_=${OPTARG}
         IFS=',' read -r -a OSD_LIST <<< "$OSD_LIST_"
         ;;
+    r)
+        OSD_PER_DB_LIMIT=${OPTARG}
+        case $OSD_PER_DB_LIMIT in
+            ''|*[!0-9]*) echo "OSDs per DB device ratio must be an integer" ; exit 1 ;;
+        esac
+        ;;
     h)
         usage
         ;;
@@ -86,10 +97,8 @@ if [ -z $OSD_LIST ] || [ -z $DB_DEVICE ] || [ -z $BLOCK_DB_SIZE_BYTES ]; then
     exit 1
 fi
 
-# If the db device given is a linux sd device then warn if you want to continue
-
-# Check cli depandancies
-check_dependancies
+# Check CLI dependencies
+check_dependencies
 
 BLOCK_DB_SIZE_EXTENTS=$(bc <<< "$BLOCK_DB_SIZE_BYTES/$PHYSICAL_EXTENT_SIZE_BYTES")
 OSD_COUNT="${#OSD_LIST[@]}"
@@ -101,44 +110,44 @@
 DB_DEVICE_SIZE_BYTES=$(blockdev --getsize64 $DB_DEVICE)
 # check with wipefs that device has LVM data present
 DB_DEVICE_SIGNATURE=$(wipefs "$DB_DEVICE" --json | jq -r '.signatures | .[0].type // empty')
 # If this is empty the disk is assumed new.
-# If this is LVM2_member the disk is assumed to already have a db lv present it
+# If this is LVM2_member the disk is assumed to already have a DB LV present it
 # If anything else the disk is assumed to have something else on it and should be wiped. Quit with warning
-if [ -z "$LVM_JSON_DEVICE" ] || [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ];then
+if [ -z "$LVM_JSON_DEVICE" ] || [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ]; then
     :
 else
-    echo "Disk is not empty nor a LVM device, wipe device first and run again"
+    echo "Device is neither empty nor an LV device. Wipe the device and run again"
     exit 1
 fi
 
-# Get PVS info for the specific disk we want
+# Get PV info for the specific disk we want
 LVM_JSON=$(pvs --units B --nosuffix -o name,vg_name,lv_name,lv_count,lvsize,vg_free --reportformat json )
 LVM_JSON_DEVICE=$(echo $LVM_JSON | jq --arg disk "$DB_DEVICE" '.[] |.[].pv | .[] | select(.pv_name==$disk)')
 
-# Check we are using the correct device name
+# Ensure that we are using the correct device
 # if DB_DEVICE_SIGNATURE is LVM2_member and LVM_JSON_DEVICE is empty, then the wrong disk name was used (sd name instead of alias). Quit with warning
 if [ "$DB_DEVICE_SIGNATURE" == "LVM2_member" ] && [ -z "$LVM_JSON_DEVICE" ];then
-    echo "WARNING: device selected ($DB_DEVICE) has a LVM signature, but could not get LVM info."
-    echo "Wrong disk name was most likely provided, use the device alias name instead of the linux device name"
+    echo "WARNING: device selected ($DB_DEVICE) has an LVM signature, but could not get LVM info."
+    echo "Wrong device name was most likely provided, use the device alias name instead of the Linux device name"
     exit 1
 fi
 
-# are we using an exitsing db device or a new device, if LVM_JSON_DEVICE is empty, and DB_DEVICE_SIGNATURE is empty we have a new disk
+# Are we using an existing DB device or a new device? if LVM_JSON_DEVICE is empty and DB_DEVICE_SIGNATURE is empty we have an empty device
 if [ -z "$LVM_JSON_DEVICE" ] && [ -z "$DB_DEVICE_SIGNATURE" ];then
     DB_VG_NAME="ceph-$(uuidgen)"
 else
-    # if not how do we get db_VG ? inspect from device given
+    # If not how do we get db_VG ? Derive from device given
     DB_VG_NAME="$(echo $LVM_JSON_DEVICE | jq -r '.vg_name' | awk 'NR==1')"
-    # If there is no DB Volume group quit with warning. The disk has a LVM2_memebr signature but no volume group. Wipe disk and run again
+    # If there is no DB Volume group quit with warning. The disk has a LVM2_member signature but no volume group. Wipe device and run again.
    if [ -z $DB_VG_NAME ];then
-        echo "WARNING: Device selected ($DB_DEVICE) has a LVM2_member signature, but no volume group"
-        echo "Wipe disk and run again"
+        echo "WARNING: Device selected ($DB_DEVICE) has an LVM2_member signature, but no volume group"
+        echo "Wipe the device and run again"
         exit 1
     fi
-    # Count how many lv dbs are present, add that to input osds and compare to OSD_LIMIT
+    # Count how many LV DBs are present, add that to input OSDs and compare to OSD_LIMIT
     EXISTING_DB_COUNT=$(echo $LVM_JSON_DEVICE | jq -r '.lv_count' | awk 'NR==1')
-    echo "WARNING: device currently has $EXISTING_DB_COUNT db's present"
+    echo "WARNING: device currently has $EXISTING_DB_COUNT dbs present"
     OSD_COUNT=$(bc <<< "${#OSD_LIST[@]}+$EXISTING_DB_COUNT")
-    # set db total device size to the amount of free Bytes in the volume group
+    # set DB total device size to the amount of free Bytes in the volume group
     DB_DEVICE_DISK_SIZE_BYTES=$(echo $LVM_JSON_DEVICE | jq -r '.vg_free' | awk 'NR==1')
 fi
@@ -151,16 +160,17 @@ if [ "$FORCE" == "false" ] ; then
     fi
 fi
 
-# Check if total size of db's to be created will fit on db device
+# Check if total size of DBs to be created will fit on DB device
 if [ "$TOTAL_DB_SIZE_BYTES" -gt "$DB_DEVICE_SIZE_BYTES" ] ; then
     echo "Warning: total size of db will not fit on device $DB_DEVICE"
     exit 1
 fi
 
-# Check each osd to see if it present on host
-# Check each osd to see if it already has db device
-# Check current bluestore db size and compare to chosen db size
+# Check each OSD to see if it present on host
+# Check each OSD to see if it already has a DB device
+# Check current BlueStore DB size and compare to supplied DB size
 # Gather ceph-volume output before entering loop as it takes a while to run
+
 CEPH_VOLUME_JSON=$(ceph-volume lvm list --format json)
 for i in "${!OSD_LIST[@]}"; do
     OSD_ID=${OSD_LIST[i]}
@@ -171,7 +181,7 @@ for i in "${!OSD_LIST[@]}"; do
     fi
     DB_CHECK=$(echo $OSD_JSON | jq 'select(.tags["ceph.db_device"])');
     if [ ! -z "$DB_CHECK" ]; then
-        echo "Warning: osd.$OSD_ID already has a db device attached"
+        echo "Warning: osd.$OSD_ID already has a DB device attached"
         exit 1
     fi
     CURRENT_BLOCK_DB_USED_BYTES=$(ceph daemon osd.$OSD_ID perf dump | jq '.bluefs | .db_used_bytes')
@@ -181,9 +191,10 @@ for i in "${!OSD_LIST[@]}"; do
     fi
 done
 
-# Make sure ceph admin keyring is present hs correct permission
+# Make sure the admin keyring is present with correct permissions
 # Remove "set -e" so we can check ceph status error code
-# Then turn it back on after
+# Then turn it back on
+
 set +e
 ceph status > /dev/null 2>&1 ; rc=$?
 set -e
@@ -192,8 +203,7 @@ if [[ "$rc" -ne 0 ]];then
     exit 1
 fi
 
-# If we got this far then all checked are passed
-# Start migration process
+# If we got this far then all checks passed, so start the migration process
 
 if [ -z "$LVM_JSON_DEVICE" ] && [ -z "$DB_DEVICE_SIGNATURE" ];then
     pvcreate $DB_DEVICE
@@ -214,7 +224,8 @@ for i in "${!OSD_LIST[@]}"; do
     chown -h ceph:ceph $DB_LV_DEVICE
     chown -R ceph:ceph $(realpath $DB_LV_DEVICE)
 
-    # Call ceph health check function dont continue unless cluster healthy
+    # Don't continue unless the cluster is healthy
+
     CEPH_STATUS=$(ceph health --format json | jq -r '.status')
     while [ "$CEPH_STATUS" != "HEALTH_OK" ]; do
         echo "Warning: Cluster is not in HEALTH_OK state"
@@ -222,17 +233,21 @@ for i in "${!OSD_LIST[@]}"; do
         CEPH_STATUS=$(ceph health --format json | jq -r '.status')
     done
 
+    if ! ceph osd ok-to-stop $OSD_ID > /dev/null 2>&1 ; then
+        echo "Error: stopping osd.$OSD_ID would result in data unavailability"
+        echo "Wait for the cluster to recover and run again"
+        exit 1
+    fi
+
     echo "Set noout"
     ceph osd set noout
     echo "Stop OSD.$OSD_ID"
     systemctl stop ceph-osd@$OSD_ID
-    echo "Flush OSD Journal"
-    ceph-osd -i $OSD_ID --flush-journal
-    echo "Create new db"
+    echo "Create new DB"
     CEPH_ARGS="--bluestore-block-db-size $BLOCK_DB_SIZE_BYTES" ceph-bluestore-tool bluefs-bdev-new-db --path /var/lib/ceph/osd/ceph-$OSD_ID/ --dev-target $DB_LV_DEVICE
-    echo "Migrate old db to new db"
+    echo "Migrate old DB to new DB"
     ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-$OSD_ID/ --devs-source /var/lib/ceph/osd/ceph-$OSD_ID/block --dev-target /var/lib/ceph/osd/ceph-$OSD_ID/block.db
-    echo "Update LV tags on block and db"
+    echo "Update LV tags on block and DB devices"
     add_lv_tags
     echo "unmount OSD.$OSD_ID"
     umount /var/lib/ceph/osd/ceph-$OSD_ID/
@@ -240,7 +255,7 @@ for i in "${!OSD_LIST[@]}"; do
     ceph-volume lvm activate $OSD_ID $OSD_FSID
     echo "Unset noout"
    ceph osd unset noout
-    echo "Verify osd is back up before continuing"
+    echo "Verify OSD is up before continuing"
     OSD_STATE=$(ceph osd tree --format json | jq --arg id "$OSD_ID" -r '.nodes[] | select(.id == ($id |tonumber)) | .status')
     echo "OSD_STATE: $OSD_STATE"
     while [ "$OSD_STATE" != "up" ]; do