From 4e99497c5b5848de38e41bb9b5a4bbbb4ba86356 Mon Sep 17 00:00:00 2001 From: Gancho Tenev <10522628+gtenev@users.noreply.github.com> Date: Tue, 14 Jul 2020 12:40:12 -0700 Subject: [PATCH] Fix volume/stripe calcs when using forced volumes (#6995) Fixed problems with initialization of cache volumes when at least one volume is being forced to a specific "exclusive" span. Problem description: ==================== Disks are cleared in the following configuration where volume sizes are specified using percentages and also one of the volumes is forced to a specific span (disk): storage.config: /dev/disk1 /dev/disk2 volume=3 # <- exclusive span forced to a specific volume volume.config: volume=1 scheme=http size=50% volume=2 scheme=http size=50% volume=3 scheme=http size=512 # <- volume forced to an exclusive span During the first start ATS identifies the cleared disks and does the following: 1. creates and spreads new volume 1 and 2 blocks across disk1 and disk2 2. deletes all volume 1 and 2 blocks from disk2 to make space for volume 3 3. creates new volume 3 that takes over the whole disk2. In step (1) volumes are calculated larger and spread to disk2 only to be deleted in step (2) to make space for the forced volume 3. During the initial start the global volume list cp_list would end up containing "zombie" CacheVol instances which correspond to the volume 1 and 2 blocks deleted from disk2 to make space for the volume 3 and the mapping of domains to volumes (hosting.config) could end up mapping to any of the deleted volume blocks. This problem disappears after restart since cp_list will be initialized from the disks and cp_list will contain only valid CacheVol instances. 
The fix: ======== This fix prevents this from happening by making sure all volumes meant to have "exclusive" disks are created first to make sure span free spaces are updated correctly and by excluding the size of the "exclusive" disks from the total cache size used for volume size calculations when sizes are specified in percentages (volume.config). (cherry picked from commit 17ee97aa90191767c25a485958965cd76f0e5013) --- doc/admin-guide/files/volume.config.en.rst | 28 ++++++++ iocore/cache/Cache.cc | 74 +++++++++++++++++----- 2 files changed, 86 insertions(+), 16 deletions(-) diff --git a/doc/admin-guide/files/volume.config.en.rst b/doc/admin-guide/files/volume.config.en.rst index ebae6d4e12a..a05a4b63ef4 100644 --- a/doc/admin-guide/files/volume.config.en.rst +++ b/doc/admin-guide/files/volume.config.en.rst @@ -52,6 +52,34 @@ do not allocate all the disk space in the cache, then the extra disk space is not used. You can use the extra space later to create new volumes without deleting and clearing the existing volumes. + +Exclusive spans and volume sizes +================================ + +In the following sample configuration 2 spans `/dev/disk1` and `/dev/disk2` are defined +in :file:`storage.config`, where span `/dev/disk2` is assigned to `volume 3` exclusively +(`volume 3` is forced to an "exclusive" span `/dev/disk2`). +In :file:`volume.config` there are 3 volumes defined, where `volume 1` and `volume 2` +occupy span `/dev/disk1` taking each 50% of its space and `volume 3` takes 100% of span +`/dev/disk2` exclusively. 
+ +storage.config:: + + /dev/disk1 + /dev/disk2 volume=3 # <- exclusive span + +volume.config:: + + volume=1 scheme=http size=50% + volume=2 scheme=http size=50% + volume=3 scheme=http size=512 # <- volume forced to a specific exclusive span + +It is important to note that when percentages are used to specify volume sizes +and "exclusive" spans are assigned (forced) to a particular volume (in this case `volume 3`), +the "exclusive" spans (in this case `/dev/disk2`) are excluded from the total cache +space when the "non-forced" volume sizes are calculated (in this case `volume 1` and `volume 2`). + + Examples ======== diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc index 5ef523e11f9..d1f2a3eb7ae 100644 --- a/iocore/cache/Cache.cc +++ b/iocore/cache/Cache.cc @@ -2654,6 +2654,8 @@ cplist_init() } } +static int fillExclusiveDisks(CacheVol *cp); + void cplist_update() { @@ -2709,6 +2711,37 @@ cplist_update() cp = cp->link.next; } } + + // Look for (exclusive) spans forced to a specific volume but not yet referenced by any volumes in cp_list, + // if found then create a new volume. This also makes sure new exclusive disk volumes are created first + // before any other new volumes to assure proper span free space calculation and proper volume block distribution. + for (config_vol = config_volumes.cp_queue.head; config_vol; config_vol = config_vol->link.next) { + if (nullptr == config_vol->cachep) { + // Find out if this is a forced volume assigned exclusively to a span which was cleared (hence not referenced in cp_list). 
+ // Note: non-exclusive cleared spans are not handled here, only the "exclusive" + for (int d_no = 0; d_no < gndisks; d_no++) { + if (gdisks[d_no]->forced_volume_num == config_vol->number) { + CacheVol *new_cp = new CacheVol(); + if (nullptr != new_cp) { + new_cp->disk_vols = (DiskVol **)ats_malloc(gndisks * sizeof(DiskVol *)); + if (nullptr != new_cp->disk_vols) { + memset(new_cp->disk_vols, 0, gndisks * sizeof(DiskVol *)); + new_cp->vol_number = config_vol->number; + new_cp->scheme = config_vol->scheme; + config_vol->cachep = new_cp; + fillExclusiveDisks(config_vol->cachep); + cp_list.enqueue(new_cp); + } else { + delete new_cp; + } + } + } + } + } else { + // Fill if this is exclusive disk. + fillExclusiveDisks(config_vol->cachep); + } + } } static int @@ -2722,20 +2755,31 @@ fillExclusiveDisks(CacheVol *cp) if (gdisks[i]->forced_volume_num != volume_number) { continue; } - /* The user had created several volumes before - clear the disk - and create one volume for http */ + + /* OK, this should be an "exclusive" disk (span). */ + diskCount++; + + /* There should be a single "forced" volume and no other volumes should exist on this "exclusive" disk (span) */ + bool found_nonforced_volumes = false; for (int j = 0; j < (int)gdisks[i]->header->num_volumes; j++) { if (volume_number != gdisks[i]->disk_vols[j]->vol_number) { - Note("Clearing Disk: %s", gdisks[i]->path); - gdisks[i]->delete_all_volumes(); + found_nonforced_volumes = true; break; } } - diskCount++; + if (found_nonforced_volumes) { + /* The user had created several volumes before - clear the disk and create one volume for http */ + Note("Clearing Disk: %s", gdisks[i]->path); + gdisks[i]->delete_all_volumes(); + } else if (1 == gdisks[i]->header->num_volumes) { + /* "Forced" volumes take the whole disk (span) hence nothing more to do for this span. */ + continue; + } + + /* Now, volumes have been either deleted or did not exist to begin with so we need to create them. 
*/ int64_t size_diff = gdisks[i]->num_usable_blocks; DiskVolBlock *dpb; - do { dpb = gdisks[i]->create_volume(volume_number, size_diff, cp->scheme); if (dpb) { @@ -2751,6 +2795,8 @@ fillExclusiveDisks(CacheVol *cp) } } while ((size_diff > 0)); } + + /* Report back the number of disks (spans) that were assigned to volume specified by volume_number. */ return diskCount; } @@ -2815,7 +2861,11 @@ cplist_reconfigure() /* sum up the total space available on all the disks. round down the space to 128 megabytes */ for (int i = 0; i < gndisks; i++) { - tot_space_in_blks += (gdisks[i]->num_usable_blocks / blocks_per_vol) * blocks_per_vol; + // Exclude exclusive disks (with forced volumes) from the following total space calculation, + // in such a way forced volumes will not impact volume percentage calculations. + if (-1 == gdisks[i]->forced_volume_num) { + tot_space_in_blks += (gdisks[i]->num_usable_blocks / blocks_per_vol) * blocks_per_vol; + } } double percent_remaining = 100.00; @@ -2843,16 +2893,8 @@ cplist_reconfigure() Debug("cache_hosting", "Volume: %d Size: %" PRId64, config_vol->number, (int64_t)config_vol->size); } cplist_update(); - /* go through volume config and grow and create volumes */ - - for (config_vol = config_volumes.cp_queue.head; config_vol; config_vol = config_vol->link.next) { - // if volume is given exclusive disks, fill here and continue - if (!config_vol->cachep) { - continue; - } - fillExclusiveDisks(config_vol->cachep); - } + /* go through volume config and grow and create volumes */ for (config_vol = config_volumes.cp_queue.head; config_vol; config_vol = config_vol->link.next) { size = config_vol->size; if (size < 128) {