From 3985bd85a0851cc068ef58a0b961d4999ed64d34 Mon Sep 17 00:00:00 2001 From: Gancho Tenev Date: Thu, 9 Jul 2020 16:18:48 -0700 Subject: [PATCH] Fix volume/stripe calcs when using forced volumes Fixed problems with initialization of cache volumes when at least one volume is being forced to a specific "exclusive" span. Problem description: ==================== Disks are cleared in the following configuration where volume sizes are specified using percentages and also one of the volumes is forced to a specific span (disk): storage.config: /dev/disk1 /dev/disk2 volume=3 # <- exclusive span forced to a specific volume volume.config: volume=1 scheme=http size=50% volume=2 scheme=http size=50% volume=3 scheme=http size=512 # <- volume forced to an exclusive span During the first start ATS identifies the cleared disks and does the following: 1. creates and spreads new volume 1 and 2 blocks across disk1 and disk2 2. deletes all volume 1 and 2 blocks from disk2 to make space for volume 3 3. creates new volume 3 that takes over the whole disk2. In step (1) volumes are calculated larger and spread to disk2 only to be deleted in step (2) to make space for the forced volume 3. During the initial start the global volume list cp_list would end up containing "zombie" CacheVol instances which correspond to the volume 1 and 2 blocks deleted from disk2 to make space for the volume 3 and the mapping of domains to volumes (hosting.config) could end up mapping to any of the deleted volume blocks. This problem disappears after restart since cp_list will be initialized from the disks and cp_list will contain only valid CacheVol instances. The fix: ======== This fix prevents this from happening by making sure all volumes meant to have "exclusive" disks are created first to make sure span free spaces are updated correctly and by excluding the size of the "exclusive" disks from the total cache size used for volume size calculations when sizes are specified in percentages (volume.config). 
--- doc/admin-guide/files/volume.config.en.rst | 27 ++++++++ iocore/cache/Cache.cc | 77 +++++++++++++++++----- 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/doc/admin-guide/files/volume.config.en.rst b/doc/admin-guide/files/volume.config.en.rst index 46395f180d6..c96d2fbc32b 100644 --- a/doc/admin-guide/files/volume.config.en.rst +++ b/doc/admin-guide/files/volume.config.en.rst @@ -68,6 +68,33 @@ sits in front of a volume. This may be desirable if you are using something lik ramdisks, to avoid wasting RAM and cpu time on double caching objects. +Exclusive spans and volume sizes +================================ + +In the following sample configuration 2 spans `/dev/disk1` and `/dev/disk2` are defined +in :file:`storage.config`, where span `/dev/disk2` is assigned to `volume 3` exclusively +(`volume 3` is forced to an "exclusive" span `/dev/disk2`). +In :file:`volume.config` there are 3 volumes defined, where `volume 1` and `volume 2` +occupy span `/dev/disk1` taking each 50% of its space and `volume 3` takes 100% of span +`/dev/disk2` exclusively. + +storage.config:: + + /dev/disk1 + /dev/disk2 volume=3 # <- exclusive span + +volume.config:: + + volume=1 scheme=http size=50% + volume=2 scheme=http size=50% + volume=3 scheme=http size=512 # <- volume forced to a specific exclusive span + +It is important to note that when percentages are used to specify volume sizes +and "exclusive" spans are assigned (forced) to a particular volume (in this case `volume 3`), +the "exclusive" spans (in this case `/dev/disk2`) are excluded from the total cache +space when the "non-forced" volume sizes are calculated (in this case `volume 1` and `volume 2`). 
+ + Examples ======== diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc index 11156c2e6c7..f0131d87733 100644 --- a/iocore/cache/Cache.cc +++ b/iocore/cache/Cache.cc @@ -2530,6 +2530,8 @@ cplist_init() } } +static int fillExclusiveDisks(CacheVol *cp); + void cplist_update() { @@ -2586,6 +2588,37 @@ cplist_update() cp = cp->link.next; } } + + // Look for (exclusive) spans forced to a specific volume but not yet referenced by any volumes in cp_list, + // if found then create a new volume. This also makes sure new exclusive disk volumes are created first + // before any other new volumes to assure proper span free space calculation and proper volume block distribution. + for (config_vol = config_volumes.cp_queue.head; config_vol; config_vol = config_vol->link.next) { + if (nullptr == config_vol->cachep) { + // Find out if this is a forced volume assigned exclusively to a span which was cleared (hence not referenced in cp_list). + // Note: non-exclusive cleared spans are not handled here, only the "exclusive" + for (int d_no = 0; d_no < gndisks; d_no++) { + if (gdisks[d_no]->forced_volume_num == config_vol->number) { + CacheVol *new_cp = new CacheVol(); + if (nullptr != new_cp) { + new_cp->disk_vols = (DiskVol **)ats_malloc(gndisks * sizeof(DiskVol *)); + if (nullptr != new_cp->disk_vols) { + memset(new_cp->disk_vols, 0, gndisks * sizeof(DiskVol *)); + new_cp->vol_number = config_vol->number; + new_cp->scheme = config_vol->scheme; + config_vol->cachep = new_cp; + fillExclusiveDisks(config_vol->cachep); + cp_list.enqueue(new_cp); + } else { + delete new_cp; + } + } + } + } + } else { + // Fill if this is exclusive disk. 
+ fillExclusiveDisks(config_vol->cachep); + } + } } static int @@ -2599,20 +2632,32 @@ fillExclusiveDisks(CacheVol *cp) if (gdisks[i]->forced_volume_num != volume_number) { continue; } - /* The user had created several volumes before - clear the disk - and create one volume for http */ - for (int j = 0; j < static_cast(gdisks[i]->header->num_volumes); j++) { + + /* OK, this should be an "exclusive" disk (span). */ + diskCount++; + + /* There should be a single "forced" volume and no other volumes should exist on this "exclusive" disk (span) */ + bool found_nonforced_volumes = false; + for (int j = 0; j < (int)gdisks[i]->header->num_volumes; j++) { if (volume_number != gdisks[i]->disk_vols[j]->vol_number) { - Note("Clearing Disk: %s", gdisks[i]->path); - gdisks[i]->delete_all_volumes(); + found_nonforced_volumes = true; break; } } - diskCount++; + + if (found_nonforced_volumes) { + /* The user had created several volumes before - clear the disk and create one volume for http */ + Note("Clearing Disk: %s", gdisks[i]->path); + gdisks[i]->delete_all_volumes(); + } else if (1 == gdisks[i]->header->num_volumes) { + /* "Forced" volumes take the whole disk (span) hence nothing more to do for this span. */ + continue; + } + + /* Now, volumes have been either deleted or did not existing to begin with so we need to create them. */ int64_t size_diff = gdisks[i]->num_usable_blocks; DiskVolBlock *dpb; - do { dpb = gdisks[i]->create_volume(volume_number, size_diff, cp->scheme); if (dpb) { @@ -2628,6 +2673,8 @@ fillExclusiveDisks(CacheVol *cp) } } while ((size_diff > 0)); } + + /* Report back the number of disks (spans) that were assigned to volume specified by volume_number. */ return diskCount; } @@ -2692,7 +2739,11 @@ cplist_reconfigure() /* sum up the total space available on all the disks. 
round down the space to 128 megabytes */ for (int i = 0; i < gndisks; i++) { - tot_space_in_blks += (gdisks[i]->num_usable_blocks / blocks_per_vol) * blocks_per_vol; + // Exclude exclusive disks (with forced volumes) from the following total space calculation, + // in such a way forced volumes will not impact volume percentage calculations. + if (-1 == gdisks[i]->forced_volume_num) { + tot_space_in_blks += (gdisks[i]->num_usable_blocks / blocks_per_vol) * blocks_per_vol; + } } double percent_remaining = 100.00; @@ -2721,16 +2772,8 @@ cplist_reconfigure() config_vol->ramcache_enabled); } cplist_update(); - /* go through volume config and grow and create volumes */ - - for (config_vol = config_volumes.cp_queue.head; config_vol; config_vol = config_vol->link.next) { - // if volume is given exclusive disks, fill here and continue - if (!config_vol->cachep) { - continue; - } - fillExclusiveDisks(config_vol->cachep); - } + /* go through volume config and grow and create volumes */ for (config_vol = config_volumes.cp_queue.head; config_vol; config_vol = config_vol->link.next) { size = config_vol->size; if (size < 128) {