From 4b41b23f8e7d91309b8594fe4e84b1434c7488d9 Mon Sep 17 00:00:00 2001 From: Slavka Peleva Date: Fri, 15 Sep 2023 15:16:21 +0300 Subject: [PATCH 1/4] KVM Host HA enhancement for StorPool storage Extending the current functionality of KVM Host HA for StorPool storage plugin and option for easy integration for the rest of the storage plugins --- .../java/com/cloud/agent/api/to/HostTO.java | 12 ++ .../cloud/agent/api/CheckOnHostCommand.java | 11 ++ .../com/cloud/agent/api/FenceCommand.java | 16 +++ .../api/storage/PrimaryDataStoreDriver.java | 6 + .../com/cloud/ha/HighAvailabilityManager.java | 3 + .../java/com/cloud/ha/KVMInvestigator.java | 47 +++--- .../hypervisor/kvm/resource/KVMHABase.java | 85 ++++++++--- .../hypervisor/kvm/resource/KVMHAChecker.java | 42 ++---- .../hypervisor/kvm/resource/KVMHAMonitor.java | 87 +++++------ .../kvm/resource/KVMHAVMActivityChecker.java | 45 ++---- .../LibvirtCheckOnHostCommandWrapper.java | 8 +- ...VMActivityOnStoragePoolCommandWrapper.java | 15 +- .../wrapper/LibvirtFenceCommandWrapper.java | 6 +- .../kvm/storage/IscsiAdmStoragePool.java | 34 +++++ .../kvm/storage/KVMStoragePool.java | 23 +++ .../kvm/storage/KVMStoragePoolManager.java | 7 +- .../kvm/storage/LibvirtStorageAdaptor.java | 1 + .../kvm/storage/LibvirtStoragePool.java | 95 ++++++++++++ .../kvm/storage/ScaleIOStoragePool.java | 34 +++++ .../cloudstack/kvm/ha/KVMHAProvider.java | 5 +- .../kvm/ha/KVMHostActivityChecker.java | 3 +- .../LibvirtComputingResourceTest.java | 6 +- .../driver/DateraPrimaryDataStoreDriver.java | 11 ++ .../CloudStackPrimaryDataStoreDriverImpl.java | 9 ++ .../kvm/storage/LinstorStoragePool.java | 34 +++++ .../LinstorPrimaryDataStoreDriverImpl.java | 11 ++ .../driver/NexentaPrimaryDataStoreDriver.java | 11 ++ .../SamplePrimaryDataStoreDriverImpl.java | 11 ++ .../driver/ScaleIOPrimaryDataStoreDriver.java | 10 ++ .../SolidFirePrimaryDataStoreDriver.java | 9 ++ .../StorPoolModifyStoragePoolAnswer.java | 12 +- .../StorPoolModifyStorageCommandWrapper.java | 39 ++--- .../kvm/storage/StorPoolStoragePool.java | 135 ++++++++++++++++++ .../StorPoolPrimaryDataStoreDriver.java | 17 +++ .../provider/StorPoolHostListener.java | 23 ++- scripts/vm/hypervisor/kvm/kvmspheartbeat.sh | 68 +++++++++ .../cloud/ha/HighAvailabilityManagerImpl.java | 30 +++- .../src/main/java/com/cloud/ha/KVMFencer.java | 1 + .../ha/HighAvailabilityManagerImplTest.java | 56 ++++---- 39 files changed, 849 insertions(+), 229 deletions(-) create mode 100755 scripts/vm/hypervisor/kvm/kvmspheartbeat.sh diff --git a/api/src/main/java/com/cloud/agent/api/to/HostTO.java b/api/src/main/java/com/cloud/agent/api/to/HostTO.java index 967a63d63b5b..ee14e6597183 100644 --- a/api/src/main/java/com/cloud/agent/api/to/HostTO.java +++ b/api/src/main/java/com/cloud/agent/api/to/HostTO.java @@ -24,6 +24,7 @@ public class HostTO { private NetworkTO publicNetwork; private NetworkTO storageNetwork1; private NetworkTO storageNetwork2; + private String parent; protected HostTO() { } @@ -40,6 +41,9 @@ public HostTO(Host vo) { if (vo.getStorageIpAddressDeux() != null) { storageNetwork2 = new NetworkTO(vo.getStorageIpAddressDeux(), vo.getStorageNetmaskDeux(), vo.getStorageMacAddressDeux()); } + if (vo.getParent() != null) { + parent = vo.getParent(); + } } public String getGuid() { @@ -81,4 +85,12 @@ public NetworkTO getStorageNetwork2() { public void setStorageNetwork2(NetworkTO storageNetwork2) { this.storageNetwork2 = storageNetwork2; } + + public String getParent() { + return parent; + } + + public void setParent(String parent) { + this.parent = parent; + } } diff --git a/core/src/main/java/com/cloud/agent/api/CheckOnHostCommand.java b/core/src/main/java/com/cloud/agent/api/CheckOnHostCommand.java index b4055179e4dc..982e10cf2ebe 100644 --- a/core/src/main/java/com/cloud/agent/api/CheckOnHostCommand.java +++ b/core/src/main/java/com/cloud/agent/api/CheckOnHostCommand.java @@ -24,6 +24,7 @@ public class CheckOnHostCommand extends Command { HostTO host; + boolean reportCheckFailureIfOneStorageIsDown; protected CheckOnHostCommand() { } @@ -33,10 +34,20 @@ public CheckOnHostCommand(Host host) { setWait(20); } + public CheckOnHostCommand(Host host, boolean reportCheckFailureIfOneStorageIsDown) { + super(); + this.host = new HostTO(host); + this.reportCheckFailureIfOneStorageIsDown = reportCheckFailureIfOneStorageIsDown; + } + public HostTO getHost() { return host; } + public boolean isCheckFailedOnOneStorage() { + return reportCheckFailureIfOneStorageIsDown; + } + @Override public boolean executeInSequence() { return false; diff --git a/core/src/main/java/com/cloud/agent/api/FenceCommand.java b/core/src/main/java/com/cloud/agent/api/FenceCommand.java index 88e508ad1e26..2158923223fb 100644 --- a/core/src/main/java/com/cloud/agent/api/FenceCommand.java +++ b/core/src/main/java/com/cloud/agent/api/FenceCommand.java @@ -19,6 +19,7 @@ package com.cloud.agent.api; +import com.cloud.agent.api.to.HostTO; import com.cloud.host.Host; import com.cloud.vm.VirtualMachine; @@ -32,6 +33,8 @@ public FenceCommand() { String hostGuid; String hostIp; boolean inSeq; + HostTO host; + boolean reportCheckFailureIfOneStorageIsDown; public FenceCommand(VirtualMachine vm, Host host) { super(); @@ -39,6 +42,7 @@ public FenceCommand(VirtualMachine vm, Host host) { hostGuid = host.getGuid(); hostIp = host.getPrivateIpAddress(); inSeq = false; + this.host = new HostTO(host); } public void setSeq(boolean inseq) { @@ -61,4 +65,16 @@ public String getHostIp() { public boolean executeInSequence() { return inSeq; } + + public HostTO getHost() { + return host; + } + + public boolean isReportCheckFailureIfOneStorageIsDown() { + return reportCheckFailureIfOneStorageIsDown; + } + + public void setReportCheckFailureIfOneStorageIsDown(boolean reportCheckFailureIfOneStorageIsDown) { + this.reportCheckFailureIfOneStorageIsDown = reportCheckFailureIfOneStorageIsDown; + } } diff --git a/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/PrimaryDataStoreDriver.java b/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/PrimaryDataStoreDriver.java index 540d4f6673aa..f5812a106b8e 100644 --- a/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/PrimaryDataStoreDriver.java +++ b/engine/api/src/main/java/org/apache/cloudstack/engine/subsystem/api/storage/PrimaryDataStoreDriver.java @@ -23,6 +23,8 @@ import com.cloud.host.Host; import com.cloud.storage.StoragePool; +import com.cloud.storage.Volume; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.utils.Pair; public interface PrimaryDataStoreDriver extends DataStoreDriver { @@ -132,4 +134,8 @@ enum QualityOfServiceState { MIGRATION, NO_MIGRATION } * @param tagValue The value of the VM's tag */ void provideVmTags(long vmId, long volumeId, String tagValue); + + boolean isStorageSupportHA(StoragePoolType type); + + void detachVolumeFromAllStorageNodes(Volume volume); } diff --git a/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java b/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java index 22def16c400c..72737d0b04d2 100644 --- a/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java +++ b/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java @@ -72,6 +72,9 @@ public interface HighAvailabilityManager extends Manager { + " which are registered for the HA event that were successful and are now ready to be purged.", true, Cluster); + public static final ConfigKey KvmHAFenceHostIfHeartbeatFailsOnStorage = new ConfigKey<>("Advanced", Boolean.class, "kvm.ha.fence.on.storage.heartbeat.failure", "false", + "Proceed fencing the host even the heartbeat failed for only one storage pool", false, ConfigKey.Scope.Zone); + public enum WorkType { Migration, // Migrating VMs off of a host. Stop, // Stops a VM for storage pool migration purposes. This should be obsolete now. diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java index a76b56a1a4de..d30cf0557e47 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java @@ -27,8 +27,12 @@ import com.cloud.host.dao.HostDao; import com.cloud.hypervisor.Hypervisor; import com.cloud.resource.ResourceManager; -import com.cloud.storage.Storage.StoragePoolType; import com.cloud.utils.component.AdapterBase; + +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreDriver; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProvider; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProviderManager; +import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver; import org.apache.cloudstack.ha.HAManager; import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; @@ -49,6 +53,8 @@ public class KVMInvestigator extends AdapterBase implements Investigator { private PrimaryDataStoreDao _storagePoolDao; @Inject private HAManager haManager; + @Inject + private DataStoreProviderManager dataStoreProviderMgr; @Override public boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) throws UnknownVM { @@ -77,24 +83,14 @@ public Status isAgentAlive(Host agent) { return haManager.getHostStatus(agent); } + //TODO: check storage for HA support List clusterPools = _storagePoolDao.listPoolsByCluster(agent.getClusterId()); - boolean hasNfs = false; - for (StoragePoolVO pool : clusterPools) { - if (pool.getPoolType() == StoragePoolType.NetworkFilesystem) { - hasNfs = true; - break; - } - } - if (!hasNfs) { + boolean storageSupportHA = storageSupportHa(clusterPools); + if (!storageSupportHA) { List zonePools = _storagePoolDao.findZoneWideStoragePoolsByHypervisor(agent.getDataCenterId(), agent.getHypervisorType()); - for (StoragePoolVO pool : zonePools) { - if (pool.getPoolType() == StoragePoolType.NetworkFilesystem) { - hasNfs = true; - break; - } - } + storageSupportHA = storageSupportHa(zonePools); } - if (!hasNfs) { + if (!storageSupportHA) { s_logger.warn( "Agent investigation was requested on host " + agent + ", but host does not support investigation because it has no NFS storage. Skipping investigation."); return Status.Disconnected; @@ -102,7 +98,8 @@ public Status isAgentAlive(Host agent) { Status hostStatus = null; Status neighbourStatus = null; - CheckOnHostCommand cmd = new CheckOnHostCommand(agent); + boolean reportFailureIfOneStorageIsDown = HighAvailabilityManager.KvmHAFenceHostIfHeartbeatFailsOnStorage.value(); + CheckOnHostCommand cmd = new CheckOnHostCommand(agent, reportFailureIfOneStorageIsDown); try { Answer answer = _agentMgr.easySend(agent.getId(), cmd); @@ -145,4 +142,20 @@ public Status isAgentAlive(Host agent) { s_logger.debug("HA: HOST is ineligible legacy state " + hostStatus + " for host " + agent.getId()); return hostStatus; } + + private boolean storageSupportHa(List pools) { + boolean storageSupportHA = false; + for (StoragePoolVO pool : pools) { + DataStoreProvider storeProvider = dataStoreProviderMgr.getDataStoreProvider(pool.getStorageProviderName()); + DataStoreDriver storeDriver = storeProvider.getDataStoreDriver(); + if (storeDriver instanceof PrimaryDataStoreDriver) { + PrimaryDataStoreDriver primaryStoreDriver = (PrimaryDataStoreDriver)storeDriver; + if (primaryStoreDriver.isStorageSupportHA(pool.getPoolType())) { + storageSupportHA = true; + break; + } + } + } + return storageSupportHA; + } } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java index fd1122f07ad3..093070fddd6d 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java @@ -24,6 +24,7 @@ import org.libvirt.StoragePoolInfo; import org.libvirt.StoragePoolInfo.StoragePoolState; +import com.cloud.hypervisor.kvm.storage.KVMStoragePool; import com.cloud.utils.script.OutputInterpreter; import com.cloud.utils.script.OutputInterpreter.AllLinesParser; import com.cloud.utils.script.Script; @@ -41,26 +42,76 @@ public static enum PoolType { PrimaryStorage, SecondaryStorage } - public static class NfsStoragePool { - String _poolUUID; - String _poolIp; - String _poolMountSourcePath; - String _mountDestPath; - PoolType _type; + public static class HAStoragePool { + String poolUuid; + String poolIp; + String poolMountSourcePath; + String mountDestPath; + PoolType poolType; + KVMStoragePool pool; + + public HAStoragePool(KVMStoragePool pool, String host, String path, PoolType type) { + this.pool = pool; + this.poolUuid = pool.getUuid(); + this.mountDestPath = pool.getLocalPath(); + this.poolIp = host; + this.poolMountSourcePath = path; + this.poolType = type; + } + + public String getPoolUUID() { + return poolUuid; + } + + public void setPoolUUID(String poolUuid) { + this.poolUuid = poolUuid; + } + + public String getPoolIp() { + return poolIp; + } + + public void setPoolIp(String poolIp) { + this.poolIp = poolIp; + } + + public String getPoolMountSourcePath() { + return poolMountSourcePath; + } + + public void setPoolMountSourcePath(String poolMountSourcePath) { + this.poolMountSourcePath = poolMountSourcePath; + } + + public String getMountDestPath() { + return mountDestPath; + } + + public void setMountDestPath(String mountDestPath) { + this.mountDestPath = mountDestPath; + } + + public PoolType getType() { + return poolType; + } + + public void setType(PoolType type) { + this.poolType = type; + } + + public KVMStoragePool getPool() { + return pool; + } - public NfsStoragePool(String poolUUID, String poolIp, String poolSourcePath, String mountDestPath, PoolType type) { - _poolUUID = poolUUID; - _poolIp = poolIp; - _poolMountSourcePath = poolSourcePath; - _mountDestPath = mountDestPath; - _type = type; + public void setPool(KVMStoragePool pool) { + this.pool = pool; } } - protected String checkingMountPoint(NfsStoragePool pool, String poolName) { - String mountSource = pool._poolIp + ":" + pool._poolMountSourcePath; + protected String checkingMountPoint(HAStoragePool pool, String poolName) { + String mountSource = pool.getPoolIp() + ":" + pool.getPoolMountSourcePath(); String mountPaths = Script.runSimpleBashScript("cat /proc/mounts | grep " + mountSource); - String destPath = pool._mountDestPath; + String destPath = pool.getMountDestPath(); if (mountPaths != null) { String token[] = mountPaths.split(" "); @@ -100,12 +151,12 @@ protected String checkingMountPoint(NfsStoragePool pool, String poolName) { return destPath; } - protected String getMountPoint(NfsStoragePool storagePool) { + protected String getMountPoint(HAStoragePool storagePool) { StoragePool pool = null; String poolName = null; try { - pool = LibvirtConnection.getConnection().storagePoolLookupByUUIDString(storagePool._poolUUID); + pool = LibvirtConnection.getConnection().storagePoolLookupByUUIDString(storagePool.getPoolUUID()); if (pool != null) { StoragePoolInfo spi = pool.getInfo(); if (spi.state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java index 5ceaef2bb197..2df70375107b 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java @@ -22,18 +22,18 @@ import org.apache.log4j.Logger; -import com.cloud.utils.script.OutputInterpreter; -import com.cloud.utils.script.Script; +import com.cloud.agent.api.to.HostTO; public class KVMHAChecker extends KVMHABase implements Callable { private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class); - private List nfsStoragePools; - private String hostIp; - private long heartBeatCheckerTimeout = 360000; // 6 minutes + private List storagePools; + private HostTO host; + private boolean reportFailureIfOneStorageIsDown; - public KVMHAChecker(List pools, String host) { - this.nfsStoragePools = pools; - this.hostIp = host; + public KVMHAChecker(List pools, HostTO host, boolean reportFailureIfOneStorageIsDown) { + this.storagePools = pools; + this.host = host; + this.reportFailureIfOneStorageIsDown = reportFailureIfOneStorageIsDown; } /* @@ -44,30 +44,14 @@ public KVMHAChecker(List pools, String host) { public Boolean checkingHeartBeat() { boolean validResult = false; - String hostAndPools = String.format("host IP [%s] in pools [%s]", hostIp, nfsStoragePools.stream().map(pool -> pool._poolIp).collect(Collectors.joining(", "))); + String hostAndPools = String.format("host IP [%s] in pools [%s]", host.getPrivateNetwork().getIp(), storagePools.stream().map(pool -> pool.getPoolUUID()).collect(Collectors.joining(", "))); s_logger.debug(String.format("Checking heart beat with KVMHAChecker for %s", hostAndPools)); - for (NfsStoragePool pool : nfsStoragePools) { - Script cmd = new Script(s_heartBeatPath, heartBeatCheckerTimeout, s_logger); - cmd.add("-i", pool._poolIp); - cmd.add("-p", pool._poolMountSourcePath); - cmd.add("-m", pool._mountDestPath); - cmd.add("-h", hostIp); - cmd.add("-r"); - cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000)); - OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); - String result = cmd.execute(parser); - String parsedLine = parser.getLine(); - - s_logger.debug(String.format("Checking heart beat with KVMHAChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine, - pool._poolIp)); - - if (result == null && parsedLine.contains("DEAD")) { - s_logger.warn(String.format("Checking heart beat with KVMHAChecker command [%s] returned [%s]. [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(), - result, parsedLine, hostIp)); - } else { - validResult = true; + for (HAStoragePool pool : storagePools) { + validResult = pool.getPool().checkingHeartBeat(pool, host); + if (reportFailureIfOneStorageIsDown && !validResult) { + break; } } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java index 022b48c929f9..c5ca4bc0ce41 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java @@ -18,6 +18,7 @@ import com.cloud.agent.properties.AgentProperties; import com.cloud.agent.properties.AgentPropertiesFileHandler; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.utils.script.Script; import org.apache.log4j.Logger; import org.libvirt.Connect; @@ -35,14 +36,14 @@ public class KVMHAMonitor extends KVMHABase implements Runnable { private static final Logger s_logger = Logger.getLogger(KVMHAMonitor.class); - private final Map storagePool = new ConcurrentHashMap<>(); + private final Map storagePool = new ConcurrentHashMap<>(); private final boolean rebootHostAndAlertManagementOnHeartbeatTimeout; private final String hostPrivateIp; - public KVMHAMonitor(NfsStoragePool pool, String host, String scriptPath) { + public KVMHAMonitor(HAStoragePool pool, String host, String scriptPath) { if (pool != null) { - storagePool.put(pool._poolUUID, pool); + storagePool.put(pool.getPoolUUID(), pool); } hostPrivateIp = host; configureHeartBeatPath(scriptPath); @@ -55,29 +56,29 @@ private static synchronized void configureHeartBeatPath(String scriptPath) { KVMHABase.s_heartBeatPath = scriptPath; } - public void addStoragePool(NfsStoragePool pool) { + public void addStoragePool(HAStoragePool pool) { synchronized (storagePool) { - storagePool.put(pool._poolUUID, pool); + storagePool.put(pool.getPoolUUID(), pool); } } public void removeStoragePool(String uuid) { synchronized (storagePool) { - NfsStoragePool pool = storagePool.get(uuid); + HAStoragePool pool = storagePool.get(uuid); if (pool != null) { - Script.runSimpleBashScript("umount " + pool._mountDestPath); + Script.runSimpleBashScript("umount " + pool.getMountDestPath()); storagePool.remove(uuid); } } } - public List getStoragePools() { + public List getStoragePools() { synchronized (storagePool) { return new ArrayList<>(storagePool.values()); } } - public NfsStoragePool getStoragePool(String uuid) { + public HAStoragePool getStoragePool(String uuid) { synchronized (storagePool) { return storagePool.get(uuid); } @@ -87,41 +88,39 @@ protected void runHeartBeat() { synchronized (storagePool) { Set removedPools = new HashSet<>(); for (String uuid : storagePool.keySet()) { - NfsStoragePool primaryStoragePool = storagePool.get(uuid); + HAStoragePool primaryStoragePool = storagePool.get(uuid); StoragePool storage; - try { - Connect conn = LibvirtConnection.getConnection(); - storage = conn.storagePoolLookupByUUIDString(uuid); - if (storage == null || storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) { - if (storage == null) { - s_logger.debug(String.format("Libvirt storage pool [%s] not found, removing from HA list.", uuid)); - } else { - s_logger.debug(String.format("Libvirt storage pool [%s] found, but not running, removing from HA list.", uuid)); + if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) { + try { + Connect conn = LibvirtConnection.getConnection(); + storage = conn.storagePoolLookupByUUIDString(uuid); + if (storage == null || storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) { + if (storage == null) { + s_logger.debug(String.format("Libvirt storage pool [%s] not found, removing from HA list.", uuid)); + } else { + s_logger.debug(String.format("Libvirt storage pool [%s] found, but not running, removing from HA list.", uuid)); + } + + removedPools.add(uuid); + continue; } - removedPools.add(uuid); - continue; - } - - s_logger.debug(String.format("Found NFS storage pool [%s] in libvirt, continuing.", uuid)); + s_logger.debug(String.format("Found NFS storage pool [%s] in libvirt, continuing.", uuid)); - } catch (LibvirtException e) { - s_logger.debug(String.format("Failed to lookup libvirt storage pool [%s].", uuid), e); + } catch (LibvirtException e) { + s_logger.debug(String.format("Failed to lookup libvirt storage pool [%s].", uuid), e); - if (e.toString().contains("pool not found")) { - s_logger.debug(String.format("Removing pool [%s] from HA monitor since it was deleted.", uuid)); - removedPools.add(uuid); - continue; + if (e.toString().contains("pool not found")) { + s_logger.debug(String.format("Removing pool [%s] from HA monitor since it was deleted.", uuid)); + removedPools.add(uuid); + continue; + } } - } String result = null; for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) { - Script cmd = createHeartBeatCommand(primaryStoragePool, hostPrivateIp, true); - result = cmd.execute(); - - s_logger.debug(String.format("The command (%s), to the pool [%s], has the result [%s].", cmd.toString(), uuid, result)); + result = primaryStoragePool.getPool().createHeartBeatCommand(primaryStoragePool, hostPrivateIp, true); if (result != null) { s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", uuid, result, i, _heartBeatUpdateMaxTries)); @@ -138,8 +137,7 @@ protected void runHeartBeat() { if (result != null && rebootHostAndAlertManagementOnHeartbeatTimeout) { s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; stopping cloudstack-agent.", uuid, result)); - Script cmd = createHeartBeatCommand(primaryStoragePool, null, false); - result = cmd.execute(); + primaryStoragePool.getPool().createHeartBeatCommand(primaryStoragePool, null, false);; } } @@ -152,23 +150,6 @@ protected void runHeartBeat() { } - private Script createHeartBeatCommand(NfsStoragePool primaryStoragePool, String hostPrivateIp, boolean hostValidation) { - Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger); - cmd.add("-i", primaryStoragePool._poolIp); - cmd.add("-p", primaryStoragePool._poolMountSourcePath); - cmd.add("-m", primaryStoragePool._mountDestPath); - - if (hostValidation) { - cmd.add("-h", hostPrivateIp); - } - - if (!hostValidation) { - cmd.add("-c"); - } - - return cmd; - } - @Override public void run() { while (true) { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java index 758edd2a24b0..e6937b515e95 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java @@ -16,54 +16,31 @@ // under the License. package com.cloud.hypervisor.kvm.resource; -import com.cloud.utils.script.OutputInterpreter; -import com.cloud.utils.script.Script; -import org.apache.log4j.Logger; +import com.cloud.agent.api.to.HostTO; import org.joda.time.Duration; import java.util.concurrent.Callable; public class KVMHAVMActivityChecker extends KVMHABase implements Callable { - private static final Logger LOG = Logger.getLogger(KVMHAVMActivityChecker.class); - final private NfsStoragePool nfsStoragePool; - final private String hostIP; - final private String volumeUuidList; - final private String vmActivityCheckPath; - final private Duration activityScriptTimeout = Duration.standardSeconds(3600L); - final private long suspectTimeInSeconds; + private final HAStoragePool storagePool; + private final String volumeUuidList; + private final String vmActivityCheckPath; + private final Duration activityScriptTimeout = Duration.standardSeconds(3600L); + private final long suspectTimeInSeconds; + private final HostTO host; - public KVMHAVMActivityChecker(final NfsStoragePool pool, final String host, final String volumeUUIDListString, String vmActivityCheckPath, final long suspectTime) { - this.nfsStoragePool = pool; - this.hostIP = host; + public KVMHAVMActivityChecker(final HAStoragePool pool, final HostTO host, final String volumeUUIDListString, String vmActivityCheckPath, final long suspectTime) { + this.storagePool = pool; this.volumeUuidList = volumeUUIDListString; this.vmActivityCheckPath = vmActivityCheckPath; this.suspectTimeInSeconds = suspectTime; + this.host = host; } @Override public Boolean checkingHeartBeat() { - Script cmd = new Script(vmActivityCheckPath, activityScriptTimeout.getStandardSeconds(), LOG); - cmd.add("-i", nfsStoragePool._poolIp); - cmd.add("-p", nfsStoragePool._poolMountSourcePath); - cmd.add("-m", nfsStoragePool._mountDestPath); - cmd.add("-h", hostIP); - cmd.add("-u", volumeUuidList); - cmd.add("-t", String.valueOf(String.valueOf(System.currentTimeMillis() / 1000))); - cmd.add("-d", String.valueOf(suspectTimeInSeconds)); - OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); - - String result = cmd.execute(parser); - String parsedLine = parser.getLine(); - - LOG.debug(String.format("Checking heart beat with KVMHAVMActivityChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine, nfsStoragePool._poolIp)); - - if (result == null && parsedLine.contains("DEAD")) { - LOG.warn(String.format("Checking heart beat with KVMHAVMActivityChecker command [%s] returned [%s]. It is [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(), result, parsedLine, hostIP)); - return false; - } else { - return true; - } + return this.storagePool.getPool().vmActivityCheck(storagePool, host, activityScriptTimeout, volumeUuidList, vmActivityCheckPath, suspectTimeInSeconds); } @Override diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java index ff72436ec7d1..48996a7ba97c 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java @@ -28,8 +28,7 @@ import com.cloud.agent.api.Answer; import com.cloud.agent.api.CheckOnHostCommand; import com.cloud.agent.api.to.HostTO; -import com.cloud.agent.api.to.NetworkTO; -import com.cloud.hypervisor.kvm.resource.KVMHABase.NfsStoragePool; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.hypervisor.kvm.resource.KVMHAChecker; import com.cloud.hypervisor.kvm.resource.KVMHAMonitor; import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; @@ -44,10 +43,9 @@ public Answer execute(final CheckOnHostCommand command, final LibvirtComputingRe final ExecutorService executors = Executors.newSingleThreadExecutor(); final KVMHAMonitor monitor = libvirtComputingResource.getMonitor(); - final List pools = monitor.getStoragePools(); + final List pools = monitor.getStoragePools(); final HostTO host = command.getHost(); - final NetworkTO privateNetwork = host.getPrivateNetwork(); - final KVMHAChecker ha = new KVMHAChecker(pools, privateNetwork.getIp()); + final KVMHAChecker ha = new KVMHAChecker(pools, host, command.isCheckFailedOnOneStorage()); final Future future = executors.submit(ha); try { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java index 9c899a040550..a708d441be59 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java @@ -22,13 +22,14 @@ import com.cloud.agent.api.Answer; import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand; import com.cloud.agent.api.to.StorageFilerTO; -import com.cloud.hypervisor.kvm.resource.KVMHABase.NfsStoragePool; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; +import com.cloud.hypervisor.kvm.storage.KVMStoragePool; +import com.cloud.hypervisor.kvm.storage.KVMStoragePoolManager; import com.cloud.hypervisor.kvm.resource.KVMHAMonitor; import com.cloud.hypervisor.kvm.resource.KVMHAVMActivityChecker; import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; import com.cloud.resource.CommandWrapper; import com.cloud.resource.ResourceWrapper; -import com.cloud.storage.Storage; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -43,9 +44,13 @@ public Answer execute(final CheckVMActivityOnStoragePoolCommand command, final L final ExecutorService executors = Executors.newSingleThreadExecutor(); final KVMHAMonitor monitor = libvirtComputingResource.getMonitor(); final StorageFilerTO pool = command.getPool(); - if (Storage.StoragePoolType.NetworkFilesystem == pool.getType()){ - final NfsStoragePool nfspool = monitor.getStoragePool(pool.getUuid()); - final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost().getPrivateNetwork().getIp(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds()); + final KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr(); + + KVMStoragePool primaryPool = storagePoolMgr.getStoragePool(pool.getType(), pool.getUuid()); + + if (primaryPool.isPoolSupportHA()){ + final HAStoragePool nfspool = monitor.getStoragePool(pool.getUuid()); + final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds()); final Future future = executors.submit(ha); try { final Boolean result = future.get(); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtFenceCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtFenceCommandWrapper.java index e41a896fae42..9a6ee7a41700 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtFenceCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtFenceCommandWrapper.java @@ -30,7 +30,7 @@ import com.cloud.agent.api.Answer; import com.cloud.agent.api.FenceAnswer; import com.cloud.agent.api.FenceCommand; -import com.cloud.hypervisor.kvm.resource.KVMHABase.NfsStoragePool; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.hypervisor.kvm.resource.KVMHAChecker; import com.cloud.hypervisor.kvm.resource.KVMHAMonitor; import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; @@ -47,7 +47,7 @@ public Answer execute(final FenceCommand command, final LibvirtComputingResource final ExecutorService executors = Executors.newSingleThreadExecutor(); final KVMHAMonitor monitor = libvirtComputingResource.getMonitor(); - final List pools = monitor.getStoragePools(); + final List pools = monitor.getStoragePools(); /** * We can only safely fence off hosts when we use NFS @@ -60,7 +60,7 @@ public Answer execute(final FenceCommand command, final LibvirtComputingResource return new FenceAnswer(command, false, logline); } - final KVMHAChecker ha = new KVMHAChecker(pools, command.getHostIp()); + final KVMHAChecker ha = new KVMHAChecker(pools, command.getHost(), command.isReportCheckFailureIfOneStorageIsDown()); final Future future = executors.submit(ha); try { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/IscsiAdmStoragePool.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/IscsiAdmStoragePool.java index 09034c653250..a89650f6eb65 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/IscsiAdmStoragePool.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/IscsiAdmStoragePool.java @@ -22,7 +22,10 @@ import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; +import org.joda.time.Duration; +import com.cloud.agent.api.to.HostTO; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.storage.Storage; import com.cloud.storage.Storage.StoragePoolType; @@ -183,4 +186,35 @@ public String toString() { return new ToStringBuilder(this, ToStringStyle.JSON_STYLE).append("uuid", getUuid()).append("path", getLocalPath()).toString(); } + @Override + public boolean isPoolSupportHA() { + return false; + } + + @Override + public String getHearthBeatPath() { + return null; + } + + @Override + public String createHeartBeatCommand(HAStoragePool primaryStoragePool, String hostPrivateIp, + boolean hostValidation) { + return null; + } + + @Override + public String getStorageNodeId() { + return null; + } + + @Override + public Boolean checkingHeartBeat(HAStoragePool pool, HostTO host) { + return null; + } + + @Override + public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activityScriptTimeout, String volumeUUIDListString, String vmActivityCheckPath, long duration) { + return null; + } + } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePool.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePool.java index 3bff9c9852e9..98e779253cc3 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePool.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePool.java @@ -20,11 +20,22 @@ import java.util.Map; import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat; +import org.joda.time.Duration; +import com.cloud.agent.api.to.HostTO; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.storage.Storage; import com.cloud.storage.Storage.StoragePoolType; public interface KVMStoragePool { + + public static final long HeartBeatUpdateTimeout = 60000; + public static final long HeartBeatUpdateFreq = 60000; + public static final long HeartBeatUpdateMaxTries = 5; + public static final long HeartBeatUpdateRetrySleep = 10000; + public static final long HeartBeatCheckerTimeout = 360000; // 6 minutes + + public KVMPhysicalDisk createPhysicalDisk(String volumeUuid, PhysicalDiskFormat format, Storage.ProvisioningType provisioningType, long size, byte[] passphrase); public KVMPhysicalDisk createPhysicalDisk(String volumeUuid, Storage.ProvisioningType provisioningType, long size, byte[] passphrase); @@ -74,4 +85,16 @@ public interface KVMStoragePool { public boolean supportsConfigDriveIso(); public Map getDetails(); + + public boolean isPoolSupportHA(); + + public String getHearthBeatPath(); + + public String createHeartBeatCommand(HAStoragePool primaryStoragePool, String hostPrivateIp, boolean hostValidation); + + public String getStorageNodeId(); + + public Boolean checkingHeartBeat(HAStoragePool pool, HostTO host); + + public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activityScriptTimeout, String volumeUUIDListString, String vmActivityCheckPath, long duration); } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java index bfaa799e134b..7981cd632cce 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java @@ -360,9 +360,10 @@ private synchronized KVMStoragePool createStoragePool(String name, String host, KVMStoragePool pool = adaptor.createStoragePool(name, host, port, path, userInfo, type, details); // LibvirtStorageAdaptor-specific statement - if (type == StoragePoolType.NetworkFilesystem && primaryStorage) { - KVMHABase.NfsStoragePool nfspool = new KVMHABase.NfsStoragePool(pool.getUuid(), host, path, pool.getLocalPath(), PoolType.PrimaryStorage); - _haMonitor.addStoragePool(nfspool); + if (pool.isPoolSupportHA() && primaryStorage) { + //KVMHABase.HAStoragePool nfspool = new KVMHABase.HAStoragePool(pool.getUuid(), host, path, pool.getLocalPath(), PoolType.PrimaryStorage); + KVMHABase.HAStoragePool storagePool = new KVMHABase.HAStoragePool(pool, host, path, PoolType.PrimaryStorage); + _haMonitor.addStoragePool(storagePool); } StoragePoolInformation info = new StoragePoolInformation(name, host, port, path, userInfo, type, details, primaryStorage); addStoragePool(pool.getUuid(), info); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java index 183a36446cf2..bdaa419c6983 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStorageAdaptor.java @@ -72,6 +72,7 @@ import java.util.Set; import java.util.stream.Collectors; + public class LibvirtStorageAdaptor implements StorageAdaptor { private static final Logger s_logger = Logger.getLogger(LibvirtStorageAdaptor.class); private StorageLayer _storageLayer; diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java index 4a449ab57feb..d81b40391d09 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java @@ -21,15 +21,22 @@ import java.util.Map; import org.apache.log4j.Logger; +import org.joda.time.Duration; import org.libvirt.StoragePool; import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; +import com.cloud.agent.api.to.HostTO; +import com.cloud.agent.properties.AgentProperties; +import com.cloud.agent.properties.AgentPropertiesFileHandler; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.storage.Storage; import com.cloud.storage.Storage.StoragePoolType; import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.utils.script.OutputInterpreter; +import com.cloud.utils.script.Script; public class LibvirtStoragePool implements KVMStoragePool { private static final Logger s_logger = Logger.getLogger(LibvirtStoragePool.class); @@ -287,8 +294,96 @@ public Map getDetails() { return null; } + @Override + public boolean isPoolSupportHA() { + return type == StoragePoolType.NetworkFilesystem; + } + + public String getHearthBeatPath() { + if (type == StoragePoolType.NetworkFilesystem) { + String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR); + return Script.findScript(kvmScriptsDir, "kvmheartbeat.sh"); + } + return null; + } + + + public String createHeartBeatCommand(HAStoragePool primaryStoragePool, String hostPrivateIp, boolean hostValidation) { + Script cmd = new Script(primaryStoragePool.getPool().getHearthBeatPath(), HeartBeatUpdateTimeout, s_logger); + cmd.add("-i", primaryStoragePool.getPoolIp()); + cmd.add("-p", primaryStoragePool.getPoolMountSourcePath()); + cmd.add("-m", primaryStoragePool.getMountDestPath()); + + if (hostValidation) { + cmd.add("-h", hostPrivateIp); + } + + if (!hostValidation) { + cmd.add("-c"); + } + + return cmd.execute(); + } + @Override public String toString() { return new ToStringBuilder(this, ToStringStyle.JSON_STYLE).append("uuid", getUuid()).append("path", getLocalPath()).toString(); } + + @Override + public String getStorageNodeId() { + return null; + } + + @Override + public Boolean checkingHeartBeat(HAStoragePool pool, HostTO host) { + boolean validResult = false; + String hostIp = host.getPrivateNetwork().getIp(); + Script cmd = new Script(getHearthBeatPath(), HeartBeatCheckerTimeout, s_logger); + cmd.add("-i", pool.getPoolIp()); + cmd.add("-p", pool.getPoolMountSourcePath()); + cmd.add("-m", pool.getMountDestPath()); + cmd.add("-h", hostIp); + cmd.add("-r"); + cmd.add("-t", String.valueOf(HeartBeatUpdateFreq / 1000)); + OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); + String result = cmd.execute(parser); + String parsedLine = parser.getLine(); + + s_logger.debug(String.format("Checking heart beat with KVMHAChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine, + pool.getPoolIp())); + + if (result == null && parsedLine.contains("DEAD")) { + s_logger.warn(String.format("Checking heart beat with KVMHAChecker command [%s] returned [%s]. [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(), + result, parsedLine, hostIp)); + } else { + validResult = true; + } + return validResult; + } + + @Override + public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activityScriptTimeout, String volumeUUIDListString, String vmActivityCheckPath, long duration) { + Script cmd = new Script(vmActivityCheckPath, activityScriptTimeout.getStandardSeconds(), s_logger); + cmd.add("-i", pool.getPoolIp()); + cmd.add("-p", pool.getPoolMountSourcePath()); + cmd.add("-m", pool.getMountDestPath()); + cmd.add("-h", host.getPrivateNetwork().getIp()); + cmd.add("-u", volumeUUIDListString); + cmd.add("-t", String.valueOf(String.valueOf(System.currentTimeMillis() / 1000))); + cmd.add("-d", String.valueOf(duration)); + OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); + + String result = cmd.execute(parser); + String parsedLine = parser.getLine(); + + s_logger.debug(String.format("Checking heart beat with KVMHAVMActivityChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine, pool.getPoolIp())); + + if (result == null && parsedLine.contains("DEAD")) { + s_logger.warn(String.format("Checking heart beat with KVMHAVMActivityChecker command [%s] returned [%s]. It is [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(), result, parsedLine, host.getPrivateNetwork().getIp())); + return false; + } else { + return true; + } + } } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStoragePool.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStoragePool.java index cf977f5467be..293ff29f984f 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStoragePool.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStoragePool.java @@ -23,7 +23,10 @@ import org.apache.cloudstack.storage.datastore.client.ScaleIOGatewayClient; import org.apache.cloudstack.storage.datastore.util.ScaleIOUtil; import org.apache.cloudstack.utils.qemu.QemuImg; +import org.joda.time.Duration; +import com.cloud.agent.api.to.HostTO; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.storage.Storage; public class ScaleIOStoragePool implements KVMStoragePool { @@ -205,4 +208,35 @@ public boolean supportsConfigDriveIso() { public Map getDetails() { return this.details; } + + @Override + public boolean isPoolSupportHA() { + return false; + } + + @Override + public String getHearthBeatPath() { + return null; + } + + @Override + public String createHeartBeatCommand(HAStoragePool primaryStoragePool, String hostPrivateIp, + boolean hostValidation) { + return null; + } + + @Override + public String getStorageNodeId() { + return null; + } + + @Override + public Boolean checkingHeartBeat(HAStoragePool pool, HostTO host) { + return null; + } + + @Override + public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activityScriptTimeout, String volumeUUIDListString, String vmActivityCheckPath, long duration) { + return null; + } } diff --git a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHAProvider.java b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHAProvider.java index 5399fd23a1cd..14922727d34d 100644 --- a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHAProvider.java +++ b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHAProvider.java @@ -86,6 +86,7 @@ public boolean recover(Host r) throws HARecoveryException { @Override public boolean fence(Host r) throws HAFenceException { + try { if (outOfBandManagementService.isOutOfBandManagementEnabled(r)){ final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.OFF, null); @@ -96,7 +97,7 @@ public boolean fence(Host r) throws HAFenceException { } } catch (Exception e){ LOG.warn("OOBM service is not configured or enabled for this host " + r.getName() + " error is " + e.getMessage()); - throw new HAFenceException("OOBM service is not configured or enabled for this host " + r.getName() , e); + throw new HAFenceException("OBM service is not configured or enabled for this host " + r.getName() , e); } } @@ -151,7 +152,7 @@ public ConfigKey[] getConfigKeys() { KVMHAConfig.KvmHAActivityCheckFailureThreshold, KVMHAConfig.KvmHADegradedMaxPeriod, KVMHAConfig.KvmHARecoverWaitPeriod, - KVMHAConfig.KvmHARecoverAttemptThreshold + KVMHAConfig.KvmHARecoverAttemptThreshold, }; } } diff --git a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java index e5752cb97da2..0866d668a439 100644 --- a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java +++ b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java @@ -22,6 +22,7 @@ import com.cloud.agent.api.CheckOnHostCommand; import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand; import com.cloud.exception.StorageUnavailableException; +import com.cloud.ha.HighAvailabilityManager; import com.cloud.host.Host; import com.cloud.host.HostVO; import com.cloud.host.Status; @@ -90,7 +91,7 @@ private boolean isAgentActive(Host agent) { } Status hostStatus = Status.Unknown; Status neighbourStatus = Status.Unknown; - final CheckOnHostCommand cmd = new CheckOnHostCommand(agent); + final CheckOnHostCommand cmd = new CheckOnHostCommand(agent, HighAvailabilityManager.KvmHAFenceHostIfHeartbeatFailsOnStorage.value()); try { LOG.debug(String.format("Checking %s status...", agent.toString())); Answer answer = agentMgr.easySend(agent.getId(), cmd); diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java index dde2669abba9..fc462adc86dc 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java @@ -174,7 +174,7 @@ import com.cloud.agent.resource.virtualnetwork.VirtualRoutingResource; import com.cloud.exception.InternalErrorException; import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.hypervisor.kvm.resource.KVMHABase.NfsStoragePool; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.ChannelDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.ClockDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.ConsoleDef; @@ -3346,8 +3346,8 @@ public void testFenceCommand() { final KVMHAMonitor monitor = Mockito.mock(KVMHAMonitor.class); - final NfsStoragePool storagePool = Mockito.mock(NfsStoragePool.class); - final List pools = new ArrayList(); + final HAStoragePool storagePool = Mockito.mock(HAStoragePool.class); + final List pools = new ArrayList(); pools.add(storagePool); when(libvirtComputingResourceMock.getMonitor()).thenReturn(monitor); diff --git a/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/driver/DateraPrimaryDataStoreDriver.java b/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/driver/DateraPrimaryDataStoreDriver.java index 79b621671175..ae4b699ea7bf 100644 --- a/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/driver/DateraPrimaryDataStoreDriver.java +++ b/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/driver/DateraPrimaryDataStoreDriver.java @@ -66,8 +66,10 @@ import com.cloud.storage.Snapshot; import com.cloud.storage.SnapshotVO; import com.cloud.storage.Storage; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateStoragePoolVO; +import com.cloud.storage.Volume; import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.SnapshotDao; @@ -1879,4 +1881,13 @@ public boolean isVmTagsNeeded(String tagKey) { @Override public void provideVmTags(long vmId, long volumeId, String tagValue) { } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return false; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + } } diff --git a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java index 19a6fe132818..a0aaab1d0aa1 100644 --- a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java +++ b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java @@ -552,4 +552,13 @@ private boolean anyVolumeRequiresEncryption(DataObject ... objects) { } return false; } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return StoragePoolType.NetworkFilesystem == type; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + } } diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java index 5bc60fd23993..d03095218742 100644 --- a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java @@ -20,7 +20,10 @@ import java.util.Map; import org.apache.cloudstack.utils.qemu.QemuImg; +import org.joda.time.Duration; +import com.cloud.agent.api.to.HostTO; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.storage.Storage; public class LinstorStoragePool implements KVMStoragePool { @@ -194,4 +197,35 @@ public Map getDetails() { public String getResourceGroup() { return _resourceGroup; } + + @Override + public boolean isPoolSupportHA() { + return false; + } + + @Override + public String getHearthBeatPath() { + return null; + } + + @Override + public String createHeartBeatCommand(HAStoragePool primaryStoragePool, String hostPrivateIp, + boolean hostValidation) { + return null; + } + + @Override + public String getStorageNodeId() { + return null; + } + + @Override + public Boolean checkingHeartBeat(HAStoragePool pool, HostTO host) { + return null; + } + + @Override + public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activityScriptTimeout, String volumeUUIDListString, String vmActivityCheckPath, long duration) { + return null; + } } diff --git a/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java b/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java index d2d13eafc483..d67b8ab7d5b3 100644 --- a/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java +++ b/plugins/storage/volume/linstor/src/main/java/org/apache/cloudstack/storage/datastore/driver/LinstorPrimaryDataStoreDriverImpl.java @@ -54,9 +54,11 @@ import com.cloud.host.Host; import com.cloud.storage.ResizeVolumePayload; import com.cloud.storage.SnapshotVO; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.StorageManager; import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateStoragePoolVO; +import com.cloud.storage.Volume; import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.SnapshotDao; @@ -881,4 +883,13 @@ public boolean isVmTagsNeeded(String tagKey) { @Override public void provideVmTags(long vmId, long volumeId, String tagValue) { } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return false; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + } } diff --git a/plugins/storage/volume/nexenta/src/main/java/org/apache/cloudstack/storage/datastore/driver/NexentaPrimaryDataStoreDriver.java b/plugins/storage/volume/nexenta/src/main/java/org/apache/cloudstack/storage/datastore/driver/NexentaPrimaryDataStoreDriver.java index 84051888c5e6..8487c881158e 100644 --- a/plugins/storage/volume/nexenta/src/main/java/org/apache/cloudstack/storage/datastore/driver/NexentaPrimaryDataStoreDriver.java +++ b/plugins/storage/volume/nexenta/src/main/java/org/apache/cloudstack/storage/datastore/driver/NexentaPrimaryDataStoreDriver.java @@ -49,7 +49,9 @@ import com.cloud.agent.api.to.DataTO; import com.cloud.host.Host; import com.cloud.storage.Storage; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.StoragePool; +import com.cloud.storage.Volume; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.VolumeDao; import com.cloud.user.dao.AccountDao; @@ -257,4 +259,13 @@ public boolean isVmTagsNeeded(String tagKey) { @Override public void provideVmTags(long vmId, long volumeId, String tagValue) { } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return false; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + } } diff --git a/plugins/storage/volume/sample/src/main/java/org/apache/cloudstack/storage/datastore/driver/SamplePrimaryDataStoreDriverImpl.java b/plugins/storage/volume/sample/src/main/java/org/apache/cloudstack/storage/datastore/driver/SamplePrimaryDataStoreDriverImpl.java index 732786047c43..0b26ce0337a6 100644 --- a/plugins/storage/volume/sample/src/main/java/org/apache/cloudstack/storage/datastore/driver/SamplePrimaryDataStoreDriverImpl.java +++ b/plugins/storage/volume/sample/src/main/java/org/apache/cloudstack/storage/datastore/driver/SamplePrimaryDataStoreDriverImpl.java @@ -44,7 +44,9 @@ import com.cloud.agent.api.to.DataStoreTO; import com.cloud.agent.api.to.DataTO; import com.cloud.host.Host; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.StoragePool; +import com.cloud.storage.Volume; import com.cloud.storage.dao.StoragePoolHostDao; import com.cloud.utils.Pair; import com.cloud.utils.exception.CloudRuntimeException; @@ -283,4 +285,13 @@ public boolean isVmTagsNeeded(String tagKey) { @Override public void provideVmTags(long vmId, long volumeId, String tagValue) { } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return false; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + } } diff --git a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java index d37a339eb2d9..6d3089480d0b 100644 --- a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java +++ b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java @@ -79,6 +79,7 @@ import com.cloud.storage.ResizeVolumePayload; import com.cloud.storage.SnapshotVO; import com.cloud.storage.Storage; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.StorageManager; import com.cloud.storage.StoragePool; import com.cloud.storage.StoragePoolHostVO; @@ -1415,4 +1416,13 @@ protected boolean anyVolumeRequiresEncryption(DataObject ... objects) { } return false; } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return false; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + } } diff --git a/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java b/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java index 4478dc98ca45..d3360f616ddc 100644 --- a/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java +++ b/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java @@ -1663,4 +1663,13 @@ public boolean isVmTagsNeeded(String tagKey) { @Override public void provideVmTags(long vmId, long volumeId, String tagValue) { } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return false; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + } } diff --git a/plugins/storage/volume/storpool/src/main/java/com/cloud/agent/api/storage/StorPoolModifyStoragePoolAnswer.java b/plugins/storage/volume/storpool/src/main/java/com/cloud/agent/api/storage/StorPoolModifyStoragePoolAnswer.java index c27f3f813894..437e786f0f61 100644 --- a/plugins/storage/volume/storpool/src/main/java/com/cloud/agent/api/storage/StorPoolModifyStoragePoolAnswer.java +++ b/plugins/storage/volume/storpool/src/main/java/com/cloud/agent/api/storage/StorPoolModifyStoragePoolAnswer.java @@ -35,13 +35,15 @@ public class StorPoolModifyStoragePoolAnswer extends Answer{ private String poolType; private List datastoreClusterChildren = new ArrayList<>(); private String clusterId; + private String clientNodeId; - public StorPoolModifyStoragePoolAnswer(StorPoolModifyStoragePoolCommand cmd, long capacityBytes, long availableBytes, Map tInfo, String clusterId) { + public StorPoolModifyStoragePoolAnswer(StorPoolModifyStoragePoolCommand cmd, long capacityBytes, long availableBytes, Map tInfo, String clusterId, String clientNodeId) { super(cmd); result = true; poolInfo = new StoragePoolInfo(null, cmd.getPool().getHost(), cmd.getPool().getPath(), cmd.getLocalPath(), cmd.getPool().getType(), capacityBytes, availableBytes); templateInfo = tInfo; this.clusterId = clusterId; + this.clientNodeId = clientNodeId; } public StorPoolModifyStoragePoolAnswer(String errMsg) { @@ -91,4 +93,12 @@ public void setDatastoreClusterChildren(List datastoreC public String getClusterId() { return clusterId; } + + public String getClientNodeId() { + return clientNodeId; + } + + public void setClientNodeId(String clientNodeId) { + this.clientNodeId = clientNodeId; + } } diff --git a/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/StorPoolModifyStorageCommandWrapper.java b/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/StorPoolModifyStorageCommandWrapper.java index b797b3c20d1f..8bd8a52b667f 100644 --- a/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/StorPoolModifyStorageCommandWrapper.java +++ b/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/StorPoolModifyStorageCommandWrapper.java @@ -33,6 +33,7 @@ import com.cloud.hypervisor.kvm.storage.KVMStoragePool; import com.cloud.hypervisor.kvm.storage.KVMStoragePoolManager; import com.cloud.hypervisor.kvm.storage.StorPoolStorageAdaptor; +import com.cloud.hypervisor.kvm.storage.StorPoolStoragePool; import com.cloud.resource.CommandWrapper; import com.cloud.resource.ResourceWrapper; import com.cloud.storage.template.TemplateProp; @@ -47,7 +48,7 @@ public final class StorPoolModifyStorageCommandWrapper extends CommandWrapper tInfo = new HashMap(); - final StorPoolModifyStoragePoolAnswer answer = new StorPoolModifyStoragePoolAnswer(command, storagepool.getCapacity(), storagepool.getAvailable(), tInfo, clusterId); - - return answer; + final Map tInfo = new HashMap<>(); + return new StorPoolModifyStoragePoolAnswer(command, storagepool.getCapacity(), storagepool.getAvailable(), tInfo, clusterId, storagepool.getStorageNodeId()); } catch (Exception e) { log.debug(String.format("Could not modify storage due to %s", e.getMessage())); return new Answer(command, e); } } - private String getSpClusterId() { - Script sc = new Script("storpool_confget", 0, log); - OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser(); - - String SP_CLUSTER_ID = null; - final String err = sc.execute(parser); - if (err != null) { - StorPoolStorageAdaptor.SP_LOG("Could not execute storpool_confget. Error: %s", err); - return SP_CLUSTER_ID; - } - - for (String line: parser.getLines().split("\n")) { - String[] toks = line.split("="); - if( toks.length != 2 ) { - continue; - } - if (toks[0].equals("SP_CLUSTER_ID")) { - SP_CLUSTER_ID = toks[1]; - return SP_CLUSTER_ID; - } - } - return SP_CLUSTER_ID; - } - public String attachOrDetachVolume(String command, String type, String volumeUuid) { final String name = StorPoolStorageAdaptor.getVolumeNameFromPath(volumeUuid, true); if (name == null) { @@ -126,7 +101,11 @@ public String attachOrDetachVolume(String command, String type, String volumeUui Set> obj2 = new JsonParser().parse(res).getAsJsonObject().entrySet(); for (Entry entry : obj2) { if (entry.getKey().equals("error")) { - res = entry.getValue().getAsJsonObject().get("name").getAsString(); + JsonElement errName = entry.getValue().getAsJsonObject().get("name"); + if (errName != null) { + res = errName.getAsString(); + break; + } } } } catch (Exception e) { diff --git a/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/storage/StorPoolStoragePool.java b/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/storage/StorPoolStoragePool.java index 47937212f21a..02095503c3b4 100644 --- a/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/storage/StorPoolStoragePool.java +++ b/plugins/storage/volume/storpool/src/main/java/com/cloud/hypervisor/kvm/storage/StorPoolStoragePool.java @@ -18,13 +18,28 @@ import java.util.List; import java.util.Map; +import java.util.Map.Entry; import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat; +import org.apache.log4j.Logger; +import org.joda.time.Duration; +import com.cloud.agent.api.to.HostTO; +import com.cloud.agent.properties.AgentProperties; +import com.cloud.agent.properties.AgentPropertiesFileHandler; +import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.storage.Storage; import com.cloud.storage.Storage.StoragePoolType; +import com.cloud.utils.script.OutputInterpreter; +import com.cloud.utils.script.Script; +import com.google.gson.JsonElement; +import com.google.gson.JsonIOException; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.google.gson.JsonSyntaxException; public class StorPoolStoragePool implements KVMStoragePool { + private static final Logger log = Logger.getLogger(StorPoolStoragePool.class); private String _uuid; private String _sourceHost; private int _sourcePort; @@ -34,6 +49,7 @@ public class StorPoolStoragePool implements KVMStoragePool { private String _authSecret; private String _sourceDir; private String _localPath; + private String storageNodeId = getStorPoolConfigParam("SP_OURID"); public StorPoolStoragePool(String uuid, String host, int port, StoragePoolType storagePoolType, StorageAdaptor storageAdaptor) { _uuid = uuid; @@ -166,4 +182,123 @@ public boolean supportsConfigDriveIso() { public Map getDetails() { return null; } + + @Override + public boolean isPoolSupportHA() { + return true; + } + + @Override + public String getHearthBeatPath() { + String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR); + return Script.findScript(kvmScriptsDir, "kvmspheartbeat.sh"); + } + + @Override + public String createHeartBeatCommand(HAStoragePool primaryStoragePool, String hostPrivateIp, boolean hostValidation) { + boolean isStorageNodeUp = checkingHeartBeat(primaryStoragePool, null); + if (!isStorageNodeUp && !hostValidation) { + //restart the host + log.debug(String.format("The host [%s] will be restarted because the health check failed for the storage pool [%s]", hostPrivateIp, primaryStoragePool.getPool().getType())); + Script cmd = new Script(primaryStoragePool.getPool().getHearthBeatPath(), HeartBeatUpdateTimeout, log); + cmd.add("-c"); + cmd.execute(); + return "Down"; + } + return isStorageNodeUp ? null : "Down"; + } + + @Override + public String getStorageNodeId() { + return storageNodeId; + } + + public static final String getStorPoolConfigParam(String param) { + Script sc = new Script("storpool_confget", 0, Logger.getLogger(StorPoolStoragePool.class)); + OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser(); + + String configParam = null; + final String err = sc.execute(parser); + if (err != null) { + StorPoolStorageAdaptor.SP_LOG("Could not execute storpool_confget. Error: %s", err); + return configParam; + } + + for (String line: parser.getLines().split("\n")) { + String[] toks = line.split("="); + if( toks.length != 2 ) { + continue; + } + if (toks[0].equals(param)) { + configParam = toks[1]; + return configParam; + } + } + return configParam; + } + + @Override + public Boolean checkingHeartBeat(HAStoragePool pool, HostTO host) { + boolean isNodeWorking = false; + OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser(); + + String res = executeStorPoolServiceListCmd(parser); + + if (res != null) { + return isNodeWorking; + } + String response = parser.getLines(); + + Integer hostStorageNodeId = null; + if (host == null) { + hostStorageNodeId = Integer.parseInt(storageNodeId); + } else { + hostStorageNodeId = host.getParent() != null ? Integer.parseInt(host.getParent()) : null; + } + if (hostStorageNodeId == null) { + return isNodeWorking; + } + try { + isNodeWorking = checkIfNodeIsRunning(response, hostStorageNodeId); + } catch (JsonIOException | JsonSyntaxException e) { + e.printStackTrace(); + } + return isNodeWorking; + } + + private boolean checkIfNodeIsRunning(String response, Integer hostStorageNodeId) { + boolean isNodeWorking = false; + JsonParser jsonParser = new JsonParser(); + JsonObject stats = (JsonObject) jsonParser.parse(response); + JsonObject data = stats.getAsJsonObject("data"); + if (data != null) { + JsonObject clients = data.getAsJsonObject("clients"); + for (Entry element : clients.entrySet()) { + String storageNodeStatus = element.getValue().getAsJsonObject().get("status").getAsString(); + int nodeId = element.getValue().getAsJsonObject().get("nodeId").getAsInt(); + if (hostStorageNodeId == nodeId) { + if (storageNodeStatus.equals("running")) { + return true; + } else { + return isNodeWorking; + } + } + } + } + return isNodeWorking; + } + + private String executeStorPoolServiceListCmd(OutputInterpreter.AllLinesParser parser) { + Script sc = new Script("storpool", 0, log); + sc.add("-j"); + sc.add("service"); + sc.add("list"); + String res = sc.execute(parser); + return res; + } + + @Override + public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activityScriptTimeout, String volumeUuidListString, String vmActivityCheckPath, long duration) { + return checkingHeartBeat(pool, host); + } } diff --git a/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/driver/StorPoolPrimaryDataStoreDriver.java b/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/driver/StorPoolPrimaryDataStoreDriver.java index 0b8777c3b757..d42a2ba0a354 100644 --- a/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/driver/StorPoolPrimaryDataStoreDriver.java +++ b/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/driver/StorPoolPrimaryDataStoreDriver.java @@ -92,6 +92,7 @@ import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateDetailVO; import com.cloud.storage.VMTemplateStoragePoolVO; +import com.cloud.storage.Volume; import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.SnapshotDetailsDao; @@ -1164,4 +1165,20 @@ public void provideVmTags(long vmId, long volumeId, String tagValue) { } } } + + @Override + public boolean isStorageSupportHA(StoragePoolType type) { + return true; + } + + @Override + public void detachVolumeFromAllStorageNodes(Volume volume) { + StoragePoolVO poolVO = primaryStoreDao.findById(volume.getPoolId()); + if (poolVO != null) { + SpConnectionDesc conn = StorPoolUtil.getSpConnection(poolVO.getUuid(), poolVO.getId(), storagePoolDetailsDao, primaryStoreDao); + String volName = StorPoolStorageAdaptor.getVolumeNameFromPath(volume.getPath(), true); + SpApiResponse resp = StorPoolUtil.detachAllForced(volName, false, conn); + StorPoolUtil.spLog("The volume [%s] is detach from all clusters [%s]", volName, resp); + } + } } diff --git a/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/provider/StorPoolHostListener.java b/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/provider/StorPoolHostListener.java index 9b5320df6d8b..bf7642b9122e 100644 --- a/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/provider/StorPoolHostListener.java +++ b/plugins/storage/volume/storpool/src/main/java/org/apache/cloudstack/storage/datastore/provider/StorPoolHostListener.java @@ -37,6 +37,8 @@ import org.apache.cloudstack.storage.datastore.util.StorPoolUtil; import org.apache.cloudstack.storage.datastore.util.StorPoolUtil.SpApiResponse; import org.apache.cloudstack.storage.datastore.util.StorPoolUtil.SpConnectionDesc; +import org.apache.cloudstack.storage.snapshot.StorPoolConfigurationManager; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; @@ -47,6 +49,7 @@ import com.cloud.agent.manager.AgentAttache; import com.cloud.alert.AlertManager; import com.cloud.dc.ClusterDetailsDao; +import com.cloud.dc.ClusterDetailsVO; import com.cloud.dc.dao.ClusterDao; import com.cloud.exception.StorageConflictException; import com.cloud.host.HostVO; @@ -162,6 +165,11 @@ public boolean hostConnect(long hostId, long poolId) throws StorageConflictExcep poolHost.setLocalPath(mspAnswer.getPoolInfo().getLocalPath().replaceAll("//", "/")); } + if (host.getParent() == null && mspAnswer.getClientNodeId() != null) { + host.setParent(mspAnswer.getClientNodeId()); + hostDao.update(host.getId(), host); + } + StorPoolHelper.setSpClusterIdIfNeeded(hostId, mspAnswer.getClusterId(), clusterDao, hostDao, clusterDetailsDao); StorPoolUtil.spLog("Connection established between storage pool [%s] and host [%s]", poolVO.getName(), host.getName()); @@ -214,10 +222,23 @@ public boolean hostAboutToBeRemoved(long hostId) { } @Override - public boolean hostRemoved(long hostId, long clusterId) { + public synchronized boolean hostRemoved(long hostId, long clusterId) { + List hosts = hostDao.findByClusterId(clusterId); + if (CollectionUtils.isNotEmpty(hosts) && hosts.size() == 1) { + removeSPClusterIdWhenTheLastHostIsRemoved(clusterId); + } return true; } + private void removeSPClusterIdWhenTheLastHostIsRemoved(long clusterId) { + ClusterDetailsVO clusterDetailsVo = clusterDetailsDao.findDetail(clusterId, + StorPoolConfigurationManager.StorPoolClusterId.key()); + if (clusterDetailsVo != null && (clusterDetailsVo.getValue() != null && !clusterDetailsVo.getValue().equals(StorPoolConfigurationManager.StorPoolClusterId.defaultValue())) ){ + clusterDetailsVo.setValue(StorPoolConfigurationManager.StorPoolClusterId.defaultValue()); + clusterDetailsDao.update(clusterDetailsVo.getId(), clusterDetailsVo); + } + } + //workaround: we need this "hack" to add our command StorPoolModifyStoragePoolCommand in AgentAttache.s_commandsAllowedInMaintenanceMode //which checks the allowed commands when the host is in maintenance mode private void addModifyCommandToCommandsAllowedInMaintenanceMode() { diff --git a/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh b/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh new file mode 100755 index 000000000000..190bb29eb28f --- /dev/null +++ b/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +help() { + printf "Usage: $0 + -c cleanup" + exit 1 +} +#set -x +cflag=0 + +while getopts 'c' OPTION +do + case $OPTION in + c) + cflag=1 + ;; + *) + help + ;; + esac +done + + +#delete VMs on this mountpoint +deleteVMs() { + vmPids=$(ps aux| grep qemu | grep 'storpool-byid' | awk '{print $2}' 2> /dev/null) + if [ $? -gt 0 ] + then + return + fi + + if [ -z "$vmPids" ] + then + return + fi + + for pid in $vmPids + do + kill -9 $pid &> /dev/null + done +} + +if [ "$cflag" == "1" ] +then + /usr/bin/logger -t heartbeat "kvmspheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage." + sync & + sleep 5 + echo b > /proc/sysrq-trigger + exit $? +fi + + diff --git a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java index 1b9b512118d7..f22bcde9e84e 100644 --- a/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/main/java/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -29,6 +29,10 @@ import javax.naming.ConfigurationException; import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreDriver; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProvider; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProviderManager; +import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; @@ -67,8 +71,11 @@ import com.cloud.service.ServiceOfferingVO; import com.cloud.service.dao.ServiceOfferingDao; import com.cloud.storage.StorageManager; +import com.cloud.storage.VolumeVO; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.dao.GuestOSCategoryDao; import com.cloud.storage.dao.GuestOSDao; +import com.cloud.storage.dao.VolumeDao; import com.cloud.storage.secondary.SecondaryStorageVmManager; import com.cloud.user.AccountManager; import com.cloud.utils.component.ManagerBase; @@ -133,6 +140,10 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur private ConsoleProxyManager consoleProxyManager; @Inject private SecondaryStorageVmManager secondaryStorageVmManager; + @Inject + VolumeDao volumeDao; + @Inject + DataStoreProviderManager dataStoreProviderMgr; long _serverId; @@ -314,6 +325,7 @@ public void scheduleStop(VMInstanceVO vm, long hostId, WorkType type) { } protected void wakeupWorkers() { + s_logger.debug("Wakeup workers HA"); for (WorkerThread worker : _workers) { worker.wakup(); } @@ -332,6 +344,7 @@ public boolean scheduleMigration(final VMInstanceVO vm) { @Override public void scheduleRestart(VMInstanceVO vm, boolean investigate) { + s_logger.debug("HA schedule restart"); Long hostId = vm.getHostId(); if (hostId == null) { try { @@ -425,6 +438,7 @@ public void scheduleRestart(VMInstanceVO vm, boolean investigate) { } protected Long restart(final HaWorkVO work) { + s_logger.debug("RESTART with HAWORK"); List items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId()); if (items.size() > 0) { StringBuilder str = new StringBuilder("Cancelling this work item because newer ones have been scheduled. Work Ids = ["); @@ -599,6 +613,20 @@ protected Long restart(final HaWorkVO work) { } try{ + if (HypervisorType.KVM == host.getHypervisorType()) { + List volumes = volumeDao.findByInstance(vmId); + for (VolumeVO volumeVO : volumes) { + //detach the volumes from all clusters before starting the VM on another host. + if (volumeVO.getPoolType() == StoragePoolType.StorPool) { + DataStoreProvider storeProvider = dataStoreProviderMgr.getDataStoreProvider(volumeVO.getPoolType().name()); + DataStoreDriver storeDriver = storeProvider.getDataStoreDriver(); + if (storeDriver instanceof PrimaryDataStoreDriver) { + PrimaryDataStoreDriver primaryStoreDriver = (PrimaryDataStoreDriver)storeDriver; + primaryStoreDriver.detachVolumeFromAllStorageNodes(volumeVO); + } + } + } + } // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner as its an emergency. _itMgr.advanceStart(vm.getUuid(), params, null); }catch (InsufficientCapacityException e){ @@ -1064,6 +1092,6 @@ public String getConfigComponentName() { public ConfigKey[] getConfigKeys() { return new ConfigKey[] {TimeBetweenCleanup, MigrationMaxRetries, TimeToSleep, TimeBetweenFailures, StopRetryInterval, RestartRetryInterval, MigrateRetryInterval, InvestigateRetryInterval, - HAWorkers, ForceHA}; + HAWorkers, ForceHA, KvmHAFenceHostIfHeartbeatFailsOnStorage}; } } diff --git a/server/src/main/java/com/cloud/ha/KVMFencer.java b/server/src/main/java/com/cloud/ha/KVMFencer.java index e102bc273ac9..ea10570d6f8f 100644 --- a/server/src/main/java/com/cloud/ha/KVMFencer.java +++ b/server/src/main/java/com/cloud/ha/KVMFencer.java @@ -82,6 +82,7 @@ public Boolean fenceOff(VirtualMachine vm, Host host) { List hosts = _resourceMgr.listAllHostsInCluster(host.getClusterId()); FenceCommand fence = new FenceCommand(vm, host); + fence.setReportCheckFailureIfOneStorageIsDown(HighAvailabilityManager.KvmHAFenceHostIfHeartbeatFailsOnStorage.value()); int i = 0; for (HostVO h : hosts) { diff --git a/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java b/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java index 7084ba524bbb..629fae2ac2e3 100644 --- a/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java +++ b/server/src/test/java/com/cloud/ha/HighAvailabilityManagerImplTest.java @@ -16,6 +16,33 @@ // under the License. package com.cloud.ha; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.lang.reflect.Array; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import javax.inject.Inject; + +import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProviderManager; +import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.cloudstack.managed.context.ManagedContext; +import org.apache.log4j.Logger; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.runners.MockitoJUnitRunner; + import com.cloud.agent.AgentManager; import com.cloud.alert.AlertManager; import com.cloud.consoleproxy.ConsoleProxyManager; @@ -39,36 +66,13 @@ import com.cloud.storage.StorageManager; import com.cloud.storage.dao.GuestOSCategoryDao; import com.cloud.storage.dao.GuestOSDao; +import com.cloud.storage.dao.VolumeDao; import com.cloud.storage.secondary.SecondaryStorageVmManager; import com.cloud.user.AccountManager; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.dao.VMInstanceDao; -import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService; -import org.apache.cloudstack.framework.config.dao.ConfigurationDao; -import org.apache.cloudstack.managed.context.ManagedContext; -import org.apache.log4j.Logger; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.runners.MockitoJUnitRunner; - -import javax.inject.Inject; -import java.lang.reflect.Array; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; @RunWith(MockitoJUnitRunner.class) public class HighAvailabilityManagerImplTest { @@ -117,6 +121,10 @@ public class HighAvailabilityManagerImplTest { SecondaryStorageVmManager secondaryStorageVmManager; @Mock HostVO hostVO; + @Mock + VolumeDao volumeDao; + @Mock + DataStoreProviderManager dataStoreProviderMgr; HighAvailabilityManagerImpl highAvailabilityManager; HighAvailabilityManagerImpl highAvailabilityManagerSpy; From c5c81f988bb88f6434ba32da1be1dc43d5a0b6b8 Mon Sep 17 00:00:00 2001 From: Slavka Peleva Date: Tue, 10 Oct 2023 14:48:34 +0300 Subject: [PATCH 2/4] remove trailing spaces --- scripts/vm/hypervisor/kvm/kvmspheartbeat.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh b/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh index 190bb29eb28f..70da881aae0a 100755 --- a/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh +++ b/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh @@ -6,9 +6,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -17,7 +17,7 @@ # under the License. help() { - printf "Usage: $0 + printf "Usage: $0 -c cleanup" exit 1 } @@ -39,7 +39,7 @@ done #delete VMs on this mountpoint deleteVMs() { - vmPids=$(ps aux| grep qemu | grep 'storpool-byid' | awk '{print $2}' 2> /dev/null) + vmPids=$(ps aux| grep qemu | grep 'storpool-byid' | awk '{print $2}' 2> /dev/null) if [ $? -gt 0 ] then return From e7d70ddf53b8595ded9d5c024e7e689f8def62b0 Mon Sep 17 00:00:00 2001 From: Slavka Peleva Date: Tue, 10 Oct 2023 15:47:55 +0300 Subject: [PATCH 3/4] remove line ending --- scripts/vm/hypervisor/kvm/kvmspheartbeat.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh b/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh index 70da881aae0a..3cb459e3e854 100755 --- a/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh +++ b/scripts/vm/hypervisor/kvm/kvmspheartbeat.sh @@ -64,5 +64,3 @@ then echo b > /proc/sysrq-trigger exit $? fi - - From b99d1a1ef25f22ba1a275c8eeddac8a2c83310d0 Mon Sep 17 00:00:00 2001 From: Slavka Peleva Date: Tue, 31 Oct 2023 08:36:18 +0200 Subject: [PATCH 4/4] address comments --- .../java/com/cloud/ha/KVMInvestigator.java | 1 - .../hypervisor/kvm/resource/KVMHAMonitor.java | 90 ++++++++++--------- .../kvm/storage/KVMStoragePoolManager.java | 1 - 3 files changed, 48 insertions(+), 44 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java index d30cf0557e47..022501524f7f 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/ha/KVMInvestigator.java @@ -83,7 +83,6 @@ public Status isAgentAlive(Host agent) { return haManager.getHostStatus(agent); } - //TODO: check storage for HA support List clusterPools = _storagePoolDao.listPoolsByCluster(agent.getClusterId()); boolean storageSupportHA = storageSupportHa(clusterPools); if (!storageSupportHA) { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java index c5ca4bc0ce41..72944b54e92c 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java @@ -89,65 +89,71 @@ protected void runHeartBeat() { Set removedPools = new HashSet<>(); for (String uuid : storagePool.keySet()) { HAStoragePool primaryStoragePool = storagePool.get(uuid); - StoragePool storage; if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) { - try { - Connect conn = LibvirtConnection.getConnection(); - storage = conn.storagePoolLookupByUUIDString(uuid); - if (storage == null || storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) { - if (storage == null) { - s_logger.debug(String.format("Libvirt storage pool [%s] not found, removing from HA list.", uuid)); - } else { - s_logger.debug(String.format("Libvirt storage pool [%s] found, but not running, removing from HA list.", uuid)); - } - - removedPools.add(uuid); - continue; - } - - s_logger.debug(String.format("Found NFS storage pool [%s] in libvirt, continuing.", uuid)); - - } catch (LibvirtException e) { - s_logger.debug(String.format("Failed to lookup libvirt storage pool [%s].", uuid), e); - - if (e.toString().contains("pool not found")) { - s_logger.debug(String.format("Removing pool [%s] from HA monitor since it was deleted.", uuid)); - removedPools.add(uuid); - continue; - } + checkForNotExistingPools(removedPools, uuid); + if (removedPools.contains(uuid)) { + continue; } } - String result = null; - for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) { - result = primaryStoragePool.getPool().createHeartBeatCommand(primaryStoragePool, hostPrivateIp, true); - - if (result != null) { - s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", uuid, result, i, _heartBeatUpdateMaxTries)); - try { - Thread.sleep(_heartBeatUpdateRetrySleep); - } catch (InterruptedException e) { - s_logger.debug("[IGNORED] Interrupted between heartbeat retries.", e); - } - } else { - break; - } - - } + result = executePoolHeartBeatCommand(uuid, primaryStoragePool, result); if (result != null && rebootHostAndAlertManagementOnHeartbeatTimeout) { s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; stopping cloudstack-agent.", uuid, result)); primaryStoragePool.getPool().createHeartBeatCommand(primaryStoragePool, null, false);; } } - if (!removedPools.isEmpty()) { for (String uuid : removedPools) { removeStoragePool(uuid); } } } + } + private String executePoolHeartBeatCommand(String uuid, HAStoragePool primaryStoragePool, String result) { + for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) { + result = primaryStoragePool.getPool().createHeartBeatCommand(primaryStoragePool, hostPrivateIp, true); + + if (result != null) { + s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", uuid, result, i, _heartBeatUpdateMaxTries)); + try { + Thread.sleep(_heartBeatUpdateRetrySleep); + } catch (InterruptedException e) { + s_logger.debug("[IGNORED] Interrupted between heartbeat retries.", e); + } + } else { + break; + } + + } + return result; + } + + private void checkForNotExistingPools(Set removedPools, String uuid) { + try { + Connect conn = LibvirtConnection.getConnection(); + StoragePool storage = conn.storagePoolLookupByUUIDString(uuid); + if (storage == null || storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) { + if (storage == null) { + s_logger.debug(String.format("Libvirt storage pool [%s] not found, removing from HA list.", uuid)); + } else { + s_logger.debug(String.format("Libvirt storage pool [%s] found, but not running, removing from HA list.", uuid)); + } + + removedPools.add(uuid); + } + + s_logger.debug(String.format("Found NFS storage pool [%s] in libvirt, continuing.", uuid)); + + } catch (LibvirtException e) { + s_logger.debug(String.format("Failed to lookup libvirt storage pool [%s].", uuid), e); + + if (e.toString().contains("pool not found")) { + s_logger.debug(String.format("Removing pool [%s] from HA monitor since it was deleted.", uuid)); + removedPools.add(uuid); + } + } } @Override diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java index 7981cd632cce..987825921b8d 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java @@ -361,7 +361,6 @@ private synchronized KVMStoragePool createStoragePool(String name, String host, // LibvirtStorageAdaptor-specific statement if (pool.isPoolSupportHA() && primaryStorage) { - //KVMHABase.HAStoragePool nfspool = new KVMHABase.HAStoragePool(pool.getUuid(), host, path, pool.getLocalPath(), PoolType.PrimaryStorage); KVMHABase.HAStoragePool storagePool = new KVMHABase.HAStoragePool(pool, host, path, PoolType.PrimaryStorage); _haMonitor.addStoragePool(storagePool); }