Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions api/src/main/java/com/cloud/agent/api/to/HostTO.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class HostTO {
private NetworkTO publicNetwork;
private NetworkTO storageNetwork1;
private NetworkTO storageNetwork2;
private String parent;

/**
 * No-argument constructor; leaves every field at its default value.
 * NOTE(review): presumably kept so the transfer object can be instantiated reflectively
 * (e.g. during deserialization) — confirm.
 */
protected HostTO() {
}
Expand All @@ -40,6 +41,9 @@ public HostTO(Host vo) {
if (vo.getStorageIpAddressDeux() != null) {
storageNetwork2 = new NetworkTO(vo.getStorageIpAddressDeux(), vo.getStorageNetmaskDeux(), vo.getStorageMacAddressDeux());
}
if (vo.getParent() != null) {
parent = vo.getParent();
}
}

public String getGuid() {
Expand Down Expand Up @@ -81,4 +85,12 @@ public NetworkTO getStorageNetwork2() {
/**
 * Replaces the second storage network descriptor held by this transfer object.
 *
 * @param storageNetwork2 the new value; stored as-is without validation
 */
public void setStorageNetwork2(NetworkTO storageNetwork2) {
this.storageNetwork2 = storageNetwork2;
}

/**
 * Returns the parent value held by this transfer object.
 *
 * @return the stored parent string, or {@code null} when none has been set
 */
public String getParent() {
    return this.parent;
}

/**
 * Overwrites the parent value held by this transfer object.
 *
 * @param parent the new value; stored as-is, {@code null} is accepted
 */
public void setParent(String parent) {
this.parent = parent;
}
}
11 changes: 11 additions & 0 deletions core/src/main/java/com/cloud/agent/api/CheckOnHostCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

public class CheckOnHostCommand extends Command {
HostTO host;
boolean reportCheckFailureIfOneStorageIsDown;

/**
 * No-argument constructor; leaves {@code host} unset and the storage-failure flag {@code false}.
 * NOTE(review): presumably required for reflective instantiation of commands — confirm.
 */
protected CheckOnHostCommand() {
}
Expand All @@ -33,10 +34,20 @@ public CheckOnHostCommand(Host host) {
setWait(20);
}

/**
 * Builds a host check that also carries the "report failure when one storage is down" policy.
 *
 * @param host the host to check; converted to a {@link HostTO} for transport
 * @param reportCheckFailureIfOneStorageIsDown flag stored on the command and exposed through
 *        {@link #isCheckFailedOnOneStorage()}
 */
public CheckOnHostCommand(Host host, boolean reportCheckFailureIfOneStorageIsDown) {
    super();
    this.host = new HostTO(host);
    this.reportCheckFailureIfOneStorageIsDown = reportCheckFailureIfOneStorageIsDown;
    // Use the same wait value as the single-argument constructor; without it this overload
    // would silently fall back to whatever default wait the base Command carries.
    setWait(20);
}

/**
 * Returns the transfer object describing the host this command checks.
 *
 * @return the {@link HostTO} built by the constructor, or {@code null} if the no-argument
 *         constructor was used
 */
public HostTO getHost() {
    return this.host;
}

/**
 * Returns the {@code reportCheckFailureIfOneStorageIsDown} policy flag carried by this command.
 * Despite the name, this is the configured flag, not the outcome of a check.
 *
 * @return the flag value supplied at construction time ({@code false} by default)
 */
public boolean isCheckFailedOnOneStorage() {
    return this.reportCheckFailureIfOneStorageIsDown;
}

@Override
public boolean executeInSequence() {
return false;
Expand Down
16 changes: 16 additions & 0 deletions core/src/main/java/com/cloud/agent/api/FenceCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package com.cloud.agent.api;

import com.cloud.agent.api.to.HostTO;
import com.cloud.host.Host;
import com.cloud.vm.VirtualMachine;

Expand All @@ -32,13 +33,16 @@ public FenceCommand() {
String hostGuid;
String hostIp;
boolean inSeq;
HostTO host;
boolean reportCheckFailureIfOneStorageIsDown;

/**
 * Builds a fence command for the given VM on the given host.
 * Captures the VM instance name, the host GUID and private IP address, and a {@link HostTO}
 * snapshot of the host; the command starts out as not executed in sequence.
 *
 * @param vm the virtual machine to fence
 * @param host the host the VM is fenced from
 */
public FenceCommand(VirtualMachine vm, Host host) {
    // The superclass no-argument constructor is invoked implicitly.
    this.vmName = vm.getInstanceName();
    this.hostGuid = host.getGuid();
    this.hostIp = host.getPrivateIpAddress();
    this.inSeq = false;
    this.host = new HostTO(host);
}

public void setSeq(boolean inseq) {
Expand All @@ -61,4 +65,16 @@ public String getHostIp() {
/**
 * Returns the value of the {@code inSeq} flag held by this command.
 *
 * @return the current {@code inSeq} value
 */
public boolean executeInSequence() {
    return this.inSeq;
}

/**
 * Returns the transfer object describing the host involved in the fence operation.
 *
 * @return the {@link HostTO} created in the two-argument constructor; may be {@code null}
 *         if the command was created another way
 */
public HostTO getHost() {
    return this.host;
}

/**
 * Returns the "report failure when one storage is down" policy flag carried by this command.
 *
 * @return the flag value; {@code false} unless changed through the setter
 */
public boolean isReportCheckFailureIfOneStorageIsDown() {
    return this.reportCheckFailureIfOneStorageIsDown;
}

/**
 * Sets the "report failure when one storage is down" policy flag carried by this command.
 *
 * @param reportCheckFailureIfOneStorageIsDown the new flag value
 */
public void setReportCheckFailureIfOneStorageIsDown(boolean reportCheckFailureIfOneStorageIsDown) {
this.reportCheckFailureIfOneStorageIsDown = reportCheckFailureIfOneStorageIsDown;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import com.cloud.host.Host;
import com.cloud.storage.StoragePool;
import com.cloud.storage.Volume;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.Pair;

public interface PrimaryDataStoreDriver extends DataStoreDriver {
Expand Down Expand Up @@ -132,4 +134,8 @@ enum QualityOfServiceState { MIGRATION, NO_MIGRATION }
* @param tagValue The value of the VM's tag
*/
void provideVmTags(long vmId, long volumeId, String tagValue);

/**
 * Tells whether this driver supports HA for storage pools of the given type.
 * NOTE(review): the exact meaning of "supports HA" is defined by each implementation — confirm.
 *
 * @param type the storage pool type being asked about
 * @return {@code true} if the driver declares HA support for that pool type
 */
boolean isStorageSupportHA(StoragePoolType type);

/**
 * Detaches the given volume from all storage nodes.
 * NOTE(review): description is taken from the method name only; what counts as a "storage node"
 * and whether failures are reported is implementation-specific — confirm.
 *
 * @param volume the volume to detach
 */
void detachVolumeFromAllStorageNodes(Volume volume);
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ public interface HighAvailabilityManager extends Manager {
+ " which are registered for the HA event that were successful and are now ready to be purged.",
true, Cluster);

/**
 * Boolean setting {@code kvm.ha.fence.on.storage.heartbeat.failure} in the "Advanced" category,
 * default {@code "false"}, scoped per zone.
 * NOTE(review): the sixth constructor argument ({@code false}) presumably marks the key as
 * non-dynamic — confirm against the ConfigKey constructor.
 */
public static final ConfigKey<Boolean> KvmHAFenceHostIfHeartbeatFailsOnStorage = new ConfigKey<>("Advanced", Boolean.class, "kvm.ha.fence.on.storage.heartbeat.failure", "false",
"Proceed fencing the host even the heartbeat failed for only one storage pool", false, ConfigKey.Scope.Zone);

public enum WorkType {
Migration, // Migrating VMs off of a host.
Stop, // Stops a VM for storage pool migration purposes. This should be obsolete now.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@
import com.cloud.host.dao.HostDao;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.resource.ResourceManager;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.component.AdapterBase;

import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreDriver;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProvider;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProviderManager;
import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
Expand All @@ -49,6 +53,8 @@ public class KVMInvestigator extends AdapterBase implements Investigator {
private PrimaryDataStoreDao _storagePoolDao;
@Inject
private HAManager haManager;
@Inject
private DataStoreProviderManager dataStoreProviderMgr;

@Override
public boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) throws UnknownVM {
Expand Down Expand Up @@ -78,31 +84,21 @@ public Status isAgentAlive(Host agent) {
}

List<StoragePoolVO> clusterPools = _storagePoolDao.listPoolsByCluster(agent.getClusterId());
boolean hasNfs = false;
for (StoragePoolVO pool : clusterPools) {
if (pool.getPoolType() == StoragePoolType.NetworkFilesystem) {
hasNfs = true;
break;
}
}
if (!hasNfs) {
boolean storageSupportHA = storageSupportHa(clusterPools);
if (!storageSupportHA) {
List<StoragePoolVO> zonePools = _storagePoolDao.findZoneWideStoragePoolsByHypervisor(agent.getDataCenterId(), agent.getHypervisorType());
for (StoragePoolVO pool : zonePools) {
if (pool.getPoolType() == StoragePoolType.NetworkFilesystem) {
hasNfs = true;
break;
}
}
storageSupportHA = storageSupportHa(zonePools);
}
if (!hasNfs) {
if (!storageSupportHA) {
s_logger.warn(
"Agent investigation was requested on host " + agent + ", but host does not support investigation because it has no NFS storage. Skipping investigation.");
return Status.Disconnected;
}

Status hostStatus = null;
Status neighbourStatus = null;
CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
boolean reportFailureIfOneStorageIsDown = HighAvailabilityManager.KvmHAFenceHostIfHeartbeatFailsOnStorage.value();
CheckOnHostCommand cmd = new CheckOnHostCommand(agent, reportFailureIfOneStorageIsDown);

try {
Answer answer = _agentMgr.easySend(agent.getId(), cmd);
Expand Down Expand Up @@ -145,4 +141,20 @@ public Status isAgentAlive(Host agent) {
s_logger.debug("HA: HOST is ineligible legacy state " + hostStatus + " for host " + agent.getId());
return hostStatus;
}

/**
 * Reports whether at least one of the given pools is served by a primary storage driver that
 * declares HA support for the pool's type.
 *
 * @param pools the storage pools to inspect
 * @return {@code true} as soon as one pool's {@link PrimaryDataStoreDriver} answers
 *         {@code true} from {@code isStorageSupportHA(poolType)}; {@code false} otherwise
 */
private boolean storageSupportHa(List<StoragePoolVO> pools) {
    for (StoragePoolVO pool : pools) {
        DataStoreProvider storeProvider = dataStoreProviderMgr.getDataStoreProvider(pool.getStorageProviderName());
        if (storeProvider == null) {
            // A pool whose provider cannot be resolved cannot vouch for HA support; skip it
            // rather than abort the whole investigation with a NullPointerException.
            s_logger.debug("No data store provider found for storage pool " + pool.getId() + "; ignoring it for the HA support check.");
            continue;
        }
        DataStoreDriver storeDriver = storeProvider.getDataStoreDriver();
        // instanceof is false for null, so a provider without a driver is skipped as well.
        if (storeDriver instanceof PrimaryDataStoreDriver
                && ((PrimaryDataStoreDriver)storeDriver).isStorageSupportHA(pool.getPoolType())) {
            return true;
        }
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.libvirt.StoragePoolInfo;
import org.libvirt.StoragePoolInfo.StoragePoolState;

import com.cloud.hypervisor.kvm.storage.KVMStoragePool;
import com.cloud.utils.script.OutputInterpreter;
import com.cloud.utils.script.OutputInterpreter.AllLinesParser;
import com.cloud.utils.script.Script;
Expand All @@ -41,26 +42,76 @@ public static enum PoolType {
PrimaryStorage, SecondaryStorage
}

public static class NfsStoragePool {
String _poolUUID;
String _poolIp;
String _poolMountSourcePath;
String _mountDestPath;
PoolType _type;
public static class HAStoragePool {
String poolUuid;
String poolIp;
String poolMountSourcePath;
String mountDestPath;
PoolType poolType;
KVMStoragePool pool;

/**
 * Describes a storage pool taking part in HA heartbeat handling.
 * The UUID and the local mount destination are read from the given pool; the remaining
 * values are taken from the arguments unchanged.
 *
 * @param pool the backing KVM storage pool; must not be {@code null}
 * @param host stored as the pool IP
 * @param path stored as the pool's mount source path
 * @param type whether this is primary or secondary storage
 */
public HAStoragePool(KVMStoragePool pool, String host, String path, PoolType type) {
    // Values derived from the pool itself (same call order as before).
    this.poolUuid = pool.getUuid();
    this.mountDestPath = pool.getLocalPath();
    // Values handed in by the caller.
    this.pool = pool;
    this.poolType = type;
    this.poolMountSourcePath = path;
    this.poolIp = host;
}

/**
 * Returns the UUID recorded for this pool.
 *
 * @return the stored pool UUID
 */
public String getPoolUUID() {
    return this.poolUuid;
}

/**
 * Overwrites the UUID recorded for this pool.
 * Only the stored string changes; the wrapped {@code pool} object is not touched.
 *
 * @param poolUuid the new UUID value
 */
public void setPoolUUID(String poolUuid) {
this.poolUuid = poolUuid;
}

/**
 * Returns the IP (or host value) recorded for this pool.
 *
 * @return the stored pool IP
 */
public String getPoolIp() {
    return this.poolIp;
}

/**
 * Overwrites the IP recorded for this pool.
 *
 * @param poolIp the new pool IP value
 */
public void setPoolIp(String poolIp) {
this.poolIp = poolIp;
}

/**
 * Returns the source path this pool is mounted from.
 *
 * @return the stored mount source path
 */
public String getPoolMountSourcePath() {
    return this.poolMountSourcePath;
}

/**
 * Overwrites the source path this pool is mounted from.
 *
 * @param poolMountSourcePath the new mount source path
 */
public void setPoolMountSourcePath(String poolMountSourcePath) {
this.poolMountSourcePath = poolMountSourcePath;
}

/**
 * Returns the local destination path of the pool's mount.
 *
 * @return the stored mount destination path
 */
public String getMountDestPath() {
    return this.mountDestPath;
}

/**
 * Overwrites the local destination path of the pool's mount.
 *
 * @param mountDestPath the new mount destination path
 */
public void setMountDestPath(String mountDestPath) {
this.mountDestPath = mountDestPath;
}

/**
 * Returns the storage role of this pool.
 *
 * @return the stored {@link PoolType}
 */
public PoolType getType() {
    return this.poolType;
}

/**
 * Overwrites the storage role of this pool.
 *
 * @param type the new {@link PoolType}
 */
public void setType(PoolType type) {
this.poolType = type;
}

/**
 * Returns the KVM storage pool wrapped by this descriptor.
 *
 * @return the stored {@link KVMStoragePool}
 */
public KVMStoragePool getPool() {
    return this.pool;
}

public NfsStoragePool(String poolUUID, String poolIp, String poolSourcePath, String mountDestPath, PoolType type) {
_poolUUID = poolUUID;
_poolIp = poolIp;
_poolMountSourcePath = poolSourcePath;
_mountDestPath = mountDestPath;
_type = type;
/**
 * Replaces the KVM storage pool wrapped by this descriptor.
 * Only the reference changes; the separately stored UUID and mount destination are not refreshed.
 *
 * @param pool the new {@link KVMStoragePool}
 */
public void setPool(KVMStoragePool pool) {
this.pool = pool;
}
}

protected String checkingMountPoint(NfsStoragePool pool, String poolName) {
String mountSource = pool._poolIp + ":" + pool._poolMountSourcePath;
protected String checkingMountPoint(HAStoragePool pool, String poolName) {
String mountSource = pool.getPoolIp() + ":" + pool.getPoolMountSourcePath();
String mountPaths = Script.runSimpleBashScript("cat /proc/mounts | grep " + mountSource);
String destPath = pool._mountDestPath;
String destPath = pool.getMountDestPath();

if (mountPaths != null) {
String token[] = mountPaths.split(" ");
Expand Down Expand Up @@ -100,12 +151,12 @@ protected String checkingMountPoint(NfsStoragePool pool, String poolName) {
return destPath;
}

protected String getMountPoint(NfsStoragePool storagePool) {
protected String getMountPoint(HAStoragePool storagePool) {

StoragePool pool = null;
String poolName = null;
try {
pool = LibvirtConnection.getConnection().storagePoolLookupByUUIDString(storagePool._poolUUID);
pool = LibvirtConnection.getConnection().storagePoolLookupByUUIDString(storagePool.getPoolUUID());
if (pool != null) {
StoragePoolInfo spi = pool.getInfo();
if (spi.state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@

import org.apache.log4j.Logger;

import com.cloud.utils.script.OutputInterpreter;
import com.cloud.utils.script.Script;
import com.cloud.agent.api.to.HostTO;

public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class);
private List<NfsStoragePool> nfsStoragePools;
private String hostIp;
private long heartBeatCheckerTimeout = 360000; // 6 minutes
private List<HAStoragePool> storagePools;
private HostTO host;
private boolean reportFailureIfOneStorageIsDown;

public KVMHAChecker(List<NfsStoragePool> pools, String host) {
this.nfsStoragePools = pools;
this.hostIp = host;
/**
 * Creates a heartbeat checker for one host across a set of storage pools.
 *
 * @param pools the storage pools whose heartbeat is checked
 * @param host the host being checked
 * @param reportFailureIfOneStorageIsDown when {@code true}, the check stops at the first pool
 *        that reports a failed heartbeat
 */
public KVMHAChecker(List<HAStoragePool> pools, HostTO host, boolean reportFailureIfOneStorageIsDown) {
    this.reportFailureIfOneStorageIsDown = reportFailureIfOneStorageIsDown;
    this.host = host;
    this.storagePools = pools;
}

/*
Expand All @@ -44,30 +44,14 @@ public KVMHAChecker(List<NfsStoragePool> pools, String host) {
public Boolean checkingHeartBeat() {
boolean validResult = false;

String hostAndPools = String.format("host IP [%s] in pools [%s]", hostIp, nfsStoragePools.stream().map(pool -> pool._poolIp).collect(Collectors.joining(", ")));
String hostAndPools = String.format("host IP [%s] in pools [%s]", host.getPrivateNetwork().getIp(), storagePools.stream().map(pool -> pool.getPoolUUID()).collect(Collectors.joining(", ")));

s_logger.debug(String.format("Checking heart beat with KVMHAChecker for %s", hostAndPools));

for (NfsStoragePool pool : nfsStoragePools) {
Script cmd = new Script(s_heartBeatPath, heartBeatCheckerTimeout, s_logger);
cmd.add("-i", pool._poolIp);
cmd.add("-p", pool._poolMountSourcePath);
cmd.add("-m", pool._mountDestPath);
cmd.add("-h", hostIp);
cmd.add("-r");
cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
String result = cmd.execute(parser);
String parsedLine = parser.getLine();

s_logger.debug(String.format("Checking heart beat with KVMHAChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine,
pool._poolIp));

if (result == null && parsedLine.contains("DEAD")) {
s_logger.warn(String.format("Checking heart beat with KVMHAChecker command [%s] returned [%s]. [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(),
result, parsedLine, hostIp));
} else {
validResult = true;
for (HAStoragePool pool : storagePools) {
validResult = pool.getPool().checkingHeartBeat(pool, host);
if (reportFailureIfOneStorageIsDown && !validResult) {
break;
}
}

Expand Down
Loading