diff --git a/agent/apiharness/src/main/java/com/intuit/tank/harness/APIMonitor.java b/agent/apiharness/src/main/java/com/intuit/tank/harness/APIMonitor.java index 804d76037..7211a8467 100644 --- a/agent/apiharness/src/main/java/com/intuit/tank/harness/APIMonitor.java +++ b/agent/apiharness/src/main/java/com/intuit/tank/harness/APIMonitor.java @@ -83,7 +83,10 @@ private void updateInstanceStatus() { newStatus.setTotalTps(tpsInfo.getTotalTps()); sendTps(tpsInfo); } - if (!isLocal) setInstanceStatus(newStatus.getInstanceId(), newStatus); + + if (!isLocal) { + setInstanceStatus(newStatus.getInstanceId(), newStatus); + } APITestHarness.getInstance().checkAgentThreads(); } catch (Exception t) { LOG.error(LogUtil.getLogMessage("Unable to send status metrics | " + t.getMessage()), t); @@ -120,13 +123,20 @@ private CloudVmStatus createStatus(WatsAgentStatusResponse agentStatus) { */ private JobStatus calculateJobStatus(WatsAgentStatusResponse agentStatus, JobStatus currentStatus) { AgentCommand cmd = APITestHarness.getInstance().getCmd(); - return cmd == AgentCommand.pause ? JobStatus.Paused + JobStatus newStatus = cmd == AgentCommand.pause ? JobStatus.Paused : cmd == AgentCommand.stop ? JobStatus.Stopped : cmd == AgentCommand.pause_ramp ? JobStatus.RampPaused : currentStatus == JobStatus.Unknown || currentStatus == JobStatus.Starting && agentStatus.getCurrentNumberUsers() > 0 ? JobStatus.Running : currentStatus; + + if (newStatus != currentStatus) { + LOG.info(LogUtil.getLogMessage("Agent JobStatus transition: " + currentStatus + " -> " + newStatus + + " (cmd=" + cmd + ", currentUsers=" + agentStatus.getCurrentNumberUsers() + ")")); + } + + return newStatus; } public static void setDoMonitor(boolean monitor) { diff --git a/agent/apiharness/src/main/java/com/intuit/tank/harness/CommandListener.java b/agent/apiharness/src/main/java/com/intuit/tank/harness/CommandListener.java index e7386ae60..cff991a2b 100644 --- a/agent/apiharness/src/main/java/com/intuit/tank/harness/CommandListener.java +++ b/agent/apiharness/src/main/java/com/intuit/tank/harness/CommandListener.java @@ -78,6 +78,8 @@ private static void handleRequest(HttpExchange exchange) { String path = exchange.getRequestURI().getPath(); if (path.equals(AgentCommand.start.getPath()) || path.equals(AgentCommand.run.getPath())) { response = "Received command " + path + ", Starting Test JobId=" + APITestHarness.getInstance().getAgentRunData().getJobId(); + LOG.info(LogUtil.getLogMessage("Received START command - launching test threads for job " + + APITestHarness.getInstance().getAgentRunData().getJobId())); startTest(); } else if (path.startsWith(AgentCommand.stop.getPath())) { response = "Received command " + path + ", Stopping Test JobId=" + APITestHarness.getInstance().getAgentRunData().getJobId(); diff --git a/api/src/main/java/com/intuit/tank/agent/models/VMStatus.java b/api/src/main/java/com/intuit/tank/agent/models/VMStatus.java index 7ccd68212..7727ccdd1 100644 --- a/api/src/main/java/com/intuit/tank/agent/models/VMStatus.java +++ b/api/src/main/java/com/intuit/tank/agent/models/VMStatus.java @@ -22,13 +22,18 @@ public enum VMStatus implements Serializable { terminated; public static final VMStatus fromString(String value) { - VMStatus ret = null; + if (value == null || value.isEmpty()) { + return VMStatus.unknown; + } if ("shutting-down".equals(value)) { - ret = VMStatus.shutting_down; - } else { - ret = VMStatus.valueOf(value); + return VMStatus.shutting_down; + } + try { + return VMStatus.valueOf(value); + } catch (IllegalArgumentException e) { + // Gracefully handle unknown values (e.g., 'replaced' from controller which agent doesn't need) + return VMStatus.unknown; } - return ret != null ? ret : VMStatus.unknown; } } diff --git a/api/src/main/java/com/intuit/tank/vm/vmManager/VMInformation.java b/api/src/main/java/com/intuit/tank/vm/vmManager/VMInformation.java index da175fbd5..ea5fd7e04 100644 --- a/api/src/main/java/com/intuit/tank/vm/vmManager/VMInformation.java +++ b/api/src/main/java/com/intuit/tank/vm/vmManager/VMInformation.java @@ -96,6 +96,44 @@ public String getPrivateDNS() { return (String) this.items.get("privateDns"); } + /** + * Set the virtual machine private IP address + * + * @param data + * The virtual machine's private IP address + */ + public void setPrivateIp(String data) { + this.items.put("privateIp", data); + } + + /** + * Get the virtual machine private IP address + * + * @return The virtual machine's private IP address + */ + public String getPrivateIp() { + return (String) this.items.get("privateIp"); + } + + /** + * Set the virtual machine public IP address + * + * @param data + * The virtual machine's public IP address + */ + public void setPublicIp(String data) { + this.items.put("publicIp", data); + } + + /** + * Get the virtual machine public IP address + * + * @return The virtual machine's public IP address + */ + public String getPublicIp() { + return (String) this.items.get("publicIp"); + } + public void setLaunchTime(Calendar data) { this.items.put("launchTime", data); } diff --git a/api/src/main/java/com/intuit/tank/vm/vmManager/models/VMStatus.java b/api/src/main/java/com/intuit/tank/vm/vmManager/models/VMStatus.java index 6d2b873f1..23f0a1c3a 100644 --- a/api/src/main/java/com/intuit/tank/vm/vmManager/models/VMStatus.java +++ b/api/src/main/java/com/intuit/tank/vm/vmManager/models/VMStatus.java @@ -26,16 +26,22 @@ public enum VMStatus implements Serializable { stopping, stopped, shutting_down, - terminated; + terminated, + replaced; // replaced by AgentWatchdog due to failure to report back public static final VMStatus fromString(String value) { - VMStatus ret = null; + if (value == null || value.isEmpty()) { + return VMStatus.unknown; + } if ("shutting-down".equals(value)) { - ret = VMStatus.shutting_down; - } else { - ret = VMStatus.valueOf(value); + return VMStatus.shutting_down; + } + try { + return VMStatus.valueOf(value); + } catch (IllegalArgumentException e) { + // Gracefully handle unknown values (e.g., from older/newer clients with different enum versions) + return VMStatus.unknown; } - return ret != null ? ret : VMStatus.unknown; } } diff --git a/api/src/test/java/com/intuit/tank/agent/models/VMStatusTest.java b/api/src/test/java/com/intuit/tank/agent/models/VMStatusTest.java new file mode 100644 index 000000000..5360a0ee5 --- /dev/null +++ b/api/src/test/java/com/intuit/tank/agent/models/VMStatusTest.java @@ -0,0 +1,75 @@ +package com.intuit.tank.agent.models; + +import org.junit.jupiter.api.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for the agent-side VMStatus enum. + * This enum is used by agents and doesn't include the 'replaced' status + * (which is only meaningful on the controller side). + */ +public class VMStatusTest { + + @Test + @DisplayName("fromString handles shutting-down special case") + public void testFromString_shuttingDown() { + VMStatus result = VMStatus.fromString("shutting-down"); + + assertNotNull(result); + assertEquals(VMStatus.shutting_down, result); + } + + @Test + @DisplayName("fromString returns correct enum for valid values") + public void testFromString_validValues() { + assertEquals(VMStatus.running, VMStatus.fromString("running")); + assertEquals(VMStatus.pending, VMStatus.fromString("pending")); + assertEquals(VMStatus.starting, VMStatus.fromString("starting")); + assertEquals(VMStatus.rebooting, VMStatus.fromString("rebooting")); + assertEquals(VMStatus.terminated, VMStatus.fromString("terminated")); + assertEquals(VMStatus.stopped, VMStatus.fromString("stopped")); + assertEquals(VMStatus.stopping, VMStatus.fromString("stopping")); + assertEquals(VMStatus.rampPaused, VMStatus.fromString("rampPaused")); + } + + @Test + @DisplayName("fromString returns unknown for null input") + public void testFromString_nullReturnsUnknown() { + VMStatus result = VMStatus.fromString(null); + + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } + + @Test + @DisplayName("fromString returns unknown for empty string") + public void testFromString_emptyReturnsUnknown() { + VMStatus result = VMStatus.fromString(""); + + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } + + @Test + @DisplayName("fromString returns unknown for unrecognized values") + public void testFromString_unknownValueReturnsUnknown() { + // Should not throw IllegalArgumentException - gracefully returns unknown + VMStatus result = VMStatus.fromString("garbage-value"); + + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } + + @Test + @DisplayName("fromString handles 'replaced' from controller gracefully (returns unknown)") + public void testFromString_replacedFromControllerReturnsUnknown() { + // The agent-side VMStatus doesn't have a 'replaced' enum value + // When the controller sends 'replaced', the agent should handle it gracefully + VMStatus result = VMStatus.fromString("replaced"); + + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } +} + diff --git a/api/src/test/java/com/intuit/tank/vm/vmManager/models/VMStatusTest.java b/api/src/test/java/com/intuit/tank/vm/vmManager/models/VMStatusTest.java index 117793759..66f250585 100644 --- a/api/src/test/java/com/intuit/tank/vm/vmManager/models/VMStatusTest.java +++ b/api/src/test/java/com/intuit/tank/vm/vmManager/models/VMStatusTest.java @@ -19,50 +19,85 @@ /** * The class VMStatusTest contains tests for the class {@link VMStatus}. - * - * @generatedBy CodePro at 12/15/14 2:57 PM */ public class VMStatusTest { - /** - * Run the VMStatus fromString(String) method test. - * - * @throws Exception - * - * @generatedBy CodePro at 12/15/14 2:57 PM - */ + @Test - @Disabled - public void testFromString_1() - throws Exception { - String value = "shutting-down"; + @DisplayName("fromString handles shutting-down special case") + public void testFromString_shuttingDown() { + VMStatus result = VMStatus.fromString("shutting-down"); + + assertNotNull(result); + assertEquals(VMStatus.shutting_down, result); + } - VMStatus result = VMStatus.fromString(value); + @Test + @DisplayName("fromString returns correct enum for valid values") + public void testFromString_validValues() { + assertEquals(VMStatus.running, VMStatus.fromString("running")); + assertEquals(VMStatus.pending, VMStatus.fromString("pending")); + assertEquals(VMStatus.starting, VMStatus.fromString("starting")); + assertEquals(VMStatus.ready, VMStatus.fromString("ready")); + assertEquals(VMStatus.rebooting, VMStatus.fromString("rebooting")); + assertEquals(VMStatus.terminated, VMStatus.fromString("terminated")); + assertEquals(VMStatus.stopped, VMStatus.fromString("stopped")); + assertEquals(VMStatus.stopping, VMStatus.fromString("stopping")); + assertEquals(VMStatus.rampPaused, VMStatus.fromString("rampPaused")); + } + @Test + @DisplayName("fromString returns replaced for 'replaced' value") + public void testFromString_replaced() { + VMStatus result = VMStatus.fromString("replaced"); + assertNotNull(result); - assertEquals("shutting_down", result.name()); - assertEquals("shutting_down", result.toString()); - assertEquals(6, result.ordinal()); + assertEquals(VMStatus.replaced, result); } - /** - * Run the VMStatus fromString(String) method test. - * - * @throws Exception - * - * @generatedBy CodePro at 12/15/14 2:57 PM - */ @Test - public void testFromString_2() - throws Exception { - String value = VMStatus.rebooting.name(); + @DisplayName("fromString returns unknown for null input") + public void testFromString_nullReturnsUnknown() { + VMStatus result = VMStatus.fromString(null); + + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } - VMStatus result = VMStatus.fromString(value); + @Test + @DisplayName("fromString returns unknown for empty string") + public void testFromString_emptyReturnsUnknown() { + VMStatus result = VMStatus.fromString(""); + + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } - // An unexpected exception was thrown in user code while executing this test: - // java.lang.IllegalArgumentException: No enum constant com.intuit.tank.vm.vmManager.models.VMStatus. - // at java.lang.Enum.valueOf(Enum.java:238) - // at com.intuit.tank.vm.vmManager.models.VMStatus.valueOf(VMStatus.java:5) - // at com.intuit.tank.vm.vmManager.models.VMStatus.fromString(VMStatus.java:20) + @Test + @DisplayName("fromString returns unknown for unrecognized values (graceful handling)") + public void testFromString_unknownValueReturnsUnknown() { + // Should not throw IllegalArgumentException - gracefully returns unknown + VMStatus result = VMStatus.fromString("garbage-value"); + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } + + @Test + @DisplayName("fromString handles future/unknown enum values gracefully") + public void testFromString_futureCompatibility() { + // Simulates receiving a value from a newer version of the API + VMStatus result = VMStatus.fromString("some_new_status_from_future"); + + assertNotNull(result); + assertEquals(VMStatus.unknown, result); + } + + @Test + @DisplayName("replaced is a terminal state (for documentation)") + public void testReplaced_isTerminalState() { + // This test documents that 'replaced' is intended as a terminal state + // like 'terminated', used by AgentWatchdog when replacing failed agents + assertNotNull(VMStatus.replaced); + assertEquals("replaced", VMStatus.replaced.name()); } } \ No newline at end of file diff --git a/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/cloud/JobEventSender.java b/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/cloud/JobEventSender.java index 5aeaf0852..9eecb9ec3 100644 --- a/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/cloud/JobEventSender.java +++ b/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/cloud/JobEventSender.java @@ -247,7 +247,18 @@ private List getInstancesForJob(String jobId) { List instanceIds = new ArrayList(); CloudVmStatusContainer statuses = vmTracker.getVmStatusForJob(jobId); if (statuses != null) { - instanceIds = statuses.getStatuses().stream().map(CloudVmStatus::getInstanceId).collect(Collectors.toList()); + instanceIds = statuses.getStatuses().stream() + .filter(s -> { + VMStatus vmStatus = s.getVmStatus(); + // skip unreachable instances - they can't receive commands + return vmStatus != VMStatus.terminated && + vmStatus != VMStatus.replaced && + vmStatus != VMStatus.stopped && + vmStatus != VMStatus.shutting_down && + vmStatus != VMStatus.stopping; + }) + .map(CloudVmStatus::getInstanceId) + .collect(Collectors.toList()); } return instanceIds; } @@ -260,8 +271,10 @@ public CloudVmStatus getVmStatus(String instanceId) { public void setVmStatus(final String instanceId, final CloudVmStatus status) { vmTracker.setStatus(status); - if (status.getJobStatus() == JobStatus.Completed || status.getVmStatus() == VMStatus.terminated) { - // will terrminate instance after waiting for some cleanup time + if (status.getJobStatus() == JobStatus.Completed + || status.getVmStatus() == VMStatus.terminated + || status.getVmStatus() == VMStatus.replaced) { + // will terminate instance after waiting for some cleanup time terminator.terminate(status.getInstanceId()); // check job status and kill off instances appropriately checkJobStatus(status.getJobId()); diff --git a/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/services/agent/AgentServiceV2Impl.java b/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/services/agent/AgentServiceV2Impl.java index beaebe37a..32dbde6de 100644 --- a/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/services/agent/AgentServiceV2Impl.java +++ b/rest-mvc/impl/src/main/java/com/intuit/tank/rest/mvc/rest/services/agent/AgentServiceV2Impl.java @@ -220,6 +220,11 @@ public void setInstanceStatus(String instanceId, CloudVmStatus status) { segment.putAnnotation("currentUsers", status.getCurrentUsers()); segment.putAnnotation("TotalUsers", status.getTotalUsers()); segment.putAnnotation("totalTps", status.getTotalTps()); + + LOGGER.debug("Agent " + instanceId + " reporting status - VMStatus: " + status.getVmStatus() + + ", JobStatus: " + status.getJobStatus() + ", Users: " + status.getCurrentUsers() + + "/" + status.getTotalUsers() + ", Job: " + status.getJobId()); + try { JobEventSender controller = new ServletInjector().getManagedBean( servletContext, JobEventSender.class); diff --git a/rest-mvc/impl/src/test/java/com/intuit/tank/rest/mvc/rest/cloud/JobEventSenderTest.java b/rest-mvc/impl/src/test/java/com/intuit/tank/rest/mvc/rest/cloud/JobEventSenderTest.java new file mode 100644 index 000000000..a6576e4aa --- /dev/null +++ b/rest-mvc/impl/src/test/java/com/intuit/tank/rest/mvc/rest/cloud/JobEventSenderTest.java @@ -0,0 +1,285 @@ +package com.intuit.tank.rest.mvc.rest.cloud; + +import com.intuit.tank.vm.vmManager.VMTracker; +import com.intuit.tank.vm.vmManager.models.CloudVmStatus; +import com.intuit.tank.vm.vmManager.models.CloudVmStatusContainer; +import com.intuit.tank.vm.vmManager.models.VMStatus; +import com.intuit.tank.vm.vmManager.models.ValidationStatus; +import com.intuit.tank.vm.api.enumerated.JobStatus; +import com.intuit.tank.vm.api.enumerated.VMImageType; +import com.intuit.tank.vm.api.enumerated.VMRegion; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.lang.reflect.Method; +import java.util.Date; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +/** + * Unit tests for JobEventSender + */ +@ExtendWith(MockitoExtension.class) +public class JobEventSenderTest { + + @Mock + private VMTracker vmTracker; + + @InjectMocks + private JobEventSender jobEventSender; + + private CloudVmStatusContainer container; + + @BeforeEach + void setUp() { + container = new CloudVmStatusContainer(); + container.setJobId("123"); + } + + // Helper method to create CloudVmStatus + private CloudVmStatus createStatus(String instanceId, JobStatus jobStatus, VMStatus vmStatus) { + return new CloudVmStatus( + instanceId, + "123", + "sg-test", + jobStatus, + VMImageType.AGENT, + VMRegion.US_WEST_2, + vmStatus, + new ValidationStatus(), + 100, + 50, + new Date(), + null + ); + } + + /** + * Use reflection to test the private getInstancesForJob method. + */ + private List invokeGetInstancesForJob(String jobId) throws Exception { + Method method = JobEventSender.class.getDeclaredMethod("getInstancesForJob", String.class); + method.setAccessible(true); + @SuppressWarnings("unchecked") + List result = (List) method.invoke(jobEventSender, jobId); + return result; + } + + @Test + @DisplayName("getInstancesForJob excludes terminated instances") + void getInstancesForJob_excludesTerminatedInstances() throws Exception { + // Given: Mix of running and terminated instances + container.getStatuses().add(createStatus("i-running1", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-terminated", JobStatus.Stopped, VMStatus.terminated)); + container.getStatuses().add(createStatus("i-running2", JobStatus.Running, VMStatus.running)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then: Only running instances should be returned + assertEquals(2, instances.size()); + assertTrue(instances.contains("i-running1")); + assertTrue(instances.contains("i-running2")); + assertFalse(instances.contains("i-terminated")); + } + + @Test + @DisplayName("getInstancesForJob excludes stopped instances") + void getInstancesForJob_excludesStoppedInstances() throws Exception { + // Given + container.getStatuses().add(createStatus("i-running", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-stopped", JobStatus.Stopped, VMStatus.stopped)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then + assertEquals(1, instances.size()); + assertTrue(instances.contains("i-running")); + assertFalse(instances.contains("i-stopped")); + } + + @Test + @DisplayName("getInstancesForJob excludes stopping instances") + void getInstancesForJob_excludesStoppingInstances() throws Exception { + // Given + container.getStatuses().add(createStatus("i-running", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-stopping", JobStatus.Stopped, VMStatus.stopping)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then + assertEquals(1, instances.size()); + assertTrue(instances.contains("i-running")); + assertFalse(instances.contains("i-stopping")); + } + + @Test + @DisplayName("getInstancesForJob excludes shutting_down instances") + void getInstancesForJob_excludesShuttingDownInstances() throws Exception { + // Given + container.getStatuses().add(createStatus("i-running", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-shuttingdown", JobStatus.Stopped, VMStatus.shutting_down)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then + assertEquals(1, instances.size()); + assertTrue(instances.contains("i-running")); + assertFalse(instances.contains("i-shuttingdown")); + } + + @Test + @DisplayName("getInstancesForJob includes pending instances (agents starting)") + void getInstancesForJob_includesPendingInstances() throws Exception { + // Given: Pending instances are still reachable + container.getStatuses().add(createStatus("i-pending", JobStatus.Starting, VMStatus.pending)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then: Pending instances should be included + assertEquals(1, instances.size()); + assertTrue(instances.contains("i-pending")); + } + + @Test + @DisplayName("getInstancesForJob includes starting instances") + void getInstancesForJob_includesStartingInstances() throws Exception { + // Given + container.getStatuses().add(createStatus("i-starting", JobStatus.Starting, VMStatus.starting)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then + assertEquals(1, instances.size()); + assertTrue(instances.contains("i-starting")); + } + + @Test + @DisplayName("getInstancesForJob returns empty list for non-existent job") + void getInstancesForJob_returnsEmptyForNonExistent() throws Exception { + // Given + when(vmTracker.getVmStatusForJob("999")).thenReturn(null); + + // When + List instances = invokeGetInstancesForJob("999"); + + // Then + assertNotNull(instances); + assertTrue(instances.isEmpty()); + } + + @Test + @DisplayName("getInstancesForJob excludes replaced instances (AgentWatchdog replacements)") + void getInstancesForJob_excludesReplacedInstances() throws Exception { + // Given: VMStatus.replaced is set by AgentWatchdog when an agent fails and is replaced + container.getStatuses().add(createStatus("i-running", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-replaced", JobStatus.Starting, VMStatus.replaced)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then: Replaced instances should be filtered out (can't receive commands) + assertEquals(1, instances.size()); + assertTrue(instances.contains("i-running")); + assertFalse(instances.contains("i-replaced")); + } + + @Test + @DisplayName("getInstancesForJob filters correctly with mixed statuses from AgentWatchdog scenario") + void getInstancesForJob_agentWatchdogScenario() throws Exception { + // Given: Real-world scenario where AgentWatchdog replaced some agents + // - 3 running agents (healthy) + // - 2 replaced agents (marked by watchdog with VMStatus.replaced) + container.getStatuses().add(createStatus("i-healthy1", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-healthy2", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-healthy3", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-replaced1", JobStatus.Starting, VMStatus.replaced)); + container.getStatuses().add(createStatus("i-replaced2", JobStatus.Starting, VMStatus.replaced)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When: User tries to kill the job + List instances = invokeGetInstancesForJob("123"); + + // Then: Only healthy instances should receive the kill command + assertEquals(3, instances.size()); + assertTrue(instances.contains("i-healthy1")); + assertTrue(instances.contains("i-healthy2")); + assertTrue(instances.contains("i-healthy3")); + assertFalse(instances.contains("i-replaced1")); + assertFalse(instances.contains("i-replaced2")); + } + + @Test + @DisplayName("getInstancesForJob excludes all terminal states correctly") + void getInstancesForJob_excludesAllTerminalStates() throws Exception { + // Given: One instance of each terminal state + one running + container.getStatuses().add(createStatus("i-running", JobStatus.Running, VMStatus.running)); + container.getStatuses().add(createStatus("i-terminated", JobStatus.Completed, VMStatus.terminated)); + container.getStatuses().add(createStatus("i-replaced", JobStatus.Starting, VMStatus.replaced)); + container.getStatuses().add(createStatus("i-stopped", JobStatus.Stopped, VMStatus.stopped)); + container.getStatuses().add(createStatus("i-stopping", JobStatus.Stopped, VMStatus.stopping)); + container.getStatuses().add(createStatus("i-shutting-down", JobStatus.Stopped, VMStatus.shutting_down)); + + when(vmTracker.getVmStatusForJob("123")).thenReturn(container); + + // When + List instances = invokeGetInstancesForJob("123"); + + // Then: Only the running instance should be included + assertEquals(1, instances.size()); + assertTrue(instances.contains("i-running")); + } + + @Test + @DisplayName("getVmStatus delegates to vmTracker") + void getVmStatus_delegatesToVmTracker() { + String instanceId = "i-test123"; + CloudVmStatus expectedStatus = createStatus(instanceId, JobStatus.Running, VMStatus.running); + when(vmTracker.getStatus(instanceId)).thenReturn(expectedStatus); + + CloudVmStatus result = jobEventSender.getVmStatus(instanceId); + + assertEquals(expectedStatus, result); + verify(vmTracker).getStatus(instanceId); + } + + @Test + @DisplayName("getVmStatusForJob delegates to vmTracker") + void getVmStatusForJob_delegatesToVmTracker() { + String jobId = "123"; + when(vmTracker.getVmStatusForJob(jobId)).thenReturn(container); + + CloudVmStatusContainer result = jobEventSender.getVmStatusForJob(jobId); + + assertEquals(container, result); + verify(vmTracker).getVmStatusForJob(jobId); + } +} + diff --git a/tank_vmManager/src/main/java/com/intuit/tank/perfManager/workLoads/JobManager.java b/tank_vmManager/src/main/java/com/intuit/tank/perfManager/workLoads/JobManager.java index 887fa39b0..ef6bd5e9b 100644 --- a/tank_vmManager/src/main/java/com/intuit/tank/perfManager/workLoads/JobManager.java +++ b/tank_vmManager/src/main/java/com/intuit/tank/perfManager/workLoads/JobManager.java @@ -172,12 +172,17 @@ public AgentTestStartData registerAgentForJob(AgentData agentData) { ret.setUserIntervalIncrement(jobInfo.jobRequest.getUserIntervalIncrement()); ret.setTargetRampRate(jobInfo.jobRequest.getTargetRatePerAgent()); jobInfo.agentData.add(agentData); + LOG.info(new ObjectMessage(Map.of("Message", "Agent " + agentData.getInstanceId() + + " added to job " + agentData.getJobId() + ". Total agents now: " + jobInfo.agentData.size() + + "/" + jobInfo.numberOfMachines + ", isFilled: " + jobInfo.isFilled()))); CloudVmStatus status = vmTracker.getStatus(agentData.getInstanceId()); if(status != null) { status.setVmStatus(VMStatus.pending); vmTracker.setStatus(status); } if (jobInfo.isFilled()) { + LOG.info(new ObjectMessage(Map.of("Message", "All " + jobInfo.numberOfMachines + + " agents registered for job " + agentData.getJobId() + " - starting test thread"))); new Thread( () -> { startTest(jobInfo); }).start(); } } @@ -209,8 +214,14 @@ private void startTest(final JobInfo info) { LOG.info(new ObjectMessage(Map.of("Message", "Start agents command received - Sending start commands for job " + jobId + " asynchronously to following agents: " + info.agentData.stream().collect(Collectors.toMap(AgentData::getInstanceId, AgentData::getInstanceUrl))))); } + LOG.info(new ObjectMessage(Map.of("Message", "Sending START commands to " + info.agentData.size() + + " agents for job " + jobId))); info.agentData.parallelStream() - .map(agentData -> agentData.getInstanceUrl() + AgentCommand.start.getPath()) + .map(agentData -> { + String url = agentData.getInstanceUrl() + AgentCommand.start.getPath(); + LOG.info(new ObjectMessage(Map.of("Message", "Sending command to url " + url))); + return url; + }) .map(URI::create) .map(uri -> sendCommand(uri, MAX_RETRIES)) .forEach(future -> { diff --git a/tank_vmManager/src/main/java/com/intuit/tank/vmManager/AgentWatchdog.java b/tank_vmManager/src/main/java/com/intuit/tank/vmManager/AgentWatchdog.java index 1bc20e99a..d0290e658 100644 --- a/tank_vmManager/src/main/java/com/intuit/tank/vmManager/AgentWatchdog.java +++ b/tank_vmManager/src/main/java/com/intuit/tank/vmManager/AgentWatchdog.java @@ -222,25 +222,44 @@ private void relaunch(ArrayList instances) { VMImageDao dao = new VMImageDao(); for (VMInformation info : instances) { vmInfo.remove(info); - vmTracker.setStatus(createTerminatedVmStatus(info)); + + // Update status to 'replaced' in the tracker (keeps visible in UI, but filtered out of calculations) + CloudVmStatus replacedStatus = vmTracker.getStatus(info.getInstanceId()); + if (replacedStatus != null) { + replacedStatus.setVmStatus(VMStatus.replaced); + vmTracker.setStatus(replacedStatus); + } + + // Also update in the database for persistence VMInstance image = dao.getImageByInstanceId(info.getInstanceId()); if (image != null) { - image.setStatus(VMStatus.terminated.name()); + image.setStatus(VMStatus.replaced.name()); // mark as replaced (not terminated) for audit trail dao.saveOrUpdate(image); } + + LOG.info(new ObjectMessage(Map.of("Message", + "Marked instance " + info.getInstanceId() + + " as REPLACED (visible in UI but filtered from job status) for job " + jobId))); } LOG.info(new ObjectMessage(Map.of("Message","Setting number of instances to relaunch to: " + instances.size() + " for job " + jobId))); instanceRequest.setNumberOfInstances(instances.size()); instances.clear(); // Create and send instance start request List newVms = amazonInstance.create(instanceRequest); - // Add new instances + // Add new instances - set to 'starting' status so watchdog waits for actual /v2/agent/ready call for (VMInformation newInfo : newVms) { vmInfo.add(newInfo); - // Add directly to started instances since these are restarted from scratch + // Add to startedInstances - watchdog will wait for this agent to actually report startedInstances.add(newInfo); - vmTracker.setStatus(createCloudStatus(instanceRequest, newInfo)); - LOG.info(new ObjectMessage(Map.of("Message","Added image (" + newInfo.getInstanceId() + ") to VMImage table for job " + jobId))); + CloudVmStatus newStatus = createCloudStatus(instanceRequest, newInfo); + vmTracker.setStatus(newStatus); + LOG.info(new ObjectMessage(Map.of( + "Message", "Created replacement agent with status " + newStatus.getVmStatus() + + " - watchdog will wait for /v2/agent/ready call", + "instanceId", newInfo.getInstanceId(), + "jobId", jobId, + "publicIp", newInfo.getPublicIp() != null ? newInfo.getPublicIp() : "N/A", + "privateIp", newInfo.getPrivateIp() != null ? newInfo.getPrivateIp() : "N/A"))); try { dao.addImageFromInfo(instanceRequest.getJobId(), newInfo, instanceRequest.getRegion()); @@ -254,21 +273,20 @@ private void relaunch(ArrayList instances) { } /** - * @param req - * @param info - * @return + * Creates initial cloud status for a newly launched replacement agent. + * CRITICAL: Must use VMStatus.starting (not pending) so watchdog waits for actual agent registration. + * + * Status flow: starting → (agent calls /v2/agent/ready) → pending → (receives START) → ready → running + * + * @param req the instance request + * @param info the VM information for the new instance + * @return CloudVmStatus with starting state */ private CloudVmStatus createCloudStatus(VMInstanceRequest req, VMInformation info) { return new CloudVmStatus(info.getInstanceId(), req.getJobId(), req.getInstanceDescription() != null ? req.getInstanceDescription().getSecurityGroup() : "unknown", JobStatus.Starting, - VMImageType.AGENT, req.getRegion(), VMStatus.pending, new ValidationStatus(), 0, 0, null, null); - } - - private CloudVmStatus createTerminatedVmStatus(VMInformation info) { - return new CloudVmStatus(info.getInstanceId(), instanceRequest.getJobId(), "unknown", - JobStatus.Stopped, VMImageType.AGENT, instanceRequest.getRegion(), - VMStatus.terminated, new ValidationStatus(), 0, 0, null, null); + VMImageType.AGENT, req.getRegion(), VMStatus.starting, new ValidationStatus(), 0, 0, null, null); } private boolean shouldRelaunchInstances() { diff --git a/tank_vmManager/src/main/java/com/intuit/tank/vmManager/VMTrackerImpl.java b/tank_vmManager/src/main/java/com/intuit/tank/vmManager/VMTrackerImpl.java index 6ccff1254..25efb07bb 100644 --- a/tank_vmManager/src/main/java/com/intuit/tank/vmManager/VMTrackerImpl.java +++ b/tank_vmManager/src/main/java/com/intuit/tank/vmManager/VMTrackerImpl.java @@ -19,9 +19,11 @@ import static com.intuit.tank.vm.common.TankConstants.NOTIFICATIONS_EVENT_EVENT_TIME_KEY; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; @@ -149,6 +151,7 @@ private void setStatusThread(@Nonnull final CloudVmStatus status) { synchronized (getCacheSyncObject(status.getJobId())) { status.setReportTime(new Date()); CloudVmStatus currentStatus = getStatus(status.getInstanceId()); + if (shouldUpdateStatus(currentStatus)) { statusMap.put(status.getInstanceId(), status); if (status.getVmStatus() == VMStatus.running @@ -157,6 +160,10 @@ private void setStatusThread(@Nonnull final CloudVmStatus status) { AmazonInstance amzInstance = new AmazonInstance(status.getVmRegion()); amzInstance.killInstances(Collections.singletonList(status.getInstanceId())); } + } else { + LOG.debug(new ObjectMessage(Map.of("Message", + "Skipping status update for instance " + status.getInstanceId() + + " - current status is " + (currentStatus != null ? currentStatus.getVmStatus() : "null")))); } String jobId = status.getJobId(); CloudVmStatusContainer cloudVmStatusContainer = jobMap.get(jobId); @@ -224,17 +231,27 @@ private JobQueueStatus getQueueStatus(JobQueueStatus oldStatus, JobStatus jobSta } /** - * If the vm is shutting down or terminated, don't update the status to something else. + * If the vm is shutting down, terminated, or replaced, don't update the status to something else. + * This prevents race conditions where a user kills an instance while watchdog is replacing it, + * or stale status updates arrive after an instance has already transitioned to a terminal state. * @param currentStatus * @return */ private boolean shouldUpdateStatus(CloudVmStatus currentStatus) { if (currentStatus != null) { VMStatus status = currentStatus.getVmStatus(); - return (status != VMStatus.shutting_down - && status != VMStatus.stopped - && status != VMStatus.stopping - && status != VMStatus.terminated); + boolean isTerminalState = (status == VMStatus.shutting_down + || status == VMStatus.stopped + || status == VMStatus.stopping + || status == VMStatus.terminated + || status == VMStatus.replaced); + if (isTerminalState) { + LOG.info(new ObjectMessage(Map.of("Message", + "Ignoring status update for instance " + currentStatus.getInstanceId() + + " - already in terminal state: " + status + + " (possible race between user kill and watchdog replace)"))); + return false; + } } return true; } @@ -245,7 +262,33 @@ private boolean shouldUpdateStatus(CloudVmStatus currentStatus) { */ @Override public void removeStatusForInstance(String instanceId) { - statusMap.remove(instanceId); + // First, get the status to find the jobId (without removing yet) + CloudVmStatus status = statusMap.get(instanceId); + + if (status != null) { + String jobId = status.getJobId(); + // Synchronize on the job lock to ensure atomic removal from both + // statusMap and container - prevents race with setStatusThread + synchronized (getCacheSyncObject(jobId)) { + // Now remove from statusMap inside the sync block + CloudVmStatus removedStatus = statusMap.remove(instanceId); + + // Also remove from the job's container to keep counts accurate + if (removedStatus != null) { + CloudVmStatusContainer container = jobMap.get(jobId); + if (container != null) { + boolean removed = container.getStatuses().remove(removedStatus); + if (removed) { + LOG.info(new ObjectMessage(Map.of("Message", + "Removed instance " + instanceId + " from container for job " + jobId))); + } + } + } + } + } else { + // Instance not in statusMap - just try to remove (no-op if not present) + statusMap.remove(instanceId); + } } /** @@ -254,12 +297,19 @@ public void removeStatusForInstance(String instanceId) { */ @Override public void removeStatusForJob(String jobId) { - CloudVmStatusContainer cloudVmStatusContainer = jobMap.get(jobId); - if (cloudVmStatusContainer != null) { - for (CloudVmStatus s : cloudVmStatusContainer.getStatuses()) { - removeStatusForInstance(s.getInstanceId()); + // Synchronize on the same lock used by setStatusThread to prevent + // ConcurrentModificationException when iterating over statuses + synchronized (getCacheSyncObject(jobId)) { + CloudVmStatusContainer cloudVmStatusContainer = jobMap.get(jobId); + if (cloudVmStatusContainer != null) { + // Copy to avoid ConcurrentModificationException - the underlying + // HashSet could be modified by setStatusThread while we iterate + List statusesCopy = new ArrayList<>(cloudVmStatusContainer.getStatuses()); + for (CloudVmStatus s : statusesCopy) { + removeStatusForInstance(s.getInstanceId()); + } + jobMap.remove(jobId); } - jobMap.remove(jobId); } } @@ -314,7 +364,21 @@ private void addStatusToJobContainer(CloudVmStatus status, CloudVmStatusContaine // look up the job JobInstance job = jobInstanceDao.get().findById(Integer.parseInt(status.getJobId())); - for (CloudVmStatus s : cloudVmStatusContainer.getStatuses()) { + + // Take a snapshot to avoid ConcurrentModificationException if another thread modifies + // the set while we iterate (defense in depth, even though we're synchronized) + Set statusesSnapshot = new HashSet<>(cloudVmStatusContainer.getStatuses()); + + int activeInstanceCount = 0; + for (CloudVmStatus s : statusesSnapshot) { + VMStatus vmStatus = s.getVmStatus(); + // skip replaced instances - these were replaced by AgentWatchdog due to failure + // but do NOT skip terminated/stopping/stopped/shutting_down - these are active agents in transition + if (vmStatus == VMStatus.replaced) { + continue; + } + activeInstanceCount++; + JobStatus jobStatus = s.getJobStatus(); if (jobStatus != JobStatus.Completed) { // If no VMs are Completed isFinished = false; @@ -332,6 +396,21 @@ private void addStatusToJobContainer(CloudVmStatus status, CloudVmStatusContaine running = false; } } + + // if all instances are replaced and replacements haven't reported yet, + // don't change job status - wait for active agents to report + if (activeInstanceCount == 0) { + LOG.info(new ObjectMessage(Map.of("Message", + "No active instances for job " + status.getJobId() + + " (all replaced or empty) - skipping status calculation until replacements report"))); + return; + } + + LOG.debug(new ObjectMessage(Map.of("Message", + "Status calc complete for job " + status.getJobId() + + " - isFinished=" + isFinished + ", paused=" + paused + + ", rampPaused=" + rampPaused + ", stopped=" + stopped + ", running=" + running))); + if (isFinished) { LOG.info(new ObjectMessage(Map.of("Message","Setting end time on container " + cloudVmStatusContainer.getJobId()))); if (cloudVmStatusContainer.getEndTime() == null) { @@ -343,6 +422,7 @@ private void addStatusToJobContainer(CloudVmStatus status, CloudVmStatusContaine } if (job != null) { job.setEndTime(cloudVmStatusContainer.getEndTime()); + JobQueueStatus oldStatus = job.getStatus(); JobQueueStatus newStatus = job.getStatus(); if (isFinished) { newStatus = JobQueueStatus.Completed; @@ -361,6 +441,12 @@ private void addStatusToJobContainer(CloudVmStatus status, CloudVmStatusContaine } } + if (oldStatus != newStatus) { + LOG.info(new ObjectMessage(Map.of("Message", + "Job " + status.getJobId() + " status transition: " + oldStatus + " -> " + newStatus + + " (isFinished=" + isFinished + ", paused=" + paused + ", rampPaused=" + rampPaused + + ", stopped=" + stopped + ", running=" + running + ")"))); + } LOG.trace("Setting Container for job=" + status.getJobId() + " newStatus to " + newStatus); job.setStatus(newStatus); jobInstanceDao.get().saveOrUpdate(job); diff --git a/tank_vmManager/src/main/java/com/intuit/tank/vmManager/environment/JobRequest.java b/tank_vmManager/src/main/java/com/intuit/tank/vmManager/environment/JobRequest.java index 105790a43..e8a189d6f 100644 --- a/tank_vmManager/src/main/java/com/intuit/tank/vmManager/environment/JobRequest.java +++ b/tank_vmManager/src/main/java/com/intuit/tank/vmManager/environment/JobRequest.java @@ -89,7 +89,12 @@ private void persistInstances(VMInstanceRequest instanceRequest, List create(VMRequest request) { // reboot(result); } + // Wait for instances to get public IPs assigned and refresh the details + if (!result.isEmpty()) { + try { + // Brief wait to allow AWS to assign public IPs + Thread.sleep(3000); + List instanceIds = result.stream() + .map(VMInformation::getInstanceId) + .collect(Collectors.toList()); + CompletableFuture future = ec2AsyncClient.describeInstances( + DescribeInstancesRequest.builder().instanceIds(instanceIds).build()); + if (future != null) { + DescribeInstancesResponse described = future.get(); + // Update result with fresh instance data that includes public IPs + List updated = described.reservations().stream() + .flatMap(reservation -> reservation.instances().stream() + .map(instance -> AmazonDataConverter.instanceToVmInformation( + reservation.requesterId(), instance, vmRegion))) + .collect(Collectors.toList()); + result.clear(); + result.addAll(updated); + LOG.debug("Refreshed {} instance details with public IP information", updated.size()); + } + } catch (InterruptedException e) { + LOG.warn("Interrupted while waiting for public IP assignment", e); + Thread.currentThread().interrupt(); + } catch (ExecutionException e) { + LOG.warn("Failed to refresh instance details for public IPs: {}", e.getMessage()); + } + } + } catch (SdkException ae) { LOG.error("Amazon issue starting instances: {} : {}", vmRegion, ae.getMessage(), ae); throw new RuntimeException(ae); diff --git a/tank_vmManager/src/test/java/com/intuit/tank/vmManager/AgentWatchdogTest.java b/tank_vmManager/src/test/java/com/intuit/tank/vmManager/AgentWatchdogTest.java index e43775931..82f714472 100644 --- a/tank_vmManager/src/test/java/com/intuit/tank/vmManager/AgentWatchdogTest.java +++ b/tank_vmManager/src/test/java/com/intuit/tank/vmManager/AgentWatchdogTest.java @@ -17,6 +17,7 @@ import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collections; import java.util.Date; @@ -98,4 +99,117 @@ public void progressToPendingRunTest(@Mock VMTracker vmTrackerMock, @Mock CloudV verify(cloudVmStatusContainerMock, times(2)).getEndTime(); verify(cloudVmStatusContainerMock, times(2)).getStatuses(); } + + /** + * CRITICAL TEST: Verifies that replacement agents are created with VMStatus.starting, + * NOT VMStatus.pending. This is essential because: + * + * - checkForReportingInstances() considers 'pending' as "agent has reported" + * - If replacements start with 'pending', they're immediately moved to reportedInstances + * - This causes watchdog to exit before agents actually call /v2/agent/ready + * - JobManager then waits forever for registrations that never come + * + * See: https://github.intuit.com/user/Tank/issues/XXX (if applicable) + */ + @Test + public void createCloudStatus_usesStartingNotPending() throws Exception { + VMTracker vmTracker = new VMTrackerImpl(); + VMInstanceRequest instanceRequest = new VMInstanceRequest(); + instanceRequest.setJobId("test-job-123"); + instanceRequest.setRegion(VMRegion.STANDALONE); + List vmInfo = new ArrayList<>(); + + AgentWatchdog agentWatchdog = new AgentWatchdog(instanceRequest, vmInfo, vmTracker); + + // Use reflection to access private createCloudStatus method + Method createCloudStatus = AgentWatchdog.class.getDeclaredMethod( + "createCloudStatus", VMInstanceRequest.class, VMInformation.class); + createCloudStatus.setAccessible(true); + + VMInformation vmInformation = new VMInformation(); + vmInformation.setInstanceId("i-replacement-test"); + + CloudVmStatus result = (CloudVmStatus) createCloudStatus.invoke( + agentWatchdog, instanceRequest, vmInformation); + + // THE CRITICAL ASSERTION: Must be 'starting', not 'pending' + assertEquals(VMStatus.starting, result.getVmStatus(), + "Replacement agents MUST start with VMStatus.starting so watchdog waits for actual /v2/agent/ready call. " + + "Using 'pending' causes watchdog to immediately consider them as 'reported' and exit prematurely."); + + // Verify other fields are correct + assertEquals("test-job-123", result.getJobId()); + assertEquals("i-replacement-test", result.getInstanceId()); + assertEquals(JobStatus.Starting, result.getJobStatus()); + } + + /** + * Verifies the checkForReportingInstances() logic only considers 'pending' agents as reported. + * This test ensures that 'starting' agents are NOT moved to reportedInstances. + */ + @Test + public void checkForReportingInstances_onlyMovesPendingAgents( + @Mock VMTracker vmTrackerMock, + @Mock CloudVmStatusContainer cloudVmStatusContainerMock) { + + // Setup: Two agents - one starting (should wait), one pending (should be reported) + when(cloudVmStatusContainerMock.getEndTime()).thenReturn(null); + + CloudVmStatus vmstatusStarting = new CloudVmStatus( + "i-still-starting", "123", "sg-123456", + JobStatus.Starting, VMImageType.AGENT, VMRegion.STANDALONE, + VMStatus.starting, new ValidationStatus(), 1, 1, new Date(), new Date()); + + CloudVmStatus vmstatusPending = new CloudVmStatus( + "i-reported", "123", "sg-123456", + JobStatus.Starting, VMImageType.AGENT, VMRegion.STANDALONE, + VMStatus.pending, new ValidationStatus(), 1, 1, new Date(), new Date()); + + Set statuses = new HashSet<>(); + statuses.add(vmstatusStarting); + statuses.add(vmstatusPending); + when(cloudVmStatusContainerMock.getStatuses()).thenReturn(statuses); + when(vmTrackerMock.getVmStatusForJob(null)).thenReturn(cloudVmStatusContainerMock); + + // Create VMInformation for both agents + VMInformation vmInfoStarting = new VMInformation(); + vmInfoStarting.setState("pending"); + vmInfoStarting.setInstanceId("i-still-starting"); + + VMInformation vmInfoPending = new VMInformation(); + vmInfoPending.setState("pending"); + vmInfoPending.setInstanceId("i-reported"); + + List vmInfo = new ArrayList<>(); + vmInfo.add(vmInfoStarting); + vmInfo.add(vmInfoPending); + + VMInstanceRequest instanceRequest = new VMInstanceRequest(); + instanceRequest.setRegion(VMRegion.STANDALONE); + + // Short timeout to let it check once then timeout waiting for i-still-starting + // maxWaitForResponse=1 means it will immediately try to relaunch + // But since we're not mocking amazonInstance.create, it should fail + // and we just verify the initial check behavior + AgentWatchdog agentWatchdog = new AgentWatchdog( + instanceRequest, vmInfo, vmTrackerMock, amazonInstanceMock, 10, 50); + + // Run in a way that only checks once - the job will "appear stopped" after first check + when(cloudVmStatusContainerMock.getEndTime()) + .thenReturn(null) // First check + .thenReturn(new Date()); // Second check - job appears stopped, exit + + try { + agentWatchdog.run(); + } catch (RuntimeException e) { + // Expected - job appears stopped + } + + // After ONE check: + // - i-reported (pending) should be in reportedInstances + // - i-still-starting (starting) should still be in startedInstances + // The key assertion is that the watchdog did NOT immediately exit + // because i-still-starting is still 'starting' not 'pending' + verify(cloudVmStatusContainerMock, atLeast(1)).getStatuses(); + } } diff --git a/tank_vmManager/src/test/java/com/intuit/tank/vmManager/VMTrackerImplTest.java b/tank_vmManager/src/test/java/com/intuit/tank/vmManager/VMTrackerImplTest.java new file mode 100644 index 000000000..6849cfeb0 --- /dev/null +++ b/tank_vmManager/src/test/java/com/intuit/tank/vmManager/VMTrackerImplTest.java @@ -0,0 +1,528 @@ +package com.intuit.tank.vmManager; + +import com.intuit.tank.vm.vmManager.models.CloudVmStatus; +import com.intuit.tank.vm.vmManager.models.CloudVmStatusContainer; +import com.intuit.tank.vm.vmManager.models.VMStatus; +import com.intuit.tank.vm.vmManager.models.ValidationStatus; +import com.intuit.tank.vm.api.enumerated.JobStatus; +import com.intuit.tank.vm.api.enumerated.VMImageType; +import com.intuit.tank.vm.api.enumerated.VMRegion; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.RepeatedTest; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Unit tests for VMTrackerImpl, focusing on the bug fix for terminated agents + * blocking job status transitions and the VMStatus.replaced handling. + */ +public class VMTrackerImplTest { + + private VMTrackerImpl vmTracker; + + @BeforeEach + void setUp() { + vmTracker = new VMTrackerImpl(); + } + + // ============ Basic functionality tests ============ + + @Test + @DisplayName("getStatus returns null for non-existent instance") + void getStatus_returnsNullForNonExistent() { + assertNull(vmTracker.getStatus("non-existent-instance")); + } + + @Test + @DisplayName("removeStatusForInstance handles non-existent instance gracefully") + void removeStatusForInstance_handlesNonExistent() { + String instanceId = "i-test123"; + + // First verify getStatus returns null initially + assertNull(vmTracker.getStatus(instanceId)); + + // Call removeStatusForInstance - should handle non-existent gracefully (no exception) + vmTracker.removeStatusForInstance(instanceId); + + // Should still be null (no exception thrown) + assertNull(vmTracker.getStatus(instanceId)); + } + + @Test + @DisplayName("getVmStatusForJob returns null for non-existent job") + void getVmStatusForJob_returnsNullForNonExistent() { + assertNull(vmTracker.getVmStatusForJob("non-existent-job")); + } + + @Test + @DisplayName("getAllJobs returns empty set initially") + void getAllJobs_returnsEmptySetInitially() { + Set jobs = vmTracker.getAllJobs(); + assertNotNull(jobs); + assertTrue(jobs.isEmpty()); + } + + @Test + @DisplayName("stopJob marks job as stopped") + void stopJob_marksJobAsStopped() { + String jobId = "123"; + + // Initially running + assertTrue(vmTracker.isRunning(jobId)); + + // Stop the job + vmTracker.stopJob(jobId); + + // Now stopped + assertFalse(vmTracker.isRunning(jobId)); + } + + @Test + @DisplayName("isRunning returns true for jobs not explicitly stopped") + void isRunning_returnsTrueByDefault() { + assertTrue(vmTracker.isRunning("any-job-id")); + } + + @Test + @DisplayName("removeStatusForJob handles non-existent job gracefully") + void removeStatusForJob_handlesNonExistent() { + // Should not throw + vmTracker.removeStatusForJob("non-existent-job"); + + // Verify state is still consistent + assertNull(vmTracker.getVmStatusForJob("non-existent-job")); + } + + @Test + @DisplayName("isDevMode returns false by default") + void isDevMode_returnsFalseByDefault() { + // The devMode is set based on TankConfig which defaults to false + // unless explicitly configured as standalone + assertFalse(vmTracker.isDevMode()); + } + + @Test + @DisplayName("getProjectStatusContainer returns null for non-existent project") + void getProjectStatusContainer_returnsNullForNonExistent() { + assertNull(vmTracker.getProjectStatusContainer("non-existent-project")); + } + + // ============ shouldUpdateStatus tests (terminal state handling) ============ + + /** + * Use reflection to test the private shouldUpdateStatus method. + */ + private boolean invokeShouldUpdateStatus(CloudVmStatus status) throws Exception { + Method method = VMTrackerImpl.class.getDeclaredMethod("shouldUpdateStatus", CloudVmStatus.class); + method.setAccessible(true); + return (boolean) method.invoke(vmTracker, status); + } + + private CloudVmStatus createStatus(String instanceId, VMStatus vmStatus) { + return new CloudVmStatus( + instanceId, + "123", + "sg-test", + JobStatus.Starting, + VMImageType.AGENT, + VMRegion.US_WEST_2, + vmStatus, + new ValidationStatus(), + 100, + 50, + new Date(), + null + ); + } + + @Test + @DisplayName("shouldUpdateStatus returns true for null current status") + void shouldUpdateStatus_nullCurrentStatus_returnsTrue() throws Exception { + assertTrue(invokeShouldUpdateStatus(null)); + } + + @ParameterizedTest + @EnumSource(value = VMStatus.class, names = {"terminated", "replaced", "stopped", "stopping", "shutting_down"}) + @DisplayName("shouldUpdateStatus returns false for terminal states") + void shouldUpdateStatus_terminalStates_returnsFalse(VMStatus terminalStatus) throws Exception { + CloudVmStatus status = createStatus("i-123", terminalStatus); + assertFalse(invokeShouldUpdateStatus(status), + "Should reject updates for terminal state: " + terminalStatus); + } + + @ParameterizedTest + @EnumSource(value = VMStatus.class, names = {"unknown", "starting", "pending", "ready", "running", "rampPaused", "rebooting"}) + @DisplayName("shouldUpdateStatus returns true for active states") + void shouldUpdateStatus_activeStates_returnsTrue(VMStatus activeStatus) throws Exception { + CloudVmStatus status = createStatus("i-123", activeStatus); + assertTrue(invokeShouldUpdateStatus(status), + "Should allow updates for active state: " + activeStatus); + } + + @Test + @DisplayName("shouldUpdateStatus blocks update for replaced instance (AgentWatchdog scenario)") + void shouldUpdateStatus_replacedInstance_blocksUpdate() throws Exception { + // Given: An instance that was replaced by AgentWatchdog + CloudVmStatus replacedStatus = createStatus("i-replaced", VMStatus.replaced); + + // When/Then: Updates should be blocked + assertFalse(invokeShouldUpdateStatus(replacedStatus), + "Replaced instances should not accept status updates (race condition guard)"); + } + + // ============ VMStatus.replaced integration tests ============ + + @Test + @DisplayName("replaced status is recognized as a valid VMStatus") + void replacedStatus_isValidEnum() { + assertNotNull(VMStatus.replaced); + assertEquals("replaced", VMStatus.replaced.name()); + } + + @Test + @DisplayName("setStatus respects terminal state guard for replaced instances") + void setStatus_respectsTerminalGuard_forReplaced() throws Exception { + // Note: This is a partial test - full integration requires mocked dependencies + // The key behavior is tested via shouldUpdateStatus tests above + + // Verify the enum value exists and can be used + CloudVmStatus status = createStatus("i-test", VMStatus.replaced); + assertEquals(VMStatus.replaced, status.getVmStatus()); + } + + // ============ Status calculation tests (using reflection to bypass CDI dependencies) ============ + + private CloudVmStatus createStatusWithJobStatus(String instanceId, String jobId, VMStatus vmStatus, JobStatus jobStatus) { + return new CloudVmStatus( + instanceId, + jobId, + "sg-test", + jobStatus, + VMImageType.AGENT, + VMRegion.US_WEST_2, + vmStatus, + new ValidationStatus(), + 100, + 50, + new Date(), + null + ); + } + + /** + * Directly add status to internal maps using reflection (bypasses async setStatus and CDI dependencies). + * This allows testing the status calculation logic in isolation. + */ + @SuppressWarnings("unchecked") + private void addStatusDirectly(CloudVmStatus status) throws Exception { + // Access the private statusMap field + java.lang.reflect.Field statusMapField = VMTrackerImpl.class.getDeclaredField("statusMap"); + statusMapField.setAccessible(true); + java.util.concurrent.ConcurrentHashMap statusMap = + (java.util.concurrent.ConcurrentHashMap) statusMapField.get(vmTracker); + + // Access the private jobMap field + java.lang.reflect.Field jobMapField = VMTrackerImpl.class.getDeclaredField("jobMap"); + jobMapField.setAccessible(true); + java.util.concurrent.ConcurrentHashMap jobMap = + (java.util.concurrent.ConcurrentHashMap) jobMapField.get(vmTracker); + + // Add to statusMap + statusMap.put(status.getInstanceId(), status); + + // Add to job container + String jobId = status.getJobId(); + CloudVmStatusContainer container = jobMap.computeIfAbsent(jobId, k -> { + CloudVmStatusContainer c = new CloudVmStatusContainer(); + c.setJobId(jobId); + return c; + }); + container.getStatuses().add(status); + } + + @Test + @DisplayName("Status calculation with all agents replaced returns early (activeInstanceCount = 0)") + void statusCalculation_allAgentsReplaced_returnsEarly() throws Exception { + // Given: A job where ALL agents have been replaced by watchdog + String jobId = "all-replaced-job"; + + // Set up 3 agents, all marked as replaced (using direct manipulation) + addStatusDirectly(createStatusWithJobStatus("i-001", jobId, VMStatus.replaced, JobStatus.Starting)); + addStatusDirectly(createStatusWithJobStatus("i-002", jobId, VMStatus.replaced, JobStatus.Starting)); + addStatusDirectly(createStatusWithJobStatus("i-003", jobId, VMStatus.replaced, JobStatus.Starting)); + + // When: Get job status + CloudVmStatusContainer container = vmTracker.getVmStatusForJob(jobId); + + // Then: Container should exist with 3 statuses + assertNotNull(container, "Container should exist"); + assertEquals(3, container.getStatuses().size()); + + // Verify all have replaced status + assertTrue(container.getStatuses().stream().allMatch(s -> s.getVmStatus() == VMStatus.replaced), + "All agents should be in replaced state"); + } + + @Test + @DisplayName("Status calculation with mixed statuses correctly tracks replaced agents") + void statusCalculation_mixedStatuses_correctlyDeterminesJobStatus() throws Exception { + String jobId = "mixed-status-job"; + + // Given: Mix of running, pending, and replaced agents (using direct manipulation) + addStatusDirectly(createStatusWithJobStatus("i-running1", jobId, VMStatus.running, JobStatus.Running)); + addStatusDirectly(createStatusWithJobStatus("i-running2", jobId, VMStatus.running, JobStatus.Running)); + addStatusDirectly(createStatusWithJobStatus("i-replaced", jobId, VMStatus.replaced, JobStatus.Starting)); + addStatusDirectly(createStatusWithJobStatus("i-pending", jobId, VMStatus.pending, JobStatus.Starting)); + + // When + CloudVmStatusContainer container = vmTracker.getVmStatusForJob(jobId); + + // Then: Should have 4 total statuses + assertNotNull(container, "Container should exist"); + assertEquals(4, container.getStatuses().size()); + + // Replaced should be in container + assertTrue(container.getStatuses().stream() + .anyMatch(s -> s.getInstanceId().equals("i-replaced") && s.getVmStatus() == VMStatus.replaced), + "Replaced instance should be in container"); + + // Running instances should also be present + assertEquals(2, container.getStatuses().stream() + .filter(s -> s.getVmStatus() == VMStatus.running).count(), + "Should have 2 running instances"); + } + + @Test + @DisplayName("Status calculation container includes all terminal states") + void statusCalculation_onlySkipsReplaced_notOtherTerminalStates() throws Exception { + String jobId = "terminal-states-job"; + + // Given: Various terminal and non-terminal states (using direct manipulation) + addStatusDirectly(createStatusWithJobStatus("i-running", jobId, VMStatus.running, JobStatus.Running)); + addStatusDirectly(createStatusWithJobStatus("i-replaced", jobId, VMStatus.replaced, JobStatus.Starting)); + addStatusDirectly(createStatusWithJobStatus("i-terminated", jobId, VMStatus.terminated, JobStatus.Completed)); + addStatusDirectly(createStatusWithJobStatus("i-stopped", jobId, VMStatus.stopped, JobStatus.Stopped)); + + // When + CloudVmStatusContainer container = vmTracker.getVmStatusForJob(jobId); + + // Then: Container should have all 4 statuses + assertNotNull(container, "Container should exist"); + assertEquals(4, container.getStatuses().size()); + + // Verify each status type is present + assertTrue(container.getStatuses().stream().anyMatch(s -> s.getVmStatus() == VMStatus.running)); + assertTrue(container.getStatuses().stream().anyMatch(s -> s.getVmStatus() == VMStatus.replaced)); + assertTrue(container.getStatuses().stream().anyMatch(s -> s.getVmStatus() == VMStatus.terminated)); + assertTrue(container.getStatuses().stream().anyMatch(s -> s.getVmStatus() == VMStatus.stopped)); + } + + // ============ Thread safety tests (using direct manipulation to avoid CDI issues) ============ + + @RepeatedTest(3) + @DisplayName("Concurrent direct status additions do not cause ConcurrentModificationException") + void concurrentDirectStatusAdd_noException() throws Exception { + String jobId = "concurrent-test-job-" + System.nanoTime(); + int numThreads = 10; + int operationsPerThread = 50; + ExecutorService executor = Executors.newFixedThreadPool(numThreads); + CountDownLatch startLatch = new CountDownLatch(1); + CountDownLatch doneLatch = new CountDownLatch(numThreads); + AtomicInteger errorCount = new AtomicInteger(0); + + for (int t = 0; t < numThreads; t++) { + final int threadNum = t; + executor.submit(() -> { + try { + startLatch.await(); + for (int i = 0; i < operationsPerThread; i++) { + String instanceId = "i-thread" + threadNum + "-op" + i; + addStatusDirectly(createStatusWithJobStatus( + instanceId, jobId, VMStatus.running, JobStatus.Running)); + } + } catch (Exception e) { + errorCount.incrementAndGet(); + e.printStackTrace(); + } finally { + doneLatch.countDown(); + } + }); + } + + startLatch.countDown(); + assertTrue(doneLatch.await(30, TimeUnit.SECONDS), "Test timed out"); + executor.shutdown(); + + assertEquals(0, errorCount.get(), "Concurrent operations should not throw exceptions"); + + CloudVmStatusContainer container = vmTracker.getVmStatusForJob(jobId); + assertNotNull(container, "Container should exist"); + assertTrue(container.getStatuses().size() > 0, "Should have some statuses"); + } + + @RepeatedTest(3) + @DisplayName("Concurrent add and removeStatusForInstance do not cause race conditions") + void concurrentAddAndRemove_noRaceCondition() throws Exception { + String jobId = "concurrent-remove-job-" + System.nanoTime(); + int numAddThreads = 5; + int numRemoveThreads = 5; + int operationsPerThread = 30; + ExecutorService executor = Executors.newFixedThreadPool(numAddThreads + numRemoveThreads); + CountDownLatch startLatch = new CountDownLatch(1); + CountDownLatch doneLatch = new CountDownLatch(numAddThreads + numRemoveThreads); + AtomicInteger errorCount = new AtomicInteger(0); + List instanceIds = new ArrayList<>(); + + // Pre-populate some instances + for (int i = 0; i < operationsPerThread; i++) { + String instanceId = "i-prepop-" + i; + instanceIds.add(instanceId); + addStatusDirectly(createStatusWithJobStatus(instanceId, jobId, VMStatus.running, JobStatus.Running)); + } + + // Threads that add new statuses + for (int t = 0; t < numAddThreads; t++) { + final int threadNum = t; + executor.submit(() -> { + try { + startLatch.await(); + for (int i = 0; i < operationsPerThread; i++) { + String instanceId = "i-add-thread" + threadNum + "-op" + i; + addStatusDirectly(createStatusWithJobStatus( + instanceId, jobId, VMStatus.running, JobStatus.Running)); + } + } catch (Exception e) { + errorCount.incrementAndGet(); + e.printStackTrace(); + } finally { + doneLatch.countDown(); + } + }); + } + + // Threads that remove statuses + for (int t = 0; t < numRemoveThreads; t++) { + final int threadNum = t; + executor.submit(() -> { + try { + startLatch.await(); + for (int i = 0; i < operationsPerThread; i++) { + if (i < instanceIds.size()) { + vmTracker.removeStatusForInstance(instanceIds.get(i)); + } + vmTracker.removeStatusForInstance("i-nonexistent-" + threadNum + "-" + i); + } + } catch (Exception e) { + errorCount.incrementAndGet(); + e.printStackTrace(); + } finally { + doneLatch.countDown(); + } + }); + } + + startLatch.countDown(); + assertTrue(doneLatch.await(30, TimeUnit.SECONDS), "Test timed out"); + executor.shutdown(); + + assertEquals(0, errorCount.get(), "Concurrent add/remove should not throw exceptions"); + } + + @RepeatedTest(3) + @DisplayName("Concurrent removeStatusForJob does not cause ConcurrentModificationException") + void concurrentRemoveStatusForJob_noException() throws Exception { + int numJobs = 5; + int instancesPerJob = 10; + String prefix = "job-" + System.nanoTime() + "-"; + ExecutorService executor = Executors.newFixedThreadPool(numJobs); + CountDownLatch doneLatch = new CountDownLatch(numJobs); + AtomicInteger errorCount = new AtomicInteger(0); + + // Set up multiple jobs with instances + for (int j = 0; j < numJobs; j++) { + String jobId = prefix + j; + for (int i = 0; i < instancesPerJob; i++) { + addStatusDirectly(createStatusWithJobStatus( + "i-" + jobId + "-instance" + i, jobId, VMStatus.running, JobStatus.Running)); + } + } + + // Concurrently remove all jobs + for (int j = 0; j < numJobs; j++) { + final String jobId = prefix + j; + executor.submit(() -> { + try { + vmTracker.removeStatusForJob(jobId); + } catch (Exception e) { + errorCount.incrementAndGet(); + e.printStackTrace(); + } finally { + doneLatch.countDown(); + } + }); + } + + assertTrue(doneLatch.await(30, TimeUnit.SECONDS), "Test timed out"); + executor.shutdown(); + + assertEquals(0, errorCount.get(), "Concurrent removeStatusForJob should not throw exceptions"); + + // Verify all jobs are removed + for (int j = 0; j < numJobs; j++) { + assertNull(vmTracker.getVmStatusForJob(prefix + j), "Job " + prefix + j + " should be removed"); + } + } + + // ============ removeStatusForInstance race condition fix tests ============ + + @Test + @DisplayName("removeStatusForInstance removes from both statusMap and container atomically") + void removeStatusForInstance_atomicRemoval() throws Exception { + String jobId = "atomic-remove-job"; + String instanceId = "i-atomic-test"; + + // Given: An instance with status (using direct manipulation) + CloudVmStatus status = createStatusWithJobStatus(instanceId, jobId, VMStatus.running, JobStatus.Running); + addStatusDirectly(status); + + // Verify it exists in both places + assertNotNull(vmTracker.getStatus(instanceId), "Status should exist in statusMap"); + CloudVmStatusContainer container = vmTracker.getVmStatusForJob(jobId); + assertNotNull(container, "Container should exist"); + assertTrue(container.getStatuses().stream().anyMatch(s -> s.getInstanceId().equals(instanceId)), + "Instance should be in container"); + + // When: Remove the status + vmTracker.removeStatusForInstance(instanceId); + + // Then: Should be removed from both statusMap and container + assertNull(vmTracker.getStatus(instanceId), "Status should be removed from statusMap"); + container = vmTracker.getVmStatusForJob(jobId); + assertNotNull(container); // Container still exists (job not removed) + assertFalse(container.getStatuses().stream().anyMatch(s -> s.getInstanceId().equals(instanceId)), + "Instance should be removed from container"); + } + + @Test + @DisplayName("removeStatusForInstance handles non-existent instance gracefully with sync") + void removeStatusForInstance_nonExistent_noException() { + // Should not throw even with the new synchronized logic + assertDoesNotThrow(() -> vmTracker.removeStatusForInstance("i-does-not-exist")); + } +} + diff --git a/web/web_support/src/main/java/com/intuit/tank/job/ActJobNodeBean.java b/web/web_support/src/main/java/com/intuit/tank/job/ActJobNodeBean.java index 99947207d..5f58b39fb 100644 --- a/web/web_support/src/main/java/com/intuit/tank/job/ActJobNodeBean.java +++ b/web/web_support/src/main/java/com/intuit/tank/job/ActJobNodeBean.java @@ -129,7 +129,12 @@ public List getSubNodes() { @Override public List getCurrentSubNodes() { - return vmBeans.stream().filter(vm -> !vm.getStatus().equals(VMStatus.terminated.toString())).collect(Collectors.toList()); + // Filter out both terminated and replaced agents from the count + // terminated = normal shutdown, replaced = watchdog replaced a failed agent + return vmBeans.stream() + .filter(vm -> !vm.getStatus().equals(VMStatus.terminated.toString()) + && !vm.getStatus().equals(VMStatus.replaced.toString())) + .collect(Collectors.toList()); } @Override diff --git a/web/web_support/src/test/java/com/intuit/tank/job/ActJobNodeBeanTest.java b/web/web_support/src/test/java/com/intuit/tank/job/ActJobNodeBeanTest.java index 54431df82..378ab5f94 100644 --- a/web/web_support/src/test/java/com/intuit/tank/job/ActJobNodeBeanTest.java +++ b/web/web_support/src/test/java/com/intuit/tank/job/ActJobNodeBeanTest.java @@ -745,4 +745,120 @@ public void testSetVmBeans_1() // at com.intuit.tank.project.JobConfiguration.(JobConfiguration.java:63) // at com.intuit.tank.project.Workload.(Workload.java:57) } + + // ============ getCurrentSubNodes tests (VMStatus.replaced filtering) ============ + + /** + * Helper to create a VMNodeBean with a specific VMStatus. + */ + private VMNodeBean createVMNodeBean(String instanceId, VMStatus vmStatus) { + CloudVmStatus cvs = new CloudVmStatus( + instanceId, + "123", + "sg-1", + JobStatus.Running, + VMImageType.AGENT, + VMRegion.US_EAST_2, + vmStatus, + new ValidationStatus(), + 10, + 100, + new Date(), + null + ); + return new VMNodeBean(cvs, true, FastDateFormat.getDateTimeInstance(FastDateFormat.MEDIUM, FastDateFormat.MEDIUM)); + } + + @Test + public void testGetCurrentSubNodes_filtersReplacedInstances() throws Exception { + // Setup + Workload workload = new Workload(); + workload.setJobConfiguration(new JobConfiguration()); + JobInstance jobInstance = new JobInstance(workload, "test"); + jobInstance.setStatus(JobQueueStatus.Running); + ActJobNodeBean fixture = new ActJobNodeBean(jobInstance, true, FastDateFormat.getDateTimeInstance(FastDateFormat.MEDIUM, FastDateFormat.MEDIUM)); + + List vmBeans = new LinkedList<>(); + vmBeans.add(createVMNodeBean("i-001", VMStatus.running)); + vmBeans.add(createVMNodeBean("i-002", VMStatus.running)); + vmBeans.add(createVMNodeBean("i-003", VMStatus.replaced)); // Should be filtered + vmBeans.add(createVMNodeBean("i-004", VMStatus.running)); + fixture.setVmBeans(vmBeans); + + // Execute + List result = fixture.getCurrentSubNodes(); + + // Verify: replaced instance should be filtered out + assertEquals(3, result.size(), "Should exclude replaced instance"); + assertEquals(4, fixture.getSubNodes().size(), "getSubNodes should include ALL"); + } + + @Test + public void testGetCurrentSubNodes_filtersTerminatedInstances() throws Exception { + // Setup + Workload workload = new Workload(); + workload.setJobConfiguration(new JobConfiguration()); + JobInstance jobInstance = new JobInstance(workload, "test"); + jobInstance.setStatus(JobQueueStatus.Running); + ActJobNodeBean fixture = new ActJobNodeBean(jobInstance, true, FastDateFormat.getDateTimeInstance(FastDateFormat.MEDIUM, FastDateFormat.MEDIUM)); + + List vmBeans = new LinkedList<>(); + vmBeans.add(createVMNodeBean("i-001", VMStatus.running)); + vmBeans.add(createVMNodeBean("i-002", VMStatus.terminated)); // Should be filtered + vmBeans.add(createVMNodeBean("i-003", VMStatus.running)); + fixture.setVmBeans(vmBeans); + + // Execute + List result = fixture.getCurrentSubNodes(); + + // Verify + assertEquals(2, result.size(), "Should exclude terminated instance"); + } + + @Test + public void testGetCurrentSubNodes_filtersBothReplacedAndTerminated() throws Exception { + // Setup: Mix of running, replaced, and terminated + Workload workload = new Workload(); + workload.setJobConfiguration(new JobConfiguration()); + JobInstance jobInstance = new JobInstance(workload, "test"); + jobInstance.setStatus(JobQueueStatus.Running); + ActJobNodeBean fixture = new ActJobNodeBean(jobInstance, true, FastDateFormat.getDateTimeInstance(FastDateFormat.MEDIUM, FastDateFormat.MEDIUM)); + + List vmBeans = new LinkedList<>(); + vmBeans.add(createVMNodeBean("i-001", VMStatus.running)); + vmBeans.add(createVMNodeBean("i-002", VMStatus.running)); + vmBeans.add(createVMNodeBean("i-003", VMStatus.replaced)); // Should be filtered + vmBeans.add(createVMNodeBean("i-004", VMStatus.terminated)); // Should be filtered + vmBeans.add(createVMNodeBean("i-005", VMStatus.running)); + fixture.setVmBeans(vmBeans); + + // Execute + List result = fixture.getCurrentSubNodes(); + + // Verify: Both replaced and terminated should be filtered + assertEquals(3, result.size(), "Should only include running instances"); + assertEquals(5, fixture.getSubNodes().size(), "getSubNodes should include ALL for visibility"); + } + + @Test + public void testGetCurrentSubNodes_allReplaced_returnsEmpty() throws Exception { + // Setup: Edge case where all instances are replaced + Workload workload = new Workload(); + workload.setJobConfiguration(new JobConfiguration()); + JobInstance jobInstance = new JobInstance(workload, "test"); + jobInstance.setStatus(JobQueueStatus.Starting); + ActJobNodeBean fixture = new ActJobNodeBean(jobInstance, true, FastDateFormat.getDateTimeInstance(FastDateFormat.MEDIUM, FastDateFormat.MEDIUM)); + + List vmBeans = new LinkedList<>(); + vmBeans.add(createVMNodeBean("i-001", VMStatus.replaced)); + vmBeans.add(createVMNodeBean("i-002", VMStatus.replaced)); + fixture.setVmBeans(vmBeans); + + // Execute + List result = fixture.getCurrentSubNodes(); + + // Verify + assertEquals(0, result.size(), "Should return empty when all replaced"); + assertEquals(2, fixture.getSubNodes().size(), "All should still be visible in getSubNodes"); + } } \ No newline at end of file