From 6b5955652a9f54e4debb5c5aec73aecf735b8f56 Mon Sep 17 00:00:00 2001 From: Francesco Nidito Date: Fri, 21 Feb 2020 10:02:08 +0100 Subject: [PATCH 1/5] Improves on the fix for 8918 --- .../druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java index 2f3716df11ce..77d8dc69c84d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java @@ -106,11 +106,14 @@ void doTest() postEvents(); // wait for a while to let the events be ingested - ITRetryUtil.retryUntilTrue( + ITRetryUtil.retryUntil( () -> { final int countRows = queryHelper.countRows(fullDatasourceName, Intervals.ETERNITY.toString()); return countRows == getNumExpectedRowsIngested(); }, + true, + 10000, + 100, "Waiting all events are ingested" ); From ae92c538a3a4c4947c0475b887e03a7b1dbbcbf5 Mon Sep 17 00:00:00 2001 From: Francesco Nidito Date: Fri, 21 Feb 2020 10:48:05 +0100 Subject: [PATCH 2/5] factorize constants for ITRetryUtil.retryUntil call --- .../indexer/AbstractITRealtimeIndexTaskTest.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java index 77d8dc69c84d..2083215d8a76 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITRealtimeIndexTaskTest.java @@ -69,6 +69,9 @@ public abstract class AbstractITRealtimeIndexTaskTest extends AbstractIndexerTes DateTime dtLast; // timestamp of last event DateTime dtGroupBy; // timestamp for expected response for groupBy query + static final int NUM_RETRIES = 60; + static final long DELAY_FOR_RETRIES_MS = 10000; + @Inject ServerDiscoveryFactory factory; @Inject @@ -112,8 +115,8 @@ void doTest() return countRows == getNumExpectedRowsIngested(); }, true, - 10000, - 100, + DELAY_FOR_RETRIES_MS, + NUM_RETRIES, "Waiting all events are ingested" ); @@ -155,8 +158,8 @@ void doTest() ITRetryUtil.retryUntil( () -> coordinator.areSegmentsLoaded(fullDatasourceName), true, - 10000, - 60, + DELAY_FOR_RETRIES_MS, + NUM_RETRIES, "Real-time generated segments loaded" ); From cd46f4ab885c0dccde37db111998e87174fc3749 Mon Sep 17 00:00:00 2001 From: Francesco Nidito Date: Fri, 21 Feb 2020 11:45:11 +0100 Subject: [PATCH 3/5] increasing retries and sleep in HttpUtil to cope with 401s in testing --- .../main/java/org/apache/druid/testing/utils/HttpUtil.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/integration-tests/src/main/java/org/apache/druid/testing/utils/HttpUtil.java b/integration-tests/src/main/java/org/apache/druid/testing/utils/HttpUtil.java index 5e011264c478..5e9ad4d384d0 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/utils/HttpUtil.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/utils/HttpUtil.java @@ -39,6 +39,9 @@ public class HttpUtil private static final Logger LOG = new Logger(AbstractQueryResourceTestClient.class); private static final StatusResponseHandler RESPONSE_HANDLER = StatusResponseHandler.getInstance(); + static final int NUM_RETRIES = 30; + static final long DELAY_FOR_RETRIES_MS = 10000; + public static StatusResponseHolder makeRequest(HttpClient httpClient, HttpMethod method, String url, byte[] content) { return makeRequestWithExpectedStatus( @@ -78,13 +81,13 @@ public static StatusResponseHolder makeRequestWithExpectedStatus( response.getContent() ); // it can take time for the auth config to propagate, so we retry - if (retryCount > 10) { + if (retryCount > NUM_RETRIES) { throw new ISE(errMsg); } else { LOG.error(errMsg); LOG.error("retrying in 3000ms, retryCount: " + retryCount); retryCount++; - Thread.sleep(3000); + Thread.sleep(DELAY_FOR_RETRIES_MS); } } else { break; From 972f4607bf702e39f79989500ac9d335add79dfb Mon Sep 17 00:00:00 2001 From: Francesco Nidito Date: Tue, 25 Feb 2020 10:06:54 +0100 Subject: [PATCH 4/5] adding retries in EventReceiverFirehoseTestClient --- .../EventReceiverFirehoseTestClient.java | 63 ++++++++++++------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java b/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java index 9ab6636a723a..69351191214d 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.jackson.JacksonUtils; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.http.client.HttpClient; import org.apache.druid.java.util.http.client.Request; import org.apache.druid.java.util.http.client.response.StatusResponseHandler; @@ -41,9 +42,15 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Map; +import java.util.concurrent.ExecutionException; public class EventReceiverFirehoseTestClient { + private static final Logger LOG = new Logger(EventReceiverFirehoseTestClient.class); + + static final int NUM_RETRIES = 30; + static final long DELAY_FOR_RETRIES_MS = 10000; + private final String host; private final ObjectMapper jsonMapper; private final HttpClient httpClient; @@ -82,31 +89,45 @@ private String getURL() * @return */ public int postEvents(Collection> events, ObjectMapper objectMapper, String mediaType) + throws InterruptedException { - try { - StatusResponseHolder response = httpClient.go( - new Request(HttpMethod.POST, new URL(getURL())) - .setContent(mediaType, objectMapper.writeValueAsBytes(events)), - StatusResponseHandler.getInstance() - ).get(); + int retryCount = 0; + while (true) { + try { + StatusResponseHolder response = httpClient.go( + new Request(HttpMethod.POST, new URL(getURL())) + .setContent(mediaType, objectMapper.writeValueAsBytes(events)), + StatusResponseHandler.getInstance() + ).get(); - if (!response.getStatus().equals(HttpResponseStatus.OK)) { - throw new ISE( - "Error while posting events to url[%s] status[%s] content[%s]", - getURL(), - response.getStatus(), - response.getContent() + if (!response.getStatus().equals(HttpResponseStatus.OK)) { + throw new ISE( + "Error while posting events to url[%s] status[%s] content[%s]", + getURL(), + response.getStatus(), + response.getContent() + ); + } + Map responseData = objectMapper.readValue( + response.getContent(), new TypeReference>() + { + } ); + return responseData.get("eventCount"); + } + // adding retries to flaky tests using channels + catch (ExecutionException e) { + if (retryCount > NUM_RETRIES) { + throw new RuntimeException(e); //giving up now + } else { + LOG.info(e,"received exception, sleeping and retrying"); + retryCount++; + Thread.sleep(DELAY_FOR_RETRIES_MS); + } + } + catch (Exception e) { + throw new RuntimeException(e); } - Map responseData = objectMapper.readValue( - response.getContent(), new TypeReference>() - { - } - ); - return responseData.get("eventCount"); - } - catch (Exception e) { - throw new RuntimeException(e); } } From 53761bebdf87d96e0559164127aabad2ca3d50f9 Mon Sep 17 00:00:00 2001 From: Francesco Nidito Date: Tue, 25 Feb 2020 10:41:54 +0100 Subject: [PATCH 5/5] adding missing space --- .../druid/testing/clients/EventReceiverFirehoseTestClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java b/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java index 69351191214d..761d9f55dbaf 100644 --- a/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java +++ b/integration-tests/src/main/java/org/apache/druid/testing/clients/EventReceiverFirehoseTestClient.java @@ -120,7 +120,7 @@ public int postEvents(Collection> events, ObjectMapper objec if (retryCount > NUM_RETRIES) { throw new RuntimeException(e); //giving up now } else { - LOG.info(e,"received exception, sleeping and retrying"); + LOG.info(e, "received exception, sleeping and retrying"); retryCount++; Thread.sleep(DELAY_FOR_RETRIES_MS); }