Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions server/src/main/java/org/apache/druid/rpc/ServiceClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@ public interface ServiceClient
* encountered error.
*
* Redirects from 3xx responses are followed up to a chain length of {@link #MAX_REDIRECTS} and do not consume
* attempts. Redirects are validated against the targets returned by {@link ServiceLocator}: the client will not
* follow a redirect to a target that does not appear in the returned {@link ServiceLocations}.
* attempts. Redirects are validated against the targets returned by {@link ServiceLocator}: the client will only
* follow redirects to targets that appear in {@link ServiceLocations}. If the client encounters a redirect to an
* unknown target, or if a redirect loop or self-redirect is detected, it is treated as an unavailable service and
* an attempt is consumed.
*
* If the service is unavailable at the time an attempt is made, the client will automatically retry based on
* {@link ServiceRetryPolicy#retryNotAvailable()}. If true, an attempt is consumed and the client will try to locate
Expand Down
237 changes: 166 additions & 71 deletions server/src/main/java/org/apache/druid/rpc/ServiceClientImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
Expand Down Expand Up @@ -88,7 +89,7 @@ public <IntermediateType, FinalType> ListenableFuture<FinalType> asyncRequest(
)
{
final SettableFuture<FinalType> retVal = SettableFuture.create();
tryRequest(requestBuilder, handler, retVal, 0, 0);
tryRequest(requestBuilder, handler, retVal, 0, ImmutableSet.of());
return retVal;
}

Expand All @@ -98,21 +99,35 @@ public ServiceClientImpl withRetryPolicy(ServiceRetryPolicy newRetryPolicy)
return new ServiceClientImpl(serviceName, httpClient, serviceLocator, newRetryPolicy, connectExec);
}

/**
* Internal helper used by {@link #asyncRequest(RequestBuilder, HttpResponseHandler)}.
*
* Handles retries by calling itself back in {@link #connectExec} with an incremented {@code attemptNumber}.
*
* @param requestBuilder request builder from call to {@link #asyncRequest}
* @param handler handler from call to {@link #asyncRequest}
* @param retVal return future generated by {@link #asyncRequest}
* @param attemptNumber attempt number; starts at 0 and is incremented on each retry
* @param redirectLocations redirect locations observed from the server on this attempt; used for detecting redirect
* loops and for limiting redirect chain length to {@link #MAX_REDIRECTS}. Cleared when
* a new attempt is issued.
*/
private <IntermediateType, FinalType> void tryRequest(
final RequestBuilder requestBuilder,
final HttpResponseHandler<IntermediateType, FinalType> handler,
final SettableFuture<FinalType> retVal,
final long attemptNumber,
final int redirectCount
final ImmutableSet<String> redirectLocations
)
{
whenServiceReady(
serviceLocation -> {
serviceLocations -> {
if (retVal.isCancelled()) {
// Return early if the caller canceled the return future.
return;
}

final ServiceLocation serviceLocation = pick(serviceLocations);
final long nextAttemptNumber = attemptNumber + 1;

if (serviceLocation == null) {
Expand All @@ -128,7 +143,7 @@ private <IntermediateType, FinalType> void tryRequest(
);

connectExec.schedule(
() -> tryRequest(requestBuilder, handler, retVal, attemptNumber + 1, redirectCount),
() -> tryRequest(requestBuilder, handler, retVal, nextAttemptNumber, ImmutableSet.of()),
backoffMs,
TimeUnit.MILLISECONDS
);
Expand Down Expand Up @@ -172,71 +187,15 @@ public void onSuccess(@Nullable final Either<StringFullResponseHolder, FinalType
try {
// result can be null if the HttpClient encounters a problem midstream on an unfinished response.
if (result != null && result.isValue()) {
if (nextAttemptNumber > 1) {
// There were retries. Log at INFO level to provide the user some closure.
log.info(
"Service [%s] request [%s %s] completed.",
serviceName,
request.getMethod(),
request.getUrl()
);
} else {
// No retries. Log at debug level to avoid cluttering the logs.
log.debug(
"Service [%s] request [%s %s] completed.",
serviceName,
request.getMethod(),
request.getUrl()
);
}

// Will not throw, because we checked result.isValue() earlier.
retVal.set(result.valueOrThrow());
handleResultValue(result.valueOrThrow());
} else {
final StringFullResponseHolder errorHolder = result != null ? result.error() : null;

if (errorHolder != null && isRedirect(errorHolder.getResponse().getStatus())) {
// Redirect. Update preferredLocationNoPath if appropriate, then reissue.
final String newUri = result.error().getResponse().headers().get("Location");

if (redirectCount >= MAX_REDIRECTS) {
retVal.setException(new RpcException(
"Service [%s] redirected too many times [%d] to invalid url %s",
serviceName,
redirectCount,
newUri
));
} else {
// Update preferredLocationNoPath if we got a redirect.
final ServiceLocation redirectLocationNoPath = serviceLocationNoPathFromUri(newUri);

if (redirectLocationNoPath != null) {
preferredLocationNoPath.set(redirectLocationNoPath);
connectExec.submit(
() -> tryRequest(requestBuilder, handler, retVal, attemptNumber, redirectCount + 1)
);
} else {
retVal.setException(
new RpcException(
"Service [%s] redirected [%d] times to invalid URL [%s]",
serviceName,
redirectCount,
newUri
)
);
}
}
handleRedirect(errorHolder);
} else if (shouldTry(nextAttemptNumber)
&& (errorHolder == null || retryPolicy.retryHttpResponse(errorHolder.getResponse()))) {
// Retryable server response (or null errorHolder, which means null result, which can happen
// if the HttpClient encounters an exception in the midst of response processing).
final long backoffMs = computeBackoffMs(retryPolicy, attemptNumber);
log.noStackTrace().info(buildErrorMessage(request, errorHolder, backoffMs, nextAttemptNumber));
connectExec.schedule(
() -> tryRequest(requestBuilder, handler, retVal, attemptNumber + 1, redirectCount),
backoffMs,
TimeUnit.MILLISECONDS
);
handleRetryableErrorResponse(errorHolder);
} else if (errorHolder != null) {
// Nonretryable server response.
retVal.setException(new HttpResponseException(errorHolder));
Expand Down Expand Up @@ -264,7 +223,7 @@ public void onFailure(final Throwable t)
log.noStackTrace().info(t, buildErrorMessage(request, null, backoffMs, nextAttemptNumber));

connectExec.schedule(
() -> tryRequest(requestBuilder, handler, retVal, attemptNumber + 1, redirectCount),
() -> tryRequest(requestBuilder, handler, retVal, nextAttemptNumber, ImmutableSet.of()),
backoffMs,
TimeUnit.MILLISECONDS
);
Expand All @@ -277,6 +236,135 @@ public void onFailure(final Throwable t)
retVal.setException(new RpcException(t, "Service [%s] handler exited unexpectedly", serviceName));
}
}

/**
* Handles HTTP 2xx responses from the server.
*/
private void handleResultValue(final FinalType value)
{
if (nextAttemptNumber > 1) {
// There were retries. Log at INFO level to provide the user some closure.
log.info(
"Service [%s] request [%s %s] completed.",
serviceName,
request.getMethod(),
request.getUrl()
);
} else {
// No retries. Log at debug level to avoid cluttering the logs.
log.debug(
"Service [%s] request [%s %s] completed.",
serviceName,
request.getMethod(),
request.getUrl()
);
}

// Will not throw, because we checked result.isValue() earlier.
retVal.set(value);
}

/**
* Handles retryable HTTP error responses from the server.
*/
private void handleRetryableErrorResponse(final StringFullResponseHolder errorHolder)
{
// Retryable server response (or null errorHolder, which means null result, which can happen
// if the HttpClient encounters an exception in the midst of response processing).
final long backoffMs = computeBackoffMs(retryPolicy, attemptNumber);
log.info(buildErrorMessage(request, errorHolder, backoffMs, nextAttemptNumber));
connectExec.schedule(
() -> tryRequest(requestBuilder, handler, retVal, nextAttemptNumber, ImmutableSet.of()),
backoffMs,
TimeUnit.MILLISECONDS
);
}

/**
* Handles HTTP redirect responses from the server.
*/
private void handleRedirect(final StringFullResponseHolder errorHolder)
{
// Redirect. Update preferredLocationNoPath if appropriate, then reissue.
final String newUri = errorHolder.getResponse().headers().get("Location");
final ServiceLocation redirectLocationNoPath = serviceLocationNoPathFromUri(newUri);

if (redirectLocationNoPath == null) {
// Redirect to invalid URL. Something is wrong with the server: fail immediately
// without retries.
retVal.setException(
new RpcException(
"Service [%s] redirected to invalid URL [%s]",
serviceName,
newUri
)
);
} else if (serviceLocations.getLocations()
.stream()
.anyMatch(loc -> serviceLocationNoPath(loc)
.equals(redirectLocationNoPath))) {
// Valid redirect, to a server that is one of the known locations.
final boolean isRedirectLoop = redirectLocations.contains(newUri);
final boolean isRedirectChainTooLong = redirectLocations.size() >= MAX_REDIRECTS;

if (isRedirectLoop || isRedirectChainTooLong) {
// Treat redirect loops, or too-long redirect chains, as unavailable services.
if (retryPolicy.retryNotAvailable() && shouldTry(nextAttemptNumber)) {
final long backoffMs = computeBackoffMs(retryPolicy, attemptNumber);

log.info(
"Service [%s] issued too many redirects on attempt #%d; retrying in %,d ms.",
serviceName,
nextAttemptNumber,
backoffMs
);

connectExec.schedule(
() -> tryRequest(requestBuilder, handler, retVal, nextAttemptNumber, ImmutableSet.of()),
backoffMs,
TimeUnit.MILLISECONDS
);
} else {
retVal.setException(new ServiceNotAvailableException(serviceName, "issued too many redirects"));
}
} else {
// Valid redirect. Follow it without incrementing the attempt number.
preferredLocationNoPath.set(redirectLocationNoPath);
final ImmutableSet<String> newRedirectLocations =
ImmutableSet.<String>builder().addAll(redirectLocations).add(newUri).build();
connectExec.submit(
() -> tryRequest(requestBuilder, handler, retVal, attemptNumber, newRedirectLocations)
);
}
} else {
// Redirect to a server that is not one of the known locations. Treat service as unavailable.
if (retryPolicy.retryNotAvailable() && shouldTry(nextAttemptNumber)) {
final long backoffMs = computeBackoffMs(retryPolicy, attemptNumber);

log.info(
"Service [%s] issued redirect to unknown URL [%s] on attempt #%d; retrying in %,d ms.",
serviceName,
newUri,
nextAttemptNumber,
backoffMs
);

connectExec.schedule(
() -> tryRequest(requestBuilder, handler, retVal, nextAttemptNumber, ImmutableSet.of()),
backoffMs,
TimeUnit.MILLISECONDS
);
} else {
retVal.setException(
new ServiceNotAvailableException(
serviceName,
"issued redirect to unknown URL [%s]",
newUri
)
);
}
}
}
},
connectExec
);
Expand All @@ -285,7 +373,7 @@ public void onFailure(final Throwable t)
);
}

private <T> void whenServiceReady(final Consumer<ServiceLocation> callback, final SettableFuture<T> retVal)
private <T> void whenServiceReady(final Consumer<ServiceLocations> callback, final SettableFuture<T> retVal)
{
Futures.addCallback(
serviceLocator.locate(),
Expand All @@ -300,8 +388,7 @@ public void onSuccess(final ServiceLocations locations)
}

try {
final ServiceLocation location = pick(locations);
callback.accept(location);
callback.accept(locations);
}
catch (Throwable t) {
// It's a bug if this happens. The purpose of this line is to help us debug what went wrong.
Expand All @@ -328,10 +415,7 @@ private ServiceLocation pick(final ServiceLocations locations)
if (preferred != null) {
// Preferred location is set. Use it if it's one of known locations.
for (final ServiceLocation location : locations.getLocations()) {
final ServiceLocation locationNoPath =
new ServiceLocation(location.getHost(), location.getPlaintextPort(), location.getTlsPort(), "");

if (locationNoPath.equals(preferred)) {
if (serviceLocationNoPath(location).equals(preferred)) {
return location;
}
}
Expand Down Expand Up @@ -392,6 +476,9 @@ static long computeBackoffMs(final ServiceRetryPolicy retryPolicy, final long at
);
}

/**
* Returns a {@link ServiceLocation} without a path component, based on a URI.
*/
@Nullable
@VisibleForTesting
static ServiceLocation serviceLocationNoPathFromUri(@Nullable final String uriString)
Expand Down Expand Up @@ -423,6 +510,14 @@ static ServiceLocation serviceLocationNoPathFromUri(@Nullable final String uriSt
}
}

/**
* Returns a {@link ServiceLocation} without its path.
*/
static ServiceLocation serviceLocationNoPath(final ServiceLocation location)
{
return new ServiceLocation(location.getHost(), location.getPlaintextPort(), location.getTlsPort(), "");
}

@VisibleForTesting
static boolean isRedirect(final HttpResponseStatus responseStatus)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,20 @@

package org.apache.druid.rpc;

import org.apache.druid.java.util.common.StringUtils;

/**
* Returned by {@link ServiceClient#asyncRequest} when a request has failed because the service is not available.
*/
public class ServiceNotAvailableException extends RpcException
{
public ServiceNotAvailableException(final String serviceName, final String reason, final Object... reasonArgs)
{
super("Service [%s] %s", serviceName, StringUtils.format(reason, reasonArgs));
}

public ServiceNotAvailableException(final String serviceName)
{
super("Service [%s] is not available", serviceName);
this(serviceName, "is not available");
}
}
Loading