Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
ad90509
autoretry with hooks
deepakverma Jun 7, 2021
4e9e97b
RequestMessage Implementation
deepakverma Jun 7, 2021
433da7f
Add hook to requestFailed on connection exception
deepakverma Jun 7, 2021
8a533f6
experimenting with the main retry loop to be handling time outs as well
deepakverma Jun 7, 2021
ae9e35a
Making requestfailed as internal for now, will expose it later after …
deepakverma Jun 7, 2021
754ba69
improve message timeout while waiting for connection restore to complete
deepakverma Jun 7, 2021
9ff40ef
Remove unused variable
deepakverma Jun 7, 2021
fd1f1da
Removing unused commandflags for now
deepakverma Jun 7, 2021
415bcdf
undo csproj change to match upstream
deepakverma Jun 7, 2021
889520c
Creating a connectiontry configurationoption and command flags
deepakverma Jun 19, 2021
2ed46cd
connection,ultipexer changes to handle message retry
deepakverma Jun 19, 2021
97d96cc
Message retry
deepakverma Jun 19, 2021
1f7c27c
ensure no retry is set as default
deepakverma Jun 21, 2021
bb1a1d1
whoops, cleaning up stale code and rety should be default as null
deepakverma Jun 21, 2021
9e815d3
typo
deepakverma Jun 21, 2021
78e6910
Merge branch 'main' into autoretry
deepakverma Jun 21, 2021
c7c6b8e
Merge branch 'main' into pr/1755
NickCraver Jun 22, 2021
d6b4d30
making Retryqueuelength nullable
deepakverma Jun 22, 2021
623d731
dropping onconnectionrestore from retry config option
deepakverma Jul 1, 2021
7cd57b8
renaming Retry => CommandRetry
deepakverma Jul 1, 2021
ed1be50
to honor redirent option specified by the user, do not reset noredirect
deepakverma Jul 1, 2021
906289f
incorporating feedback on PR
deepakverma Jul 1, 2021
fd3407a
do not retry admin commands
deepakverma Jul 1, 2021
ea5091a
unifiying commandretry and commandlfags
deepakverma Jul 1, 2021
97c957a
fixed failing test
deepakverma Jul 6, 2021
cc9f6da
IRetry
deepakverma Jul 18, 2021
f4f994f
introducing FailedMessage class to make IRetry shouldretry extensible
deepakverma Jul 19, 2021
127c25c
separating out retrymanager
deepakverma Jul 19, 2021
5968359
first attempt to decouple iretrymanager from mux
deepakverma Jul 19, 2021
e9f0c0f
moving out timeout check to retrymanager
deepakverma Jul 19, 2021
c2cb2e4
separating out Retrymanager and being explicit on policy depends on q…
deepakverma Jul 19, 2021
50802fd
reverting the rename
deepakverma Jul 19, 2021
df3e08b
rnaming methods
deepakverma Jul 20, 2021
819eb0b
improving failedmessage
deepakverma Jul 21, 2021
c5a89aa
Make messageretrymanager internal for now
deepakverma Jul 21, 2021
4cb0175
making MessageRetryQueueManager as internal for now
deepakverma Jul 21, 2021
fa12e0b
1. retryqueuelength
deepakverma Jul 22, 2021
14d54f8
few renames
deepakverma Jul 22, 2021
20f7bf4
cleaning up failedmessage
deepakverma Jul 23, 2021
e93d836
starting unit test
deepakverma Jul 23, 2021
2524fc7
using IInternalConnectionmultiplexer
deepakverma Jul 23, 2021
5e5edb9
Handle exception
deepakverma Jul 23, 2021
6631146
handleResult implementation
deepakverma Jul 23, 2021
e02e04b
handle timeout on sync for retry
deepakverma Jul 23, 2021
2a9d182
can handle redisexception only and first unit test
deepakverma Jul 23, 2021
6018207
making commandretryqueuemanager testable
deepakverma Jul 26, 2021
c5e2080
CommandRetryQueueManager tests
deepakverma Jul 26, 2021
a941dbc
mux should retry internal commands but not call shouldretry
deepakverma Jul 27, 2021
9e3d515
test to cover exception being thrown while processing the message
deepakverma Jul 27, 2021
3523397
integration test
deepakverma Jul 28, 2021
83c78bf
integrationTest
deepakverma Jul 28, 2021
3afa54f
command override tests
deepakverma Jul 28, 2021
c326927
Simplifying RetryPolicy
deepakverma Jul 28, 2021
87e1fb1
DefaultREtryPolicy
deepakverma Jul 28, 2021
b615eec
refactored out failedcommand
deepakverma Jul 28, 2021
a067467
updated summary and few renames
deepakverma Jul 29, 2021
50bd6df
code cleanup
deepakverma Jul 29, 2021
febffca
this should likely fix the incorrect diff with main
deepakverma Jul 29, 2021
bfdfb3b
fixing another diff that should have not been there
deepakverma Jul 29, 2021
83bbd87
no op where it doesn't retry timeouts
deepakverma Jul 29, 2021
bceb859
merge
deepakverma Jul 29, 2021
41cc755
Merge branch 'main' of https://github.com/StackExchange/StackExchange…
deepakverma Jul 30, 2021
335bef6
Expose retry policy config as RetryCommandsOnReconnect
philon-msft Aug 4, 2021
06bb176
Minor unit test improvements
philon-msft Aug 6, 2021
e0da9c4
Minor syntax cleanup
philon-msft Aug 6, 2021
2d3b860
Merge pull request #2 from philon-msft/master
deepakverma Aug 6, 2021
0a165bd
Merge branch 'autoretry' into autoretry2
deepakverma Aug 13, 2021
d29a7fc
moving methods from mux to retrypolicy
deepakverma Aug 13, 2021
7ef0a5b
fix conditions in IsMessageREtriable and make it simpler for reading
deepakverma Aug 13, 2021
dc6cfa1
Merge pull request #4 from deepakverma/autoretry2
deepakverma Aug 13, 2021
a0c26de
cleaning up debug code
deepakverma Aug 13, 2021
0ab8bf7
Throw in case retrypolicy is not set and pass failing test
deepakverma Aug 15, 2021
f68655d
Initial refactor/renaming
Aug 20, 2021
c857c4b
Fix ref issue
Aug 20, 2021
8189e7a
Move love
Aug 23, 2021
570b060
Cleanup
Aug 23, 2021
c48ff6d
Cleanup internals
NickCraver Sep 7, 2021
8d9d0fe
Simplify
NickCraver Sep 7, 2021
664ede2
Merge branch 'main' into command-retry
NickCraver Sep 7, 2021
3cc22ea
Revert "Cleanup internals"
NickCraver Sep 10, 2021
62d215a
Add note on why this internal visibility is needed
NickCraver Sep 10, 2021
f4a3a54
Merge remote-tracking branch 'origin/main' into command-retry
NickCraver Sep 10, 2021
da3e55d
Command Retry: Changes and Simplifications (#1857)
NickCraver Sep 16, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/StackExchange.Redis/AssemblyInfoHack.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
[assembly: InternalsVisibleTo("StackExchange.Redis.Server, PublicKey=00240000048000009400000006020000002400005253413100040000010001007791a689e9d8950b44a9a8886baad2ea180e7a8a854f158c9b98345ca5009cdd2362c84f368f1c3658c132b3c0f74e44ff16aeb2e5b353b6e0fe02f923a050470caeac2bde47a2238a9c7125ed7dab14f486a5a64558df96640933b9f2b6db188fc4a820f96dce963b662fa8864adbff38e5b4542343f162ecdc6dad16912fff")]
[assembly: InternalsVisibleTo("StackExchange.Redis.Tests, PublicKey=00240000048000009400000006020000002400005253413100040000010001007791a689e9d8950b44a9a8886baad2ea180e7a8a854f158c9b98345ca5009cdd2362c84f368f1c3658c132b3c0f74e44ff16aeb2e5b353b6e0fe02f923a050470caeac2bde47a2238a9c7125ed7dab14f486a5a64558df96640933b9f2b6db188fc4a820f96dce963b662fa8864adbff38e5b4542343f162ecdc6dad16912fff")]
[assembly: InternalsVisibleTo("NRediSearch.Test, PublicKey=00240000048000009400000006020000002400005253413100040000010001007791a689e9d8950b44a9a8886baad2ea180e7a8a854f158c9b98345ca5009cdd2362c84f368f1c3658c132b3c0f74e44ff16aeb2e5b353b6e0fe02f923a050470caeac2bde47a2238a9c7125ed7dab14f486a5a64558df96640933b9f2b6db188fc4a820f96dce963b662fa8864adbff38e5b4542343f162ecdc6dad16912fff")]

// For mocking in tests
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2, PublicKey=0024000004800000940000000602000000240000525341310004000001000100c547cac37abd99c8db225ef2f6c8a3602f3b3606cc9891605d02baa56104f4cfc0734aa39b93bf7852f7d9266654753cc297e7d2edfe0bac1cdcf9f717241550e0a7b191195b7667bb4f64bcb8e2121380fd1d9d46ad2d92d2d15605093924cceaf74c4861eff62abf69b9291ed0a340e113be11e6a7d3113e92484cf7045cc7")]
[assembly: CLSCompliant(true)]
25 changes: 25 additions & 0 deletions src/StackExchange.Redis/CommandRetry/CommandFailureReason.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
namespace StackExchange.Redis
{
/// <summary>
/// The reason a command failed to send or complete.
/// </summary>
public enum CommandFailureReason
{
/// <summary>
/// No open/valid connection was available to send on - we couldn't even write the command.
/// </summary>
WriteFailure,
/// <summary>
/// The message was sent, but we lost the connection and this command in-flight.
/// </summary>
ConnectionFailure,
/// <summary>
/// The command has timed out, exceeding the sync or async timeout limits.
/// </summary>
Timeout,
/// <summary>
/// This command failed again, during a retry.
/// </summary>
RetryFailure,
}
}
99 changes: 99 additions & 0 deletions src/StackExchange.Redis/CommandRetry/CommandRetryPolicy.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
using System;

namespace StackExchange.Redis
{
/// <summary>
/// Policy that determines which commands should be retried upon restoration of a lost connection.
/// </summary>
public abstract class CommandRetryPolicy
{
/// <summary>
/// Creates a policy instance for a specific multiplexer and its commands.
/// </summary>
/// <param name="muxer">The multiplexer this policy is for. The base constructor does not use it; it is part of the signature for derived policies.</param>
protected CommandRetryPolicy(ConnectionMultiplexer muxer) { }

/// <summary>
/// Gets the current length of the retry queue.
/// </summary>
public abstract int CurrentQueueLength { get; }
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably want a CurrentlyProcessing (bool) property here so it can be included in exception messages.


/// <summary>
/// Gets whether the current queue is processing (e.g. retrying queued commands).
/// </summary>
public abstract bool CurrentlyProcessing { get; }

/// <summary>
/// Gets the status of the retry mechanism, e.g. what the queue is doing.
/// </summary>
public abstract string StatusDescription { get; }

/// <summary>
/// Checks whether a message may be considered for retry at all.
/// </summary>
/// <param name="message">The message to check eligibility for.</param>
/// <returns>
/// False when the message is flagged <see cref="CommandFlags.NoRetry"/>, is flagged
/// <see cref="CommandFlags.RetryIfNotSent"/> but has already been sent, or is an admin or internal call;
/// true otherwise.
/// </returns>
internal static bool IsEligible(Message message)
{
    var flags = message.Flags;

    // The caller explicitly opted this command out of retries.
    if ((flags & CommandFlags.NoRetry) != 0)
    {
        return false;
    }

    // "Retry only if not sent" commands are ineligible once they have gone out on the wire.
    if ((flags & CommandFlags.RetryIfNotSent) != 0 && message.Status == CommandStatus.Sent)
    {
        return false;
    }

    // Admin commands are never retried.
    if (message.IsAdmin)
    {
        return false;
    }

    // Internal calls are excluded as well; anything left is eligible.
    return !message.IsInternalCall;
}

/// <summary>
/// Determines if an exception is eligible for retrying at all.
/// Only exceptions that are (or derive from) <see cref="RedisException"/> qualify.
/// </summary>
/// <param name="exception">The exception to check eligibility for.</param>
/// <returns>True if an exception is eligible.</returns>
internal static bool IsEligible(Exception exception) => exception is RedisException;

/// <summary>
/// Tries to queue an eligible command.
/// Protected because this isn't called directly - eligibility (above) is checked first by the multiplexer.
/// </summary>
/// <param name="command">The failed command to try queueing for retry.</param>
/// <returns>True if the command was queued.</returns>
protected internal abstract bool TryQueue(FailedCommand command);

/// <summary>
/// Called when a heartbeat occurs.
/// </summary>
public abstract void OnHeartbeat();

/// <summary>
/// Called when a multiplexer reconnects.
/// </summary>
public abstract void OnReconnect();

/// <summary>
/// The default policy factory: retries only commands which fail before being sent to the server.
/// This is an alias for <see cref="IfNotSent"/>.
/// </summary>
/// <value>A factory that creates a policy retrying only unsent commands for a given multiplexer.</value>
public static Func<ConnectionMultiplexer, CommandRetryPolicy> Default
{
    get { return IfNotSent; }
}

/// <summary>
/// Policy factory that retries all commands.
/// </summary>
/// <value>A factory that creates a policy retrying every command for a given multiplexer.</value>
public static Func<ConnectionMultiplexer, CommandRetryPolicy> Always
{
    get
    {
        // The predicate ignores the failed command entirely: everything is retried.
        return muxer => new DefaultCommandRetryPolicy(muxer, failedCommand => true);
    }
}

/// <summary>
/// Policy factory that retries only commands which fail before being sent to the server.
/// </summary>
/// <value>A factory that creates a policy retrying only unsent commands for a given multiplexer.</value>
public static Func<ConnectionMultiplexer, CommandRetryPolicy> IfNotSent
{
    get
    {
        // A command qualifies only while it is still waiting to be sent.
        return muxer => new DefaultCommandRetryPolicy(
            muxer,
            failedCommand => failedCommand.Status == CommandStatus.WaitingToBeSent);
    }
}

/// <summary>
/// Policy factory that never retries a command.
/// </summary>
/// <value>A factory that creates a policy retrying no commands for a given multiplexer.</value>
public static Func<ConnectionMultiplexer, CommandRetryPolicy> Never
{
    get { return muxer => new NeverCommandRetryPolicy(muxer); }
}
}
}
81 changes: 81 additions & 0 deletions src/StackExchange.Redis/CommandRetry/DefaultCommandRetryPolicy.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
using System;

namespace StackExchange.Redis
{
/// <summary>
/// Command retry policy to determine which commands will be retried after a lost connection is retored
/// </summary>
public class DefaultCommandRetryPolicy : CommandRetryPolicy
{
    /// <summary>
    /// The queue holding messages that are waiting to be retried.
    /// </summary>
    private MessageRetryQueue RetryQueue { get; }

    /// <summary>
    /// Predicate deciding whether a given failed command should be queued for retry.
    /// </summary>
    private readonly Func<FailedCommand, bool> _shouldRetry;

    /// <summary>
    /// Creates a <see cref="DefaultCommandRetryPolicy"/> for the given <see cref="ConnectionMultiplexer"/>.
    /// </summary>
    /// <param name="muxer">The <see cref="ConnectionMultiplexer"/> to handle retries for.</param>
    /// <param name="shouldRetry">Whether a command should be retried.</param>
    /// <exception cref="ArgumentNullException"><paramref name="shouldRetry"/> is null.</exception>
    protected internal DefaultCommandRetryPolicy(ConnectionMultiplexer muxer, Func<FailedCommand, bool> shouldRetry) : base(muxer)
    {
        // Fail fast: a null predicate would otherwise only surface as a
        // NullReferenceException on the first TryQueue call.
        if (shouldRetry == null)
        {
            throw new ArgumentNullException(nameof(shouldRetry));
        }

        _shouldRetry = shouldRetry;
        var messageRetryHelper = new MessageRetryHelper(muxer);
        RetryQueue = new MessageRetryQueue(messageRetryHelper);
    }

    /// <summary>
    /// Gets the current length of the retry queue.
    /// </summary>
    public override int CurrentQueueLength => RetryQueue.CurrentRetryQueueLength;

    /// <summary>
    /// Gets whether the current queue is processing (e.g. retrying queued commands).
    /// </summary>
    public override bool CurrentlyProcessing => RetryQueue.IsRunning;

    /// <summary>
    /// Gets the status description reported by the retry queue.
    /// </summary>
    public override string StatusDescription => RetryQueue.StatusDescription;

    /// <summary>
    /// Tries to queue a message for retry if possible.
    /// </summary>
    /// <param name="command">The command to try queueing (contains the message and exception).</param>
    /// <returns>True if the message was queued.</returns>
    /// <remarks>
    /// The command is first checked against the predicate supplied at construction;
    /// only commands it accepts are offered to the retry queue.
    /// </remarks>
    protected internal override bool TryQueue(FailedCommand command)
    {
        // Sanity check if we should be trying this one
        if (!_shouldRetry.Invoke(command))
        {
            return false;
        }

        if (!RetryQueue.TryHandleFailedCommand(command.Message))
        {
            return false;
        }

        // NOTE(review): the message is updated after it has been handed to the queue;
        // confirm the queue processor cannot observe it before these updates.

        // If this message is a new message (no write time recorded yet), set the write time.
        if (command.Message.GetWriteTime() == 0)
        {
            command.Message.SetEnqueued(null);
        }

        command.Message.ResetStatusToWaitingToBeSent();

        return true;
    }

    /// <summary>
    /// Called on heartbeat, evaluating if anything in the queue has timed out and needs pruning.
    /// </summary>
    public override void OnHeartbeat() => RetryQueue.CheckRetryQueueForTimeouts();

    /// <summary>
    /// Called on a multiplexer reconnect, to start sending anything in the queue.
    /// </summary>
    public override void OnReconnect() => RetryQueue.StartRetryQueueProcessor();
}
}
45 changes: 45 additions & 0 deletions src/StackExchange.Redis/CommandRetry/FailedCommand.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using System;

namespace StackExchange.Redis
{
/// <summary>
/// Command retry policy to act as a no-op for all commands.
/// </summary>
public sealed class FailedCommand
{
    /// <summary>
    /// The original/inner message that failed.
    /// </summary>
    internal Message Message;

    /// <summary>
    /// Creates a failed command from a message, the reason it failed and the causing exception.
    /// </summary>
    /// <param name="message">The message that failed.</param>
    /// <param name="reason">Why the message failed.</param>
    /// <param name="exception">The exception associated with the failure.</param>
    internal FailedCommand(Message message, CommandFailureReason reason, Exception exception)
    {
        Message = message;
        FailureReason = reason;
        Exception = exception;
    }

    /// <summary>
    /// Creates a failed command whose reason is <see cref="CommandFailureReason.WriteFailure"/>.
    /// </summary>
    /// <param name="message">The message that could not be written.</param>
    /// <param name="exception">The exception associated with the failure.</param>
    /// <returns>A new <see cref="FailedCommand"/> describing the write failure.</returns>
    internal static FailedCommand FromWriteFail(Message message, Exception exception)
    {
        return new FailedCommand(message, CommandFailureReason.WriteFailure, exception);
    }

    /// <summary>
    /// Status of the command (read from the underlying message).
    /// </summary>
    public CommandStatus Status
    {
        get { return Message.Status; }
    }

    /// <summary>
    /// The redis command sent (read from the underlying message).
    /// </summary>
    public string CommandAndKey
    {
        get { return Message.CommandAndKey; }
    }

    /// <summary>
    /// The reason this command failed, e.g. no connection, timeout, etc.
    /// </summary>
    public CommandFailureReason FailureReason { get; }

    /// <summary>
    /// The exception that happened to create this failed command.
    /// </summary>
    public Exception Exception { get; }
}
}
14 changes: 14 additions & 0 deletions src/StackExchange.Redis/CommandRetry/IMessageRetryHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using System;
using System.Threading.Tasks;

namespace StackExchange.Redis
{
/// <summary>
/// Operations used when retrying a <see cref="Message"/>: timeout checks, endpoint availability, re-sending and failing.
/// </summary>
internal interface IMessageRetryHelper
{
/// <summary>
/// Gets the timeout exception for a message.
/// </summary>
/// <param name="message">The message to build the exception for.</param>
/// <returns>The <see cref="RedisTimeoutException"/> for <paramref name="message"/>.</returns>
RedisTimeoutException GetTimeoutException(Message message);
/// <summary>
/// Determines whether a message has timed out.
/// </summary>
/// <param name="message">The message to check.</param>
/// <returns>True if the message has timed out.</returns>
bool HasTimedOut(Message message);
/// <summary>
/// Determines whether an endpoint is available for a message.
/// </summary>
/// <param name="message">The message to check.</param>
/// <returns>True if an endpoint is available.</returns>
bool IsEndpointAvailable(Message message);
/// <summary>
/// Sets an exception on a message and completes it.
/// </summary>
/// <param name="message">The message to complete.</param>
/// <param name="ex">The exception that caused the failure; optional, defaults to null.</param>
void SetExceptionAndComplete(Message message, Exception ex = null);
/// <summary>
/// Tries to re-issue a message.
/// </summary>
/// <param name="message">The message to re-send.</param>
/// <returns>A task producing the outcome of the attempt; the exact meaning of the flag is defined by the implementation.</returns>
Task<bool> TryResendAsync(Message message);
}
}
71 changes: 71 additions & 0 deletions src/StackExchange.Redis/CommandRetry/MessageRetryHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
using System;
using System.Text;
using System.Threading.Tasks;

namespace StackExchange.Redis
{
/// <summary>
/// Helper used by the retry queue to check timeouts, find servers, re-send messages and fail them,
/// all through the owning multiplexer.
/// </summary>
internal class MessageRetryHelper : IMessageRetryHelper
{
    private readonly IInternalConnectionMultiplexer multiplexer;

    /// <summary>
    /// Creates a helper bound to the given multiplexer.
    /// </summary>
    /// <param name="multiplexer">The multiplexer used for timeouts, server selection and exception creation.</param>
    public MessageRetryHelper(IInternalConnectionMultiplexer multiplexer)
    {
        this.multiplexer = multiplexer;
    }

    /// <summary>
    /// Gets the timeout that applies to a message: the async timeout for async messages,
    /// otherwise the sync timeout.
    /// </summary>
    /// <param name="message">The message to get the timeout for.</param>
    /// <returns>The applicable timeout, in milliseconds.</returns>
    private int GetTimeoutMilliseconds(Message message) =>
        message.ResultBoxIsAsync ? multiplexer.AsyncTimeoutMilliseconds : multiplexer.TimeoutMilliseconds;

    /// <summary>
    /// Determines whether a message has exceeded its applicable (sync or async) timeout.
    /// </summary>
    /// <param name="message">The message to check.</param>
    /// <returns>True if the time since the message's write time is at least the timeout.</returns>
    public bool HasTimedOut(Message message)
    {
        var timeoutMilliseconds = GetTimeoutMilliseconds(message);
        // unchecked: Environment.TickCount wraps around, so the subtraction must be allowed to overflow.
        int millisecondsTaken = unchecked(Environment.TickCount - message.GetWriteTime());
        return millisecondsTaken >= timeoutMilliseconds;
    }

    /// <summary>
    /// Gets the timeout exception for a message.
    /// </summary>
    /// <param name="message">The message to get an exception for.</param>
    /// <returns>A <see cref="RedisTimeoutException"/> naming the command and the timeout that applied to it.</returns>
    /// <remarks>
    /// Not using ExceptionFactory.Timeout as it can cause deadlock while trying to lock writtenawaiting response queue for GetHeadMessages.
    /// </remarks>
    public RedisTimeoutException GetTimeoutException(Message message)
    {
        // Report the same timeout HasTimedOut evaluated (async vs. sync), not always the sync one.
        var sb = new StringBuilder();
        sb.Append("Timeout while waiting for connectionrestore ").Append(message.Command).Append(" (").Append(Format.ToString(GetTimeoutMilliseconds(message))).Append("ms)");
        return new RedisTimeoutException(sb.ToString(), message.Status);
    }

    /// <summary>
    /// Determines whether the multiplexer can currently select a server for the message.
    /// </summary>
    /// <param name="message">The message to select a server for.</param>
    /// <returns>True if a server was selected.</returns>
    public bool IsEndpointAvailable(Message message) => multiplexer.SelectServer(message) != null;

    /// <summary>
    /// Tries to re-issue a <see cref="Message"/>.
    /// </summary>
    /// <param name="message">The message to re-send.</param>
    /// <returns>
    /// True if a server was found and a write was attempted. If that write did not succeed, the message
    /// has been completed with an exception and true is still returned. False if no server was available.
    /// </returns>
    public async Task<bool> TryResendAsync(Message message)
    {
        // Use a specific server if one was specified originally, otherwise auto-select
        // This is important for things like REPLICAOF we really don't want going to another location
        var server = message.SpecificServer ?? multiplexer.SelectServer(message);
        if (server == null)
        {
            return false;
        }

        var result = await server.TryWriteAsync(message).ForAwait();
        if (result != WriteResult.Success)
        {
            // The write was attempted but failed: fail the message rather than leaving it pending.
            var ex = multiplexer.GetException(result, message, server);
            SetExceptionAndComplete(message, ex);
        }
        return true;
    }

    /// <summary>
    /// Completes a message with a <see cref="RedisConnectionException"/> marked as a retry failure.
    /// </summary>
    /// <param name="message">The message to complete.</param>
    /// <param name="ex">The underlying cause, if any; used as the inner exception and appended to the message text.</param>
    public void SetExceptionAndComplete(Message message, Exception ex = null)
    {
        // ex is optional (defaults to null), so it must not be dereferenced unconditionally.
        var text = ex == null
            ? "Failed while retrying on connection restore"
            : "Failed while retrying on connection restore: " + ex.Message;
        var inner = new RedisConnectionException(ConnectionFailureType.UnableToConnect, text, ex);
        message.SetExceptionAndComplete(inner, null, CommandFailureReason.RetryFailure);
    }
}
}
Loading